Coverage for nexusLIMS/schemas/em_glossary.py: 100%
116 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
1"""
2EM Glossary field name mappings for NexusLIMS metadata.
4This module provides mappings between NexusLIMS internal field names, display names,
5and EM Glossary (EMG) standardized terminology. The EM Glossary is a community-driven
6ontology for electron microscopy metadata maintained by the Helmholtz Metadata
7Collaboration.
9The module uses RDFLib to parse the EM Glossary OWL ontology file, providing access
10to term labels, definitions, and the full semantic structure.
12**EM Glossary Version:** v2.0.0
14**References:**
15- EM Glossary v2.0.0: [https://purls.helmholtz-metadaten.de/emg/v2.0.0/](https://purls.helmholtz-metadaten.de/emg/v2.0.0/)
16- OWL Ontology: Shipped with NexusLIMS at
17 `nexusLIMS/schemas/references/em_glossary_2.0.owl`
18- License: CC BY 4.0 [https://creativecommons.org/licenses/by/4.0/](https://creativecommons.org/licenses/by/4.0/)
20The mappings in this module enable:
21- Standardized field names across instruments and vendors
22- Cross-reference to EM Glossary IDs for semantic interoperability
23- Human-readable display names for XML output
24- Dynamic loading from the OWL ontology using [RDFLib](https://rdflib.readthedocs.io/en/stable/index.html)
26Examples
27--------
28Get EM Glossary ID for a field:
30>>> from nexusLIMS.schemas.em_glossary import get_emg_id
31>>> get_emg_id("acceleration_voltage")
32'EMG_00000004'
34Get display name for XML:
36>>> from nexusLIMS.schemas.em_glossary import get_display_name
37>>> get_display_name("acceleration_voltage")
38'Acceleration Voltage'
40Get EMG label from ID:
42>>> from nexusLIMS.schemas.em_glossary import get_emg_label
43>>> get_emg_label("EMG_00000004")
44'Acceleration Voltage'
46Get EMG definition:
48>>> from nexusLIMS.schemas.em_glossary import get_emg_definition
49>>> defn = get_emg_definition("EMG_00000004")
50>>> print(defn)
51The potential difference between anode and cathode.
53Check if field has EMG mapping:
55>>> from nexusLIMS.schemas.em_glossary import has_emg_id
56>>> has_emg_id("acceleration_voltage")
57True
58>>> has_emg_id("custom_vendor_field")
59False
60"""
62import logging
63from functools import lru_cache
64from pathlib import Path
65from typing import Dict
67from rdflib import RDF, RDFS, Graph, Namespace
# Logger for this module; used by the loader and lookup helpers below.
_logger = logging.getLogger(__name__)

EMG_OWL_PATH = Path(__file__).parent.joinpath("references", "em_glossary_2.0.owl")
"""Location of the EM Glossary OWL file, resolved relative to this module"""

EMG_VERSION = "v2.0.0"
"""Version tag of the packaged EM Glossary OWL file"""

EMG = Namespace("https://purls.helmholtz-metadaten.de/emg/")
"""RDF namespace under which EM Glossary term URIs live"""

OBO = Namespace("http://purl.obolibrary.org/obo/")
"""RDF namespace for OBO properties"""
@lru_cache(maxsize=1)
def _load_emg_graph() -> Graph:
    """
    Parse the packaged EM Glossary OWL file into an RDF graph.

    The function is wrapped in ``lru_cache``, so once a call succeeds the
    same ``Graph`` object is handed back on every later call.

    Returns
    -------
    rdflib.Graph
        Graph holding the triples parsed from ``EMG_OWL_PATH``

    Raises
    ------
    FileNotFoundError
        If ``EMG_OWL_PATH`` does not exist
    ValueError
        If anything goes wrong while parsing the file (the original
        exception is chained as the cause)
    """
    owl_file = EMG_OWL_PATH

    # Fail early with a precise error instead of a generic parser failure.
    if not owl_file.exists():
        msg = f"EM Glossary OWL file not found at {owl_file}"
        raise FileNotFoundError(msg)

    try:
        graph = Graph()
        graph.parse(owl_file, format="xml")
        _logger.debug("Loaded EM Glossary ontology from %s", owl_file)
        _logger.debug("Graph contains %s triples", len(graph))
    except Exception as err:
        # Present every parse problem to callers as a single exception type.
        msg = f"Failed to parse EM Glossary OWL file: {err}"
        raise ValueError(msg) from err

    return graph
@lru_cache(maxsize=1)
def _load_emg_terms() -> Dict[str, Dict[str, str | None]]:
    """
    Load EM Glossary terms with labels and definitions.

    Walks every typed subject of the ontology graph, keeps those whose URI
    starts with the ``EMG`` namespace and whose last path segment starts
    with ``EMG_``, and records the first label and first definition found.
    Subjects without a label are skipped. The result is cached by
    ``lru_cache``, so callers all share one dictionary object.

    Returns
    -------
    dict[str, dict[str, str | None]]
        Mapping from EMG_ID -> {'label': str, 'definition': str | None}

    Raises
    ------
    ValueError
        If no EMG term could be extracted from the graph

    Examples
    --------
    >>> terms = _load_emg_terms()
    >>> terms['EMG_00000004']['label']
    'Acceleration Voltage'
    """
    graph = _load_emg_graph()

    # Loop-invariant: the namespace prefix never changes while iterating.
    emg_prefix = str(EMG)

    emg_terms: Dict[str, Dict[str, str | None]] = {}
    visited: set[str] = set()

    for subject in graph.subjects(RDF.type, None):
        uri_str = str(subject)
        if not uri_str.startswith(emg_prefix):
            continue

        # subjects() may yield a subject once per matching rdf:type triple;
        # a repeat would only recompute the same entry, so skip it.
        if uri_str in visited:
            continue
        visited.add(uri_str)

        # The EMG ID is the last path segment of the term URI.
        emg_id = uri_str.rsplit("/", 1)[-1]
        if not emg_id.startswith("EMG_"):
            continue

        # Only the first label is used; terms without a label are ignored.
        label = next(iter(graph.objects(subject, RDFS.label)), None)
        if label is None:
            continue

        # IAO_0000115 is the standard definition property; it is optional.
        definition = next(iter(graph.objects(subject, OBO.IAO_0000115)), None)

        emg_terms[emg_id] = {
            "label": str(label),
            "definition": None if definition is None else str(definition),
        }

    if not emg_terms:
        msg = "No EMG terms found in OWL file. File may be corrupted."
        raise ValueError(msg)

    _logger.debug("Loaded %s EMG terms from ontology", len(emg_terms))
    return emg_terms
181# Mapping from NexusLIMS internal field names to EM Glossary terms
182# Format: internal_field_name -> (display_name, emg_label or None, description)
183# The emg_label is used to look up the EMG_ID from the OWL file
184NEXUSLIMS_TO_EMG_MAPPINGS: Dict[str, tuple[str, str | None, str]] = {
185 # Core acquisition parameters (common to all types)
186 "creation_time": (
187 "Creation Time",
188 None, # No specific EMG term for timestamp
189 "ISO-8601 timestamp with timezone",
190 ),
191 "data_type": (
192 "Data Type",
193 None, # Descriptive field, not in EMG
194 "Human-readable data type description",
195 ),
196 "dataset_type": (
197 "DatasetType",
198 None, # Schema-defined category
199 "Schema-defined dataset category",
200 ),
201 # Image acquisition parameters (SEM/TEM/STEM)
202 "acceleration_voltage": (
203 "Acceleration Voltage",
204 "Acceleration Voltage", # EMG label
205 "Accelerating voltage of the electron/ion beam",
206 ),
207 "working_distance": (
208 "Working Distance",
209 "Working Distance", # EMG label
210 "Distance between final lens and sample surface",
211 ),
212 "beam_current": (
213 "Beam Current",
214 "Beam Current", # EMG label
215 "Electron beam current",
216 ),
217 "emission_current": (
218 "Emission Current",
219 "Emission Current", # EMG label
220 "Emission current from electron source",
221 ),
222 "dwell_time": (
223 "Pixel Dwell Time",
224 "Dwell Time", # EMG label
225 "Time the beam dwells on each pixel during scanning",
226 ),
227 "magnification": (
228 "Magnification",
229 None, # EMG has Magnification but it's complex
230 "Nominal magnification",
231 ),
232 "horizontal_field_width": (
233 "Horizontal Field Width",
234 None, # Not in EMG v2.0.0
235 "Width of the scanned area",
236 ),
237 "vertical_field_width": (
238 "Vertical Field Width",
239 None, # Not in EMG v2.0.0
240 "Height of the scanned area",
241 ),
242 "pixel_width": (
243 "Pixel Width",
244 None, # Not in EMG v2.0.0
245 "Physical width of a single pixel",
246 ),
247 "pixel_height": (
248 "Pixel Height",
249 None, # Not in EMG v2.0.0
250 "Physical height of a single pixel",
251 ),
252 "scan_rotation": (
253 "Scan Rotation",
254 None, # Not in EMG v2.0.0
255 "Rotation angle of the scan frame",
256 ),
257 # Detector information
258 "detector_type": (
259 "Detector",
260 None, # EMG has detector concepts but not simple type field
261 "Type or name of detector used",
262 ),
263 "acquisition_device": (
264 "Acquisition Device",
265 None, # Similar to detector_type
266 "Name of the acquisition device or camera",
267 ),
268 # Stage position (common to SEM/TEM)
269 "stage_x": (
270 "Stage X",
271 None, # Part of complex stage position concept
272 "Stage X coordinate",
273 ),
274 "stage_y": (
275 "Stage Y",
276 None, # Part of complex stage position concept
277 "Stage Y coordinate",
278 ),
279 "stage_z": (
280 "Stage Z",
281 None, # Part of complex stage position concept
282 "Stage Z coordinate",
283 ),
284 "stage_tilt": (
285 "Stage Tilt",
286 None, # Part of complex stage position concept
287 "Stage tilt angle (alpha)",
288 ),
289 "stage_rotation": (
290 "Stage Rotation",
291 None, # Part of complex stage position concept
292 "Stage rotation angle",
293 ),
294 "stage_alpha": (
295 "Stage Alpha",
296 None, # Part of complex stage position concept
297 "Stage alpha tilt angle",
298 ),
299 "stage_beta": (
300 "Stage Beta",
301 None, # Part of complex stage position concept
302 "Stage beta tilt angle",
303 ),
304 # Spectrum acquisition parameters (EDS/EELS)
305 "acquisition_time": (
306 "Acquisition Time",
307 "Acquisition Time", # EMG label
308 "Total time for spectrum acquisition",
309 ),
310 "live_time": (
311 "Live Time",
312 None, # Not in EMG v2.0.0
313 "Live time (excludes dead time) for spectrum acquisition",
314 ),
315 "detector_energy_resolution": (
316 "Energy Resolution",
317 None, # Not in EMG v2.0.0
318 "Energy resolution of the detector",
319 ),
320 "channel_size": (
321 "Channel Size",
322 None, # Not in EMG v2.0.0
323 "Energy width of each channel",
324 ),
325 "starting_energy": (
326 "Starting Energy",
327 None, # Not in EMG v2.0.0
328 "Starting energy of the spectrum",
329 ),
330 "azimuthal_angle": (
331 "Azimuthal Angle",
332 None, # Not in EMG v2.0.0
333 "Azimuthal angle of the detector",
334 ),
335 "elevation_angle": (
336 "Elevation Angle",
337 None, # Not in EMG v2.0.0
338 "Elevation angle of the detector",
339 ),
340 "takeoff_angle": (
341 "Takeoff Angle",
342 None, # Not in EMG v2.0.0
343 "X-ray takeoff angle",
344 ),
345 # Diffraction parameters (TEM)
346 "camera_length": (
347 "Camera Length",
348 "Camera Length", # EMG label
349 "Camera length for diffraction pattern",
350 ),
351 "convergence_angle": (
352 "Convergence Angle",
353 "Convergence Angle", # EMG label
354 "Convergence angle of the electron beam",
355 ),
356 "illumination_mode": (
357 "Illumination Mode",
358 None, # Not in EMG v2.0.0
359 "TEM illumination mode (TEM, STEM, Diffraction, etc.)",
360 ),
361 # Sample/metadata
362 "specimen": (
363 "Specimen",
364 None, # EMG has Specimen but it's complex
365 "Sample or specimen description",
366 ),
367 "operator": (
368 "Operator",
369 None, # Not in EMG (user information)
370 "User who acquired the data",
371 ),
372 # Environmental parameters
373 "temperature": (
374 "Temperature",
375 None, # Not in EMG v2.0.0
376 "Sample or chamber temperature",
377 ),
378 "pressure": (
379 "Pressure",
380 None, # Not in EMG v2.0.0
381 "Chamber pressure",
382 ),
383 "chamber_pressure": (
384 "Chamber Pressure",
385 None, # Not in EMG v2.0.0
386 "Vacuum chamber pressure",
387 ),
388 # Data dimensions
389 "data_dimensions": (
390 "Data Dimensions",
391 None, # Not a measurement, structural metadata
392 "String representation of data shape",
393 ),
394 # Instrument identification
395 "instrument_id": (
396 "Instrument ID",
397 None, # Not in EMG (internal NexusLIMS identifier)
398 "NexusLIMS persistent instrument identifier",
399 ),
400}
401"""Mapping from NexusLIMS internal field names to EM Glossary terms
402Format: `internal_field_name -> (display_name, emg_label or None, description)`
403The emg_label is used to look up the EMG_ID from the OWL file"""
def get_emg_label(emg_id: str) -> str | None:
    """
    Return the human-readable label of an EM Glossary term.

    The label is read from the terms loaded out of the OWL ontology file.
    Any exception raised during the lookup is logged as a warning and
    turned into a ``None`` result.

    Parameters
    ----------
    emg_id : str
        EM Glossary ID (e.g., "EMG_00000004")

    Returns
    -------
    str or None
        Label of the term, or None if the ID is unknown or the lookup failed

    Examples
    --------
    >>> get_emg_label("EMG_00000004")
    'Acceleration Voltage'

    >>> get_emg_label("EMG_00000050")
    'Working Distance'

    >>> get_emg_label("EMG_99999999") is None
    True
    """
    try:
        entry = _load_emg_terms().get(emg_id)
        if not entry:
            return None
        return entry["label"]
    except Exception as exc:
        _logger.warning("Failed to load EMG ontology: %s", exc)
        return None
def get_emg_definition(emg_id: str) -> str | None:
    """
    Return the formal definition of an EM Glossary term.

    The definition is read from the terms loaded out of the OWL ontology
    file. Any exception raised during the lookup is logged as a warning and
    turned into a ``None`` result.

    Parameters
    ----------
    emg_id : str
        EM Glossary ID (e.g., "EMG_00000004")

    Returns
    -------
    str or None
        Definition of the term, or None if the ID is unknown, the term has
        no definition, or the lookup failed

    Examples
    --------
    >>> defn = get_emg_definition("EMG_00000004")
    >>> print(defn)
    The potential difference between anode and cathode.

    >>> get_emg_definition("EMG_99999999") is None
    True
    """
    try:
        entry = _load_emg_terms().get(emg_id)
        if not entry:
            return None
        return entry["definition"]
    except Exception as exc:
        _logger.warning("Failed to load EMG ontology: %s", exc)
        return None
def get_emg_id(field_name: str) -> str | None:
    """
    Resolve a NexusLIMS field name to its EM Glossary ID.

    The field is first looked up in NEXUSLIMS_TO_EMG_MAPPINGS to obtain its
    EMG label; the ontology terms are then searched for the first term whose
    label equals it.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        EM Glossary ID string (e.g., "EMG_00000004"), or None if the field
        is unmapped, has no EMG label, the label is absent from the
        ontology, or the ontology could not be loaded

    Examples
    --------
    >>> get_emg_id("acceleration_voltage")
    'EMG_00000004'

    >>> get_emg_id("working_distance")
    'EMG_00000050'

    >>> get_emg_id("custom_field") is None
    True

    Notes
    -----
    Many NexusLIMS fields deliberately have no EM Glossary equivalent: EMG
    is a growing ontology and some fields are vendor-specific or outside
    the scope of its current coverage (v2.0.0).
    """
    entry = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    wanted_label = None if entry is None else entry[1]
    if wanted_label is None:
        return None

    # Reverse lookup (label -> ID): the first matching term wins.
    try:
        candidates = (
            term_id
            for term_id, info in _load_emg_terms().items()
            if info["label"] == wanted_label
        )
        found = next(candidates, None)
    except Exception as exc:
        _logger.warning("Failed to load EMG ontology: %s", exc)
        return None

    if found is None:
        _logger.debug("EMG label '%s' not found in ontology", wanted_label)
    return found
def get_display_name(field_name: str) -> str:
    """
    Get the human-readable display name for a field.

    Returns the display name stored as the first tuple element in
    NEXUSLIMS_TO_EMG_MAPPINGS. If the field is not in the mapping, returns a
    title-cased version of the field name with underscores replaced by
    spaces.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str
        Display name for the field

    Examples
    --------
    >>> get_display_name("acceleration_voltage")
    'Acceleration Voltage'

    >>> get_display_name("dwell_time")
    'Pixel Dwell Time'

    >>> get_display_name("custom_field")
    'Custom Field'

    Notes
    -----
    For unmapped fields, the function applies a simple transformation:
    replace underscores with spaces and title-case the result. This ensures
    all fields have reasonable display names even without explicit mappings.
    """
    mapping = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    if mapping is None:
        # Fallback for unmapped fields: "custom_field" -> "Custom Field".
        return field_name.replace("_", " ").title()
    return mapping[0]  # display name is the first element of the tuple
def get_description(field_name: str) -> str | None:
    """
    Return the short NexusLIMS description of a field.

    The description is the third tuple element stored for the field in
    NEXUSLIMS_TO_EMG_MAPPINGS. It is the NexusLIMS wording, not the formal
    EMG definition; use get_emg_definition() for the latter.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        Field description, or None if the field is not in the mapping

    Examples
    --------
    >>> desc = get_description("acceleration_voltage")
    >>> print(desc)
    Accelerating voltage of the electron/ion beam

    >>> get_description("unknown_field") is None
    True
    """
    entry = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    return None if entry is None else entry[2]
def has_emg_id(field_name: str) -> bool:
    """
    Tell whether a field resolves to an EM Glossary ID.

    This is a thin wrapper around get_emg_id(): the answer is True exactly
    when that function returns an ID, so an unmapped field, a field without
    an EMG label, or a failed ontology lookup all yield False.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    bool
        True if the field has an EMG ID, False otherwise

    Examples
    --------
    >>> has_emg_id("acceleration_voltage")
    True

    >>> has_emg_id("magnification")
    False

    >>> has_emg_id("custom_field")
    False
    """
    return get_emg_id(field_name) is not None
def get_emg_uri(field_name: str) -> str | None:
    """
    Build the full, versioned EM Glossary URI for a field.

    The field is resolved to an EMG ID with get_emg_id(); the URI is then
    assembled from the EMG PURL base, ``EMG_VERSION`` and that ID.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        Full EMG PURL, or None if the field has no EMG ID

    Examples
    --------
    >>> get_emg_uri("acceleration_voltage")
    'https://purls.helmholtz-metadaten.de/emg/v2.0.0/EMG_00000004'

    >>> get_emg_uri("working_distance")
    'https://purls.helmholtz-metadaten.de/emg/v2.0.0/EMG_00000050'

    >>> get_emg_uri("custom_field") is None
    True
    """
    emg_id = get_emg_id(field_name)
    # Without an ID there is nothing to link to.
    return (
        None
        if emg_id is None
        else f"https://purls.helmholtz-metadaten.de/emg/{EMG_VERSION}/{emg_id}"
    )
def get_all_mapped_fields() -> list[str]:
    """
    List every field name present in NEXUSLIMS_TO_EMG_MAPPINGS.

    Fields are included whether or not they carry an EMG label.

    Returns
    -------
    list[str]
        Field names with mappings, in sorted order

    Examples
    --------
    >>> fields = get_all_mapped_fields()
    >>> "acceleration_voltage" in fields
    True
    >>> len(fields) > 0
    True
    """
    # Iterating a dict yields its keys, so it can be sorted directly.
    return sorted(NEXUSLIMS_TO_EMG_MAPPINGS)
def get_fields_with_emg_ids() -> list[str]:
    """
    List the mapped fields that resolve to an EM Glossary ID.

    Each key of NEXUSLIMS_TO_EMG_MAPPINGS is tested with has_emg_id(); only
    the fields for which it returns True are kept, so fields that merely
    have a display name are left out.

    Returns
    -------
    list[str]
        Field names with EMG IDs, in sorted order

    Examples
    --------
    >>> fields = get_fields_with_emg_ids()
    >>> "acceleration_voltage" in fields
    True
    >>> "magnification" in fields  # Has display name but no EMG ID
    False
    """
    return sorted(filter(has_emg_id, NEXUSLIMS_TO_EMG_MAPPINGS))
def get_all_emg_terms() -> Dict[str, Dict[str, str | None]]:
    """
    Get all EM Glossary terms from the OWL file.

    Returns the complete mapping of EMG IDs to labels and definitions
    loaded from the ontology. Useful for discovering available EMG terms.

    The returned dictionaries are fresh copies: the loader's result is
    cached and shared by every lookup helper in this module, so handing out
    the cached object would let a caller's mutation change later lookups.

    Returns
    -------
    dict[str, dict[str, str | None]]
        Mapping from EMG_ID -> {'label': str, 'definition': str | None};
        an empty dict if the ontology could not be loaded (the failure is
        logged with its traceback)

    Examples
    --------
    >>> terms = get_all_emg_terms()
    >>> "EMG_00000004" in terms
    True
    >>> terms["EMG_00000004"]["label"]
    'Acceleration Voltage'
    >>> print(terms["EMG_00000004"]["definition"])
    The potential difference between anode and cathode.
    """
    try:
        # Copy both levels so neither the outer mapping nor any per-term
        # dict aliases the cached data.
        return {emg_id: dict(info) for emg_id, info in _load_emg_terms().items()}
    except Exception:
        _logger.exception("Failed to load EMG ontology")
        return {}