"""
EM Glossary field name mappings for NexusLIMS metadata.
This module provides mappings between NexusLIMS internal field names, display names,
and EM Glossary (EMG) standardized terminology. The EM Glossary is a community-driven
ontology for electron microscopy metadata maintained by the Helmholtz Metadata
Collaboration.
The module uses RDFLib to parse the EM Glossary OWL ontology file, providing access
to term labels, definitions, and the full semantic structure.
**EM Glossary Version:** v2.0.0
**References:**
- EM Glossary v2.0.0: [https://purls.helmholtz-metadaten.de/emg/v2.0.0/](https://purls.helmholtz-metadaten.de/emg/v2.0.0/)
- OWL Ontology: Shipped with NexusLIMS at
`nexusLIMS/schemas/references/em_glossary_2.0.owl`
- License: CC BY 4.0 [https://creativecommons.org/licenses/by/4.0/](https://creativecommons.org/licenses/by/4.0/)
The mappings in this module enable:
- Standardized field names across instruments and vendors
- Cross-reference to EM Glossary IDs for semantic interoperability
- Human-readable display names for XML output
- Dynamic loading from the OWL ontology using [RDFLib](https://rdflib.readthedocs.io/en/stable/index.html)
Examples
--------
Get EM Glossary ID for a field:
>>> from nexusLIMS.schemas.em_glossary import get_emg_id
>>> get_emg_id("acceleration_voltage")
'EMG_00000004'
Get display name for XML:
>>> from nexusLIMS.schemas.em_glossary import get_display_name
>>> get_display_name("acceleration_voltage")
'Acceleration Voltage'
Get EMG label from ID:
>>> from nexusLIMS.schemas.em_glossary import get_emg_label
>>> get_emg_label("EMG_00000004")
'Acceleration Voltage'
Get EMG definition:
>>> from nexusLIMS.schemas.em_glossary import get_emg_definition
>>> defn = get_emg_definition("EMG_00000004")
>>> print(defn)
The potential difference between anode and cathode.
Check if field has EMG mapping:
>>> from nexusLIMS.schemas.em_glossary import has_emg_id
>>> has_emg_id("acceleration_voltage")
True
>>> has_emg_id("custom_vendor_field")
False
"""
import logging
from functools import lru_cache
from pathlib import Path
from typing import Dict
from rdflib import RDF, RDFS, Graph, Namespace
# Module-level logger, named after this module's import path.
_logger = logging.getLogger(__name__)

# Resolved relative to this file: <this directory>/references/em_glossary_2.0.owl
EMG_OWL_PATH = Path(__file__).parent / "references" / "em_glossary_2.0.owl"
"""Path to the EM Glossary OWL file shipped with NexusLIMS"""

# Interpolated into the versioned PURL that get_emg_uri() builds.
EMG_VERSION = "v2.0.0"
"""Version of the packaged EM Glossary OWL file"""

# Unversioned base namespace; _load_emg_terms() keeps only subjects whose URI
# starts with this prefix.
EMG = Namespace("https://purls.helmholtz-metadaten.de/emg/")
"""RDF Namespace for the EM Glossary"""

# Provides OBO.IAO_0000115, the property _load_emg_terms() reads as a term's
# definition.
OBO = Namespace("http://purl.obolibrary.org/obo/")
"""RDF Namespace for OBO"""
@lru_cache(maxsize=1)
def _load_emg_graph() -> Graph:
    """
    Parse the packaged EM Glossary OWL file into an RDFLib graph.

    The file at ``EMG_OWL_PATH`` is read as RDF/XML. The function is wrapped
    in ``lru_cache(maxsize=1)``, so a successfully parsed graph is reused by
    later calls instead of being parsed again.

    Returns
    -------
    rdflib.Graph
        The parsed RDF graph

    Raises
    ------
    FileNotFoundError
        If no file exists at ``EMG_OWL_PATH``
    ValueError
        If anything goes wrong while building or parsing the graph (the
        original exception is chained as the cause)
    """
    if not EMG_OWL_PATH.exists():
        raise FileNotFoundError(f"EM Glossary OWL file not found at {EMG_OWL_PATH}")

    try:
        graph = Graph()
        graph.parse(EMG_OWL_PATH, format="xml")
        _logger.debug("Loaded EM Glossary ontology from %s", EMG_OWL_PATH)
        _logger.debug("Graph contains %s triples", len(graph))
    except Exception as err:
        # Any failure in this stage is reported uniformly as a ValueError.
        raise ValueError(f"Failed to parse EM Glossary OWL file: {err}") from err

    return graph
@lru_cache(maxsize=1)
def _load_emg_terms() -> Dict[str, Dict[str, str]]:
    """
    Build the table of EM Glossary terms from the ontology graph.

    Every subject that carries an ``rdf:type`` triple is considered. A subject
    is kept when its URI starts with the ``EMG`` namespace, the last
    ``/``-separated URI segment starts with ``EMG_``, and it has at least one
    ``rdfs:label``. The first label and the first ``IAO_0000115`` value
    yielded by the graph are used. The result is cached via ``lru_cache``.

    Returns
    -------
    dict[str, dict[str, str]]
        Mapping from EMG_ID -> {'label': str, 'definition': str | None};
        ``definition`` is None when the term has no ``IAO_0000115`` value

    Raises
    ------
    ValueError
        If no term at all could be extracted from the graph

    Examples
    --------
    >>> terms = _load_emg_terms()
    >>> terms['EMG_00000004']['label']
    'Acceleration Voltage'
    """
    graph = _load_emg_graph()
    namespace_prefix = str(EMG)
    terms = {}

    for subject in graph.subjects(RDF.type, None):
        uri = str(subject)
        term_id = uri.split("/")[-1]
        # Skip anything outside the EMG namespace or without an EMG_ identifier.
        if not (uri.startswith(namespace_prefix) and term_id.startswith("EMG_")):
            continue

        # A term without a label is not usable for lookups, so it is dropped.
        first_label = next(graph.objects(subject, RDFS.label), None)
        if first_label is None:
            continue

        # IAO_0000115 is the property this module reads as the definition.
        first_definition = next(graph.objects(subject, OBO.IAO_0000115), None)

        terms[term_id] = {
            "label": str(first_label),
            "definition": None if first_definition is None else str(first_definition),
        }

    if not terms:
        raise ValueError("No EMG terms found in OWL file. File may be corrupted.")

    _logger.debug("Loaded %s EMG terms from ontology", len(terms))
    return terms
# Mapping from NexusLIMS internal field names to EM Glossary terms.
# Format: internal_field_name -> (display_name, emg_label or None, description)
#   [0] display_name - returned by get_display_name()
#   [1] emg_label    - matched against the labels loaded from the OWL file by
#                      get_emg_id(); None means the field has no EMG mapping
#   [2] description  - returned by get_description()
NEXUSLIMS_TO_EMG_MAPPINGS: Dict[str, tuple[str, str | None, str]] = {
    # Core acquisition parameters (common to all types)
    "creation_time": (
        "Creation Time",
        None,  # No specific EMG term for timestamp
        "ISO-8601 timestamp with timezone",
    ),
    "data_type": (
        "Data Type",
        None,  # Descriptive field, not in EMG
        "Human-readable data type description",
    ),
    # NOTE(review): display name has no space, unlike the other entries —
    # presumably deliberate (schema element name?); confirm before changing.
    "dataset_type": (
        "DatasetType",
        None,  # Schema-defined category
        "Schema-defined dataset category",
    ),
    # Image acquisition parameters (SEM/TEM/STEM)
    "acceleration_voltage": (
        "Acceleration Voltage",
        "Acceleration Voltage",  # EMG label
        "Accelerating voltage of the electron/ion beam",
    ),
    "working_distance": (
        "Working Distance",
        "Working Distance",  # EMG label
        "Distance between final lens and sample surface",
    ),
    "beam_current": (
        "Beam Current",
        "Beam Current",  # EMG label
        "Electron beam current",
    ),
    "emission_current": (
        "Emission Current",
        "Emission Current",  # EMG label
        "Emission current from electron source",
    ),
    "dwell_time": (
        "Pixel Dwell Time",
        "Dwell Time",  # EMG label
        "Time the beam dwells on each pixel during scanning",
    ),
    "magnification": (
        "Magnification",
        None,  # EMG has Magnification but it's complex
        "Nominal magnification",
    ),
    "horizontal_field_width": (
        "Horizontal Field Width",
        None,  # Not in EMG v2.0.0
        "Width of the scanned area",
    ),
    "vertical_field_width": (
        "Vertical Field Width",
        None,  # Not in EMG v2.0.0
        "Height of the scanned area",
    ),
    "pixel_width": (
        "Pixel Width",
        None,  # Not in EMG v2.0.0
        "Physical width of a single pixel",
    ),
    "pixel_height": (
        "Pixel Height",
        None,  # Not in EMG v2.0.0
        "Physical height of a single pixel",
    ),
    "scan_rotation": (
        "Scan Rotation",
        None,  # Not in EMG v2.0.0
        "Rotation angle of the scan frame",
    ),
    # Detector information
    "detector_type": (
        "Detector",
        None,  # EMG has detector concepts but not simple type field
        "Type or name of detector used",
    ),
    "acquisition_device": (
        "Acquisition Device",
        None,  # Similar to detector_type
        "Name of the acquisition device or camera",
    ),
    # Stage position (common to SEM/TEM)
    "stage_x": (
        "Stage X",
        None,  # Part of complex stage position concept
        "Stage X coordinate",
    ),
    "stage_y": (
        "Stage Y",
        None,  # Part of complex stage position concept
        "Stage Y coordinate",
    ),
    "stage_z": (
        "Stage Z",
        None,  # Part of complex stage position concept
        "Stage Z coordinate",
    ),
    "stage_tilt": (
        "Stage Tilt",
        None,  # Part of complex stage position concept
        "Stage tilt angle (alpha)",
    ),
    "stage_rotation": (
        "Stage Rotation",
        None,  # Part of complex stage position concept
        "Stage rotation angle",
    ),
    "stage_alpha": (
        "Stage Alpha",
        None,  # Part of complex stage position concept
        "Stage alpha tilt angle",
    ),
    "stage_beta": (
        "Stage Beta",
        None,  # Part of complex stage position concept
        "Stage beta tilt angle",
    ),
    # Spectrum acquisition parameters (EDS/EELS)
    "acquisition_time": (
        "Acquisition Time",
        "Acquisition Time",  # EMG label
        "Total time for spectrum acquisition",
    ),
    "live_time": (
        "Live Time",
        None,  # Not in EMG v2.0.0
        "Live time (excludes dead time) for spectrum acquisition",
    ),
    "detector_energy_resolution": (
        "Energy Resolution",
        None,  # Not in EMG v2.0.0
        "Energy resolution of the detector",
    ),
    "channel_size": (
        "Channel Size",
        None,  # Not in EMG v2.0.0
        "Energy width of each channel",
    ),
    "starting_energy": (
        "Starting Energy",
        None,  # Not in EMG v2.0.0
        "Starting energy of the spectrum",
    ),
    "azimuthal_angle": (
        "Azimuthal Angle",
        None,  # Not in EMG v2.0.0
        "Azimuthal angle of the detector",
    ),
    "elevation_angle": (
        "Elevation Angle",
        None,  # Not in EMG v2.0.0
        "Elevation angle of the detector",
    ),
    "takeoff_angle": (
        "Takeoff Angle",
        None,  # Not in EMG v2.0.0
        "X-ray takeoff angle",
    ),
    # Diffraction parameters (TEM)
    "camera_length": (
        "Camera Length",
        "Camera Length",  # EMG label
        "Camera length for diffraction pattern",
    ),
    "convergence_angle": (
        "Convergence Angle",
        "Convergence Angle",  # EMG label
        "Convergence angle of the electron beam",
    ),
    "illumination_mode": (
        "Illumination Mode",
        None,  # Not in EMG v2.0.0
        "TEM illumination mode (TEM, STEM, Diffraction, etc.)",
    ),
    # Sample/metadata
    "specimen": (
        "Specimen",
        None,  # EMG has Specimen but it's complex
        "Sample or specimen description",
    ),
    "operator": (
        "Operator",
        None,  # Not in EMG (user information)
        "User who acquired the data",
    ),
    # Environmental parameters
    "temperature": (
        "Temperature",
        None,  # Not in EMG v2.0.0
        "Sample or chamber temperature",
    ),
    "pressure": (
        "Pressure",
        None,  # Not in EMG v2.0.0
        "Chamber pressure",
    ),
    "chamber_pressure": (
        "Chamber Pressure",
        None,  # Not in EMG v2.0.0
        "Vacuum chamber pressure",
    ),
    # Data dimensions
    "data_dimensions": (
        "Data Dimensions",
        None,  # Not a measurement, structural metadata
        "String representation of data shape",
    ),
    # Instrument identification
    "instrument_id": (
        "Instrument ID",
        None,  # Not in EMG (internal NexusLIMS identifier)
        "NexusLIMS persistent instrument identifier",
    ),
}
"""Mapping from NexusLIMS internal field names to EM Glossary terms
Format: `internal_field_name -> (display_name, emg_label or None, description)`
The emg_label is used to look up the EMG_ID from the OWL file"""
def get_emg_label(emg_id: str) -> str | None:
    """
    Return the human-readable EM Glossary label for an EMG ID.

    The label comes from the term table loaded from the OWL ontology file.
    Any error raised while loading or reading that table is logged as a
    warning and reported to the caller as None.

    Parameters
    ----------
    emg_id : str
        EM Glossary ID (e.g., "EMG_00000004")

    Returns
    -------
    str or None
        EMG label, or None if the ID is unknown or the ontology could not
        be loaded

    Examples
    --------
    >>> get_emg_label("EMG_00000004")
    'Acceleration Voltage'
    >>> get_emg_label("EMG_00000050")
    'Working Distance'
    >>> get_emg_label("EMG_99999999") is None
    True
    """
    try:
        entry = _load_emg_terms().get(emg_id)
        if not entry:
            return None
        return entry["label"]
    except Exception as exc:
        # Best-effort lookup: never propagate ontology problems to callers.
        _logger.warning("Failed to load EMG ontology: %s", exc)
        return None
def get_emg_definition(emg_id: str) -> str | None:
    """
    Return the formal EM Glossary definition for an EMG ID.

    The definition comes from the term table loaded from the OWL ontology
    file. Any error raised while loading or reading that table is logged as
    a warning and reported to the caller as None.

    Parameters
    ----------
    emg_id : str
        EM Glossary ID (e.g., "EMG_00000004")

    Returns
    -------
    str or None
        EMG definition, or None if the ID is unknown, the term has no
        definition, or the ontology could not be loaded

    Examples
    --------
    >>> defn = get_emg_definition("EMG_00000004")
    >>> print(defn)
    The potential difference between anode and cathode.
    >>> get_emg_definition("EMG_99999999") is None
    True
    """
    try:
        entry = _load_emg_terms().get(emg_id)
        if not entry:
            return None
        return entry["definition"]
    except Exception as exc:
        # Best-effort lookup: never propagate ontology problems to callers.
        _logger.warning("Failed to load EMG ontology: %s", exc)
        return None
def get_emg_id(field_name: str) -> str | None:
    """
    Resolve a NexusLIMS field name to its EM Glossary ID.

    The field is looked up in NEXUSLIMS_TO_EMG_MAPPINGS; if it declares an
    EMG label, that label is matched against the labels loaded from the OWL
    ontology and the ID of the first matching term is returned.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        EM Glossary ID string (e.g., "EMG_00000004"), or None if the field
        is unmapped, has no EMG label, the label is absent from the
        ontology, or the ontology could not be loaded

    Examples
    --------
    >>> get_emg_id("acceleration_voltage")
    'EMG_00000004'
    >>> get_emg_id("working_distance")
    'EMG_00000050'
    >>> get_emg_id("custom_field") is None
    True

    Notes
    -----
    Not all NexusLIMS fields have EM Glossary equivalents. This is expected
    as EMG is a growing ontology and some fields are vendor-specific or
    outside the scope of EMG's current coverage (v2.0.0).
    """
    entry = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    wanted_label = entry[1] if entry is not None else None
    if wanted_label is None:
        return None

    try:
        # Reverse lookup (label -> ID); the first term in table order wins.
        matched_id = next(
            (
                term_id
                for term_id, info in _load_emg_terms().items()
                if info["label"] == wanted_label
            ),
            None,
        )
    except Exception as exc:
        _logger.warning("Failed to load EMG ontology: %s", exc)
        return None

    if matched_id is None:
        _logger.debug("EMG label '%s' not found in ontology", wanted_label)
    return matched_id
def get_display_name(field_name: str) -> str:
    """
    Return the human-readable display name for a field.

    Mapped fields use the display name stored as the first tuple element in
    NEXUSLIMS_TO_EMG_MAPPINGS. Unmapped fields fall back to the field name
    with underscores replaced by spaces and the result title-cased.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str
        Display name for the field

    Examples
    --------
    >>> get_display_name("acceleration_voltage")
    'Acceleration Voltage'
    >>> get_display_name("working_distance")
    'Working Distance'
    >>> get_display_name("custom_field")
    'Custom Field'

    Notes
    -----
    The fallback guarantees that every field gets a reasonable display name
    even without an explicit mapping.
    """
    entry = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    if entry is None:
        # Unmapped field: derive a name, e.g. "custom_field" -> "Custom Field".
        return field_name.replace("_", " ").title()
    return entry[0]
def get_description(field_name: str) -> str | None:
    """
    Return the NexusLIMS description of a field.

    The description is the third tuple element stored for the field in
    NEXUSLIMS_TO_EMG_MAPPINGS. For EMG formal definitions, use
    get_emg_definition() instead.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        Field description, or None if the field is not mapped

    Examples
    --------
    >>> desc = get_description("acceleration_voltage")
    >>> print(desc)
    Accelerating voltage of the electron/ion beam
    >>> get_description("unknown_field") is None
    True
    """
    entry = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    return None if entry is None else entry[2]
def has_emg_id(field_name: str) -> bool:
    """
    Tell whether a field resolves to an EM Glossary ID.

    This is a thin predicate over get_emg_id(): it is True exactly when
    that function returns an ID for the field.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    bool
        True if the field has an EMG ID, False otherwise

    Examples
    --------
    >>> has_emg_id("acceleration_voltage")
    True
    >>> has_emg_id("magnification")
    False
    >>> has_emg_id("custom_field")
    False
    """
    return get_emg_id(field_name) is not None
def get_emg_uri(field_name: str) -> str | None:
    """
    Return the full, versioned EM Glossary URI for a field.

    The URI is built from the fixed EMG PURL base, the packaged glossary
    version (EMG_VERSION) and the ID that get_emg_id() resolves for the
    field.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        Full EMG PURL, or None if the field has no EMG ID

    Examples
    --------
    >>> get_emg_uri("acceleration_voltage")
    'https://purls.helmholtz-metadaten.de/emg/v2.0.0/EMG_00000004'
    >>> get_emg_uri("working_distance")
    'https://purls.helmholtz-metadaten.de/emg/v2.0.0/EMG_00000050'
    >>> get_emg_uri("custom_field") is None
    True
    """
    term_id = get_emg_id(field_name)
    if term_id is not None:
        return f"https://purls.helmholtz-metadaten.de/emg/{EMG_VERSION}/{term_id}"
    return None
def get_all_mapped_fields() -> list[str]:
    """
    List every field name that has a NexusLIMS mapping.

    The names are the keys of NEXUSLIMS_TO_EMG_MAPPINGS, regardless of
    whether the entry declares an EMG label.

    Returns
    -------
    list[str]
        Sorted list of field names with mappings

    Examples
    --------
    >>> fields = get_all_mapped_fields()
    >>> "acceleration_voltage" in fields
    True
    >>> len(fields) > 0
    True
    """
    # Iterating a dict yields its keys, so no explicit .keys() is needed.
    return sorted(NEXUSLIMS_TO_EMG_MAPPINGS)
def get_fields_with_emg_ids() -> list[str]:
    """
    List the mapped fields that resolve to an EM Glossary ID.

    Each key of NEXUSLIMS_TO_EMG_MAPPINGS is tested with has_emg_id();
    fields that only have a display name and description are left out.

    Returns
    -------
    list[str]
        Sorted list of field names with EMG IDs

    Examples
    --------
    >>> fields = get_fields_with_emg_ids()
    >>> "acceleration_voltage" in fields
    True
    >>> "magnification" in fields  # Has display name but no EMG ID
    False
    """
    return sorted(filter(has_emg_id, NEXUSLIMS_TO_EMG_MAPPINGS))
def get_all_emg_terms() -> Dict[str, Dict[str, str]]:
    """
    Return all EM Glossary terms loaded from the OWL file.

    Gives the complete mapping of EMG IDs to labels and definitions, which
    is useful for discovering the available EMG terms. The result is a fresh
    copy on every call (outer dict and per-term dicts), so callers may
    modify it freely without affecting the cached term table that the other
    lookup functions in this module rely on.

    Returns
    -------
    dict[str, dict[str, str]]
        Mapping from EMG_ID -> {'label': str, 'definition': str | None};
        an empty dict if the ontology could not be loaded (the failure is
        logged with its traceback)

    Examples
    --------
    >>> terms = get_all_emg_terms()
    >>> "EMG_00000004" in terms
    True
    >>> terms["EMG_00000004"]["label"]
    'Acceleration Voltage'
    >>> print(terms["EMG_00000004"]["definition"])
    The potential difference between anode and cathode.
    """
    try:
        cached_terms = _load_emg_terms()
    except Exception:
        _logger.exception("Failed to load EMG ontology")
        return {}

    # _load_emg_terms() is lru_cache'd and hands back the same dict object on
    # every call; copy it so caller-side mutation cannot corrupt the cache.
    return {emg_id: dict(info) for emg_id, info in cached_terms.items()}