Source code for nexusLIMS.extractors.plugins.edax

"""EDAX EDS spectrum (.spc/.msa) extractor plugin."""

import contextlib
import logging
from typing import Any, ClassVar

from hyperspy.io import load

from nexusLIMS.extractors.base import ExtractionContext
from nexusLIMS.extractors.utils import _set_instr_name_and_time, add_to_extensions
from nexusLIMS.instruments import get_instr_from_filepath
from nexusLIMS.schemas.units import ureg
from nexusLIMS.utils.dicts import try_getting_dict_value

# Module-level logger, named after this module via ``__name__``; used by the
# extractors below for debug output.
_logger = logging.getLogger(__name__)


class SpcExtractor:
    """
    Extractor for EDAX .spc files.

    This extractor handles metadata extraction from .spc files saved by
    EDAX EDS software (Genesis, TEAM, etc.).
    """

    name = "spc_extractor"
    priority = 100
    supported_extensions: ClassVar = {"spc"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the (case-insensitive) file extension is listed in
            ``supported_extensions`` (i.e. ``.spc``)
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        # Use the class attribute rather than a second hard-coded literal so
        # the two can never drift apart.
        return extension in self.supported_extensions

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from a .spc file.

        Returns the metadata (as a list of dicts) from a .spc file. This type
        of file is produced by EDAX EDS software. It is read by HyperSpy's
        file reader and relevant metadata extracted and returned.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with an ``'nx_meta'`` key
            (schema-compliant fields) and an ``'original_metadata'`` key (the
            contents of the file's ``spc_header`` node).
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from SPC file: %s", filename)

        mdict = {"nx_meta": {}}

        # assume all .spc datasets are EDS single spectra
        mdict["nx_meta"]["DatasetType"] = "Spectrum"
        mdict["nx_meta"]["Data Type"] = "EDS_Spectrum"

        # NOTE(review): presumably fills in the instrument and creation-time
        # fields of ``nx_meta`` -- confirm against the helper.
        _set_instr_name_and_time(mdict, filename)

        s = load(filename, lazy=True)

        # original_metadata puts the entire xml under the root node
        # "spc_header", so this will just bump that all up to the root level
        # for ease of use.
        mdict["original_metadata"] = s.original_metadata["spc_header"].as_dictionary()

        # Map input field names to (output_name, unit) tuples
        # If unit is None, value is stored as-is; otherwise, create Pint Quantity
        term_mapping = {
            "azimuth": ("Azimuthal Angle", "degree"),
            "liveTime": ("Live Time", "second"),
            "detReso": ("Detector Energy Resolution", "electron_volt"),
            "elevation": ("Elevation Angle", "degree"),
            "evPerChan": ("Channel Size", "electron_volt"),
            "kV": ("Accelerating Voltage", "kilovolt"),
            "numPts": ("Number of Spectrum Channels", None),
            "startEnergy": ("Starting Energy", "kiloelectron_volt"),
            "endEnergy": ("Ending Energy", "kiloelectron_volt"),
            "tilt": ("Stage Tilt", "degree"),
        }

        for in_term, (out_name, unit) in term_mapping.items():
            val = try_getting_dict_value(mdict["original_metadata"], in_term)
            if val is None:
                continue
            if unit is not None:
                # Best effort: if the value cannot be turned into a Quantity,
                # keep the raw value instead of failing the extraction.
                with contextlib.suppress(ValueError, TypeError):
                    val = ureg.Quantity(val, unit)
            mdict["nx_meta"][out_name] = val

        # add any elements present:
        if "Sample" in s.metadata and "elements" in s.metadata.Sample:
            mdict["nx_meta"]["Elements"] = s.metadata.Sample.elements

        # Move vendor-specific fields to extensions
        mdict = self._migrate_to_schema_compliant_metadata(mdict)

        return [mdict]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Keeps only the core fields at the top level of ``nx_meta`` and hands
        every other field (all EDAX-specific values such as "Live Time" or
        "Elements") to ``add_to_extensions``.

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields.
            It is modified in place.

        Returns
        -------
        dict
            The same dictionary, with ``nx_meta`` replaced by the
            schema-compliant structure
        """
        nx_meta = mdict.get("nx_meta", {})

        # Core fields stay at the top level of nx_meta
        core_fields = (
            "DatasetType",
            "Data Type",
            "Creation Time",
            "Instrument ID",
            "warnings",
        )
        new_nx_meta = {
            field: nx_meta[field] for field in core_fields if field in nx_meta
        }

        # Everything that is not a core field is vendor-specific. Collect
        # these before calling add_to_extensions so that keys the helper adds
        # to new_nx_meta cannot influence which fields get moved.
        extensions = {
            field_name: value
            for field_name, value in nx_meta.items()
            if field_name not in new_nx_meta
        }
        for key, value in extensions.items():
            add_to_extensions(new_nx_meta, key, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict
class MsaExtractor:
    """
    Extractor for EMSA/MAS .msa spectrum files.

    This extractor handles metadata extraction from .msa files, which may be
    saved by various EDS acquisition software packages, most commonly as
    exports from EDAX or Oxford software.
    """

    name = "msa_extractor"
    priority = 100
    supported_extensions: ClassVar = {"msa"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the (case-insensitive) file extension is listed in
            ``supported_extensions`` (i.e. ``.msa``)
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        # Use the class attribute rather than a second hard-coded literal so
        # the two can never drift apart.
        return extension in self.supported_extensions

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from an .msa file.

        Returns the metadata (as a list of dicts) from an .msa spectrum file.
        This file may be saved by a number of different EDS acquisition
        software, but most often is produced as an export from EDAX or Oxford
        software. This format is a standard, but vendors (such as EDAX) often
        add other values into the metadata header. See
        https://www.microscopy.org/resources/scientific_data/ for the formal
        specification.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with an ``'nx_meta'`` key
            (schema-compliant fields) and an ``'original_metadata'`` key (the
            file's original metadata as a plain dictionary).
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from MSA file: %s", filename)

        s = load(filename, lazy=False)

        mdict = {"nx_meta": {}}
        mdict["original_metadata"] = s.original_metadata.as_dictionary()

        # assume all .msa datasets are EDS single spectra
        mdict["nx_meta"]["DatasetType"] = "Spectrum"
        mdict["nx_meta"]["Data Type"] = "EDS_Spectrum"

        # NOTE(review): presumably fills in the instrument and creation-time
        # fields of ``nx_meta`` -- confirm against the helper.
        _set_instr_name_and_time(mdict, filename)

        # Map input field names to (output_name, unit) tuples
        # If unit is None, value is stored as-is; otherwise, create Pint Quantity
        term_mapping = {
            "AZIMANGLE-dg": ("Azimuthal Angle", "degree"),
            "AmpTime (usec)": ("Amplifier Time", "microsecond"),
            "Analyzer Type": ("Analyzer Type", None),
            "BEAMKV -kV": ("Beam Energy", "kiloelectron_volt"),
            "CHOFFSET": ("Channel Offset", None),
            "COMMENT": ("EDAX Comment", None),
            "DATATYPE": ("Data Format", None),
            "DATE": ("EDAX Date", None),
            "ELEVANGLE-dg": ("Elevation Angle", "degree"),
            "Elements": ("User-Selected Elements", None),
            "FILENAME": ("Originating File of MSA Export", None),
            "FORMAT": ("File Format", None),
            "FPGA Version": ("FPGA Version", None),
            "LIVETIME -s": ("Live Time", "second"),
            "NCOLUMNS": ("Number of Data Columns", None),
            "NPOINTS": ("Number of Data Points", None),
            "OFFSET": ("Offset", None),
            "OWNER": ("EDAX Owner", None),
            "REALTIME -s": ("Real Time", "second"),
            "RESO (MnKa)": ("Energy Resolution", "electron_volt"),
            "SIGNALTYPE": ("Signal Type", None),
            "TACTYLR -cm": ("Active Layer Thickness", "centimeter"),
            "TBEWIND -cm": ("Be Window Thickness", "centimeter"),
            "TDEADLYR -cm": ("Dead Layer Thickness", "centimeter"),
            "TIME": ("EDAX Time", None),
            "TITLE": ("EDAX Title", None),
            "TakeOff Angle": ("TakeOff Angle", "degree"),
            "Tilt Angle": ("Stage Tilt", "degree"),
            "VERSION": ("MSA Format Version", None),
            "XLABEL": ("X Column Label", None),
            "XPERCHAN": ("X Units Per Channel", None),
            "XUNITS": ("X Column Units", None),
            "YLABEL": ("Y Column Label", None),
            "YUNITS": ("Y Column Units", None),
        }

        for in_term, (out_name, unit) in term_mapping.items():
            val = try_getting_dict_value(mdict["original_metadata"], in_term)
            if val is None:
                continue
            if unit is not None:
                # Best effort: if the value cannot be turned into a Quantity,
                # keep the raw value instead of failing the extraction.
                with contextlib.suppress(ValueError, TypeError):
                    val = ureg.Quantity(val, unit)
            mdict["nx_meta"][out_name] = val

        # Move vendor-specific fields to extensions
        mdict = self._migrate_to_schema_compliant_metadata(mdict)

        return [mdict]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Keeps only the core fields at the top level of ``nx_meta`` and hands
        every other field (all EDAX/EMSA-specific values such as "Beam Energy"
        or "Live Time") to ``add_to_extensions``.

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields.
            It is modified in place.

        Returns
        -------
        dict
            The same dictionary, with ``nx_meta`` replaced by the
            schema-compliant structure
        """
        nx_meta = mdict.get("nx_meta", {})

        # Core fields stay at the top level of nx_meta
        core_fields = (
            "DatasetType",
            "Data Type",
            "Creation Time",
            "Instrument ID",
            "warnings",
        )
        new_nx_meta = {
            field: nx_meta[field] for field in core_fields if field in nx_meta
        }

        # Everything that is not a core field is vendor-specific. Collect
        # these before calling add_to_extensions so that keys the helper adds
        # to new_nx_meta cannot influence which fields get moved.
        extensions = {
            field_name: value
            for field_name, value in nx_meta.items()
            if field_name not in new_nx_meta
        }
        for key, value in extensions.items():
            add_to_extensions(new_nx_meta, key, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict
# Backward compatibility functions for tests
def get_spc_metadata(filename):
    """
    Get metadata from a .spc file.

    .. deprecated:: 1.4.0
        This function is deprecated. Use :class:`SpcExtractor` class instead.

    Thin wrapper that builds an :class:`ExtractionContext` for ``filename``
    (looking the instrument up from the file path) and delegates to
    :meth:`SpcExtractor.extract`.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list[dict]
        The value returned by :meth:`SpcExtractor.extract`: a list holding a
        single dict that describes the file's metadata.
    """
    instrument = get_instr_from_filepath(filename)
    context = ExtractionContext(file_path=filename, instrument=instrument)
    return SpcExtractor().extract(context)
def get_msa_metadata(filename):
    """
    Get metadata from an .msa file.

    .. deprecated:: 1.4.0
        This function is deprecated. Use :class:`MsaExtractor` class instead.

    Thin wrapper that builds an :class:`ExtractionContext` for ``filename``
    (looking the instrument up from the file path) and delegates to
    :meth:`MsaExtractor.extract`.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list[dict]
        The value returned by :meth:`MsaExtractor.extract`: a list holding a
        single dict that describes the file's metadata.
    """
    instrument = get_instr_from_filepath(filename)
    context = ExtractionContext(file_path=filename, instrument=instrument)
    return MsaExtractor().extract(context)