# Source code for nexusLIMS.extractors.plugins.orion_HIM_tif

# ruff: noqa: S314, N817, FBT003
"""Zeiss Orion/Fibics TIFF extractor plugin."""

import logging
import xml.etree.ElementTree as ET
from decimal import Decimal
from pathlib import Path
from typing import Any, ClassVar

from PIL import Image

from nexusLIMS.extractors.base import ExtractionContext
from nexusLIMS.extractors.base import FieldDefinition as FD
from nexusLIMS.extractors.utils import _set_instr_name_and_time, add_to_extensions
from nexusLIMS.schemas import em_glossary
from nexusLIMS.schemas.units import ureg
from nexusLIMS.utils.dicts import set_nested_dict_value, sort_dict

# Private TIFF tag number used by the Zeiss variant; looked up in the PIL
# image's ``tag_v2`` mapping by the extractor in this module.
ZEISS_TIFF_TAG = 65000
"""
TIFF tag ID where Zeiss Orion stores XML metadata in TIFF files.
The tag contains serialized XML with an <ImageTags> root element
that holds instrument configuration, beam parameters, stage position,
detector settings, and other acquisition metadata.
"""

# Private TIFF tag number used by the Fibics variant; looked up the same way.
FIBICS_TIFF_TAG = 51023
"""
TIFF tag ID where Fibics helium ion microscope stores XML metadata in TIFF files.
The tag contains serialized XML with a <Fibics> root element that holds
application info, image data, scan parameters, stage position, beam info,
and detector settings.
"""

# Module-level logger, named after this module's import path.
_logger = logging.getLogger(__name__)


class OrionTiffExtractor:
    """
    Extractor for Zeiss Orion and Fibics helium ion microscope TIFF files.

    This extractor handles metadata extraction from .tif files saved by Zeiss
    Orion and Fibics helium ion microscopes (HIM). These files contain embedded
    XML metadata in custom TIFF tags:

    - Zeiss: TIFF tag 65000 with <ImageTags> XML
    - Fibics: TIFF tag 51023 with <Fibics> XML
    """

    name = "orion_HIM_tif_extractor"
    priority = 150  # Higher than QuantaTiffExtractor (100) to handle Orion TIFFs first
    supported_extensions: ClassVar = {
        "tif",
        "tiff",
    }  # Uses content sniffing in supports() to detect variant

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Uses content sniffing to detect Zeiss/Fibics TIFF files by checking for
        the presence of custom TIFF tags containing XML metadata.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if file is a Zeiss Orion or Fibics TIFF file. False if the file
            does not exist, cannot be opened, or carries neither vendor tag.
        """
        # File must exist to check TIFF tags
        if not context.file_path.exists():
            _logger.warning("File does not exist: %s", context.file_path)
            return False

        try:
            with Image.open(context.file_path) as img:
                return self._detect_variant(img) is not None
        except Exception as e:
            # Deliberately broad: any failure to open or inspect the file simply
            # means "not ours", it must never abort extractor selection.
            _logger.warning("Error checking TIFF tags for %s: %s", context.file_path, e)
            return False

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from a Zeiss Orion or Fibics TIFF file.

        Any failure during extraction is logged and recorded under
        ``"Extractor Warnings"`` (with ``"Data Type"`` reset to ``"Unknown"``)
        instead of being raised.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with 'nx_meta' key
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from Zeiss/Fibics TIFF file: %s", filename)

        mdict: dict[str, Any] = {
            "nx_meta": {"DatasetType": "Image", "Data Type": "HIM_Imaging"},
        }

        try:
            _set_instr_name_and_time(mdict, filename)

            with Image.open(filename) as img:
                # Detect which variant we have
                variant = self._detect_variant(img)

                # NOTE(review): the XML below comes straight from the file being
                # read and is parsed with the stdlib ElementTree parser (S314 is
                # silenced file-wide) -- confirm inputs are trusted.
                if variant == "zeiss":
                    root = ET.fromstring(img.tag_v2[ZEISS_TIFF_TAG])
                    mdict = self._extract_zeiss_metadata(root, img, filename, mdict)
                elif variant == "fibics":
                    root = ET.fromstring(img.tag_v2[FIBICS_TIFF_TAG])
                    mdict = self._extract_fibics_metadata(root, img, filename, mdict)
                else:
                    _logger.warning(
                        "Could not detect Zeiss/Fibics variant for %s", filename
                    )
                    mdict["nx_meta"]["Data Type"] = "Unknown"
                    mdict["nx_meta"]["Extractor Warnings"] = (
                        "Could not detect Zeiss/Fibics variant"
                    )
        except Exception as e:
            _logger.exception("Error extracting metadata from %s", filename)
            mdict["nx_meta"]["Data Type"] = "Unknown"
            mdict["nx_meta"]["Extractor Warnings"] = f"Extraction failed: {e}"

        # Migrate metadata to schema-compliant format
        mdict = self._migrate_to_schema_compliant_metadata(mdict)

        # Sort the nx_meta dictionary for nicer display
        mdict["nx_meta"] = sort_dict(mdict["nx_meta"])

        return [mdict]

    def _detect_variant(self, img: Image.Image) -> str | None:
        """
        Detect whether this is a Zeiss or Fibics TIFF file.

        The Zeiss tag is checked first, then the Fibics tag. A tag only counts
        if its XML parses and the root tag contains the expected name (a
        substring test, so namespaced roots also match).

        Parameters
        ----------
        img
            PIL Image object

        Returns
        -------
        str | None
            "zeiss", "fibics", or None if neither detected
        """
        # (TIFF tag id, expected root tag name, variant key, label for log messages)
        candidates = (
            (ZEISS_TIFF_TAG, "ImageTags", "zeiss", "Zeiss"),
            (FIBICS_TIFF_TAG, "Fibics", "fibics", "Fibics"),
        )

        for tag_id, root_name, variant, label in candidates:
            if tag_id not in img.tag_v2:
                continue
            try:
                root = ET.fromstring(img.tag_v2[tag_id])
            except ET.ParseError as e:
                # Unparseable XML in one tag must not prevent checking the other
                _logger.warning("Failed to parse %s XML from TIFF tag: %s", label, e)
                continue
            if root_name in root.tag:
                return variant

        return None

    @staticmethod
    def _nx_meta_path(output_key: str | list) -> list:
        """Return the nested key path under ``nx_meta`` for *output_key*."""
        if isinstance(output_key, str):
            return ["nx_meta", output_key]
        return ["nx_meta", *output_key]

    def _extract_zeiss_metadata(
        self,
        root: ET.Element,
        img: Image.Image,
        filename: Path,  # noqa: ARG002
        mdict: dict,
    ) -> dict:
        """
        Extract metadata from Zeiss Orion XML format.

        Parameters
        ----------
        root
            XML root element
        img
            PIL Image object
        filename
            Path to the file (unused)
        mdict
            Metadata dictionary to update

        Returns
        -------
        dict
            Updated metadata dictionary
        """
        # Parse Zeiss XML structure
        # <ImageTags> contains nested sections with Value/Units pairs

        # Set image dimensions
        width, height = img.size
        set_nested_dict_value(
            mdict, ["nx_meta", "Data Dimensions"], str((width, height))
        )

        # Define metadata fields using FieldDefinition
        # Note: XML stores values in Volts, we convert to target units
        fields = [
            # GFIS
            FD(
                "",
                "GFIS.AccelerationVoltage",
                ["GFIS", "Acceleration Voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "",
                "GFIS.ExtractionVoltage",
                ["GFIS", "Extraction Voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "",
                "GFIS.CondenserVoltage",
                ["GFIS", "Condenser Voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "",
                "GFIS.ObjectiveVoltage",
                ["GFIS", "Objective Voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "",
                "GFIS.BeamCurrent",
                ["GFIS", "Beam Current"],
                1,
                False,
                target_unit="picoampere",
            ),
            FD("", "GFIS.PanX", ["GFIS", "Pan X"], 1, False, target_unit="micrometer"),
            FD("", "GFIS.PanY", ["GFIS", "Pan Y"], 1, False, target_unit="micrometer"),
            FD(
                "",
                "GFIS.FieldOfView",
                ["GFIS", "Horizontal Field Width"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "",
                "GFIS.ScanRotation",
                ["GFIS", "Scan Rotation"],
                1,
                False,
                target_unit="degree",
            ),
            # Dimensionless
            FD("", "GFIS.StigmationX", ["GFIS", "Stigmation X"], 1, False),
            # Dimensionless
            FD("", "GFIS.StigmationY", ["GFIS", "Stigmation Y"], 1, False),
            FD(
                "",
                "GFIS.ApertureSize",
                ["GFIS", "Aperture Size"],
                1,
                False,
                target_unit="micrometer",
            ),
            # Dimensionless
            FD("", "GFIS.ApertureIndex", ["GFIS", "Aperture Index"], 1, False),
            FD("", "GFIS.IonGas", ["GFIS", "Ion Gas"], 1, False),  # String
            FD(
                "",
                "GFIS.CrossoverPosition",
                ["GFIS", "Crossover Position"],
                1,
                False,
                target_unit="millimeter",
            ),
            FD(
                "",
                "GFIS.WorkingDistance",
                ["GFIS", "Working Distance"],
                1,
                False,
                target_unit="millimeter",
            ),
            # Beam
            FD(
                "",
                "AccelerationVoltage",
                ["acceleration_voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "",
                "ExtractionVoltage",
                ["Beam", "Extraction Voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "",
                "BlankerCurrent",
                ["Beam", "Blanker Current"],
                1,
                False,
                target_unit="picoampere",
            ),
            FD(
                "",
                "SampleCurrent",
                ["Beam", "Sample Current"],
                1,
                False,
                target_unit="picoampere",
            ),
            FD("", "SpotNumber", ["Beam", "Spot Number"], 1, False),  # Dimensionless
            FD(
                "",
                "WorkingDistance",
                ["Beam", "Working Distance"],
                1,
                False,
                target_unit="millimeter",
            ),
            FD(
                "",
                "Fov",
                ["horizontal_field_width"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD("", "PanX", ["Beam", "Pan X"], 1, False, target_unit="micrometer"),
            FD("", "PanY", ["Beam", "Pan Y"], 1, False, target_unit="micrometer"),
            # Dimensionless
            FD("", "StigmationX", ["Beam", "Stigmator X Value"], 1, False),
            # Dimensionless
            FD("", "StigmationY", ["Beam", "Stigmator Y Value"], 1, False),
            # Dimensionless (or unknown unit)
            FD("", "ApertureSize", ["Beam", "Aperture Size"], 1, False),
            FD(
                "",
                "CrossOverPosition",
                ["Beam", "Crossover Position"],
                1,
                False,
                target_unit="millimeter",
            ),
            # Scan
            FD(
                "",
                "FrameRetrace",
                ["Scan", "Frame Retrace"],
                1,
                False,
                target_unit="microsecond",
            ),
            FD(
                "",
                "LineRetrace",
                ["Scan", "Line Retrace"],
                1,
                False,
                target_unit="microsecond",
            ),
            FD("", "AveragingMode", ["Scan", "Averaging Mode"], 1, False),  # String
            # Dimensionless
            FD("", "NumAverages", ["Scan", "Number of Averages"], 1, False),
            FD("", "ScanRotate", ["scan_rotation"], 1, False, target_unit="degree"),
            FD(
                "",
                "DwellTime",
                ["Scan", "Dwell Time"],
                1,
                False,
                target_unit="microsecond",
            ),
            FD("", "SAS.ScanSize", ["Scan", "Scan Size"], 1, False),  # Dimensionless
            # Stage
            FD(
                "",
                "StageX",
                ["Stage Position", "X"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "",
                "StageY",
                ["Stage Position", "Y"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "",
                "StageZ",
                ["Stage Position", "Z"],
                1,
                False,
                target_unit="millimeter",
            ),
            FD(
                "",
                "StageTilt",
                ["Stage Position", "Tilt"],
                1,
                False,
                target_unit="degree",
            ),
            FD(
                "",
                "StageRotate",
                ["Stage Position", "Rotation"],
                1,
                False,
                target_unit="degree",
            ),
            FD(
                "",
                "Stage.XLocation",
                ["Stage Position", "X Location"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "",
                "Stage.YLocation",
                ["Stage Position", "Y Location"],
                1,
                False,
                target_unit="micrometer",
            ),
            # Optics
            FD(
                "",
                "sFimFOV",
                ["Optics", "sFIM Field of View"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "",
                "McXShift",
                ["Optics", "MC X Shift"],
                1,
                False,
                target_unit="microradian",
            ),
            FD(
                "",
                "McXTilt",
                ["Optics", "MC X Tilt"],
                1,
                False,
                target_unit="microradian",
            ),
            FD(
                "",
                "McYShift",
                ["Optics", "MC Y Shift"],
                1,
                False,
                target_unit="microradian",
            ),
            FD(
                "",
                "McYTilt",
                ["Optics", "MC Y Tilt"],
                1,
                False,
                target_unit="microradian",
            ),
            # Dimensionless
            FD("", "ColumnMag", ["Optics", "Column Magnification"], 1, False),
            FD("", "ColumnMode", ["Optics", "Column Mode"], 1, False),  # String
            FD(
                "",
                "Lens1Voltage",
                ["Optics", "Lens 1 Voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "",
                "Lens2Voltage",
                ["Optics", "Lens 2 Voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            # Detector
            FD("", "DetectorName", ["Detector", "Name"], 1, False),  # String
            FD(
                "",
                "ETGridVoltage",
                ["Detector", "ET Grid Voltage"],
                1,
                False,
                target_unit="volt",
            ),
            # Dimensionless
            FD("", "ETContrast", ["Detector", "ET Contrast"], 1, False),
            # Dimensionless
            FD("", "ETBrightness", ["Detector", "ET Brightness"], 1, False),
            # Dimensionless
            FD("", "ETImageIntensity", ["Detector", "ET Image Intensity"], 1, False),
            # Dimensionless
            FD("", "MCPContrast", ["Detector", "MCP Contrast"], 1, False),
            # Dimensionless
            FD("", "MCPBrightness", ["Detector", "MCP Brightness"], 1, False),
            FD("", "MCPBias", ["Detector", "MCP Bias"], 1, False, target_unit="volt"),
            # Dimensionless
            FD("", "MCPImageIntensity", ["Detector", "MCP Image Intensity"], 1, False),
            FD(
                "",
                "Detector.Scintillator",
                ["Detector", "Scintillator"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "",
                "SampleBiasVoltage",
                ["Detector", "Sample Bias"],
                1,
                False,
                target_unit="volt",
            ),
            # System
            FD(
                "",
                "GunPressure",
                ["System", "Gun Pressure"],
                1,
                False,
                target_unit="torr",
            ),
            FD(
                "",
                "ColumnPressure",
                ["System", "Column Pressure"],
                1,
                False,
                target_unit="torr",
            ),
            FD(
                "",
                "ChamberPressure",
                ["System", "Chamber Pressure"],
                1,
                False,
                target_unit="torr",
            ),
            FD(
                "",
                "GunTemp",
                ["System", "Gun Temperature"],
                1,
                False,
                target_unit="kelvin",
            ),
            FD(
                "",
                "HeliumPressure",
                ["System", "Helium Pressure"],
                1,
                False,
                target_unit="torr",
            ),
            # Dimensionless
            FD("", "Magnification4x5", ["Optics", "Magnification 4x5"], 1, False),
            # Dimensionless (x)
            FD(
                "",
                "MagnificationDisplay",
                ["Optics", "Magnification Display"],
                1,
                False,
            ),
            FD("", "System.Model", ["System", "Model"], 1, False),  # String
            FD("", "System.Name", ["System", "Name"], 1, False),  # String
            # String
            FD("", "TimeStamp", ["System", "Acquisition Date/Time"], 1, False),
            FD("", "ColumnType", ["System", "Column Type"], 1, False),  # String
            # Flood gun
            FD("", "FloodGunMode", ["Flood Gun", "Mode"], 1, False),  # String
            FD(
                "",
                "FloodGunEnergy",
                ["Flood Gun", "Energy"],
                1,
                False,
                target_unit="electron_volt",
            ),
            FD(
                "",
                "FloodGunTime",
                ["Flood Gun", "Time"],
                1,
                False,
                target_unit="microsecond",
            ),
            # Dimensionless
            FD("", "FloodGun.DeflectionX", ["Flood Gun", "Deflection X"], 1, False),
            # Dimensionless
            FD("", "FloodGun.DeflectionY", ["Flood Gun", "Deflection Y"], 1, False),
            # Misc
            FD(
                "",
                "ScalingX",
                ["Calibration", "X Scale"],
                1,
                False,
                target_unit="meter",
            ),
            FD(
                "",
                "ScalingY",
                ["Calibration", "Y Scale"],
                1,
                False,
                target_unit="meter",
            ),
            # Dimensionless (pixels)
            FD("", "ImageWidth", ["Image", "Width"], 1, False),
            # Dimensionless (pixels)
            FD("", "ImageHeight", ["Image", "Height"], 1, False),
            # Display
            FD("", "LutMode", ["Display", "LUT Mode"], 1, False),  # String
            FD("", "LowGray", ["Display", "Low Gray Value"], 1, False),  # Dimensionless
            # Dimensionless
            FD("", "HighGray", ["Display", "High Gray Value"], 1, False),
            FD("", "LUT.LUTGamma", ["Display", "LUT Gamma"], 1, False),  # Dimensionless
        ]

        # Extract all fields
        for field in fields:
            self._parse_zeiss_field(
                root,
                field.source_key,
                field.output_key,
                mdict,
                field.factor,
                field.target_unit,
            )

        return mdict

    def _extract_fibics_metadata(
        self,
        root: ET.Element,
        img: Image.Image,
        filename: Path,  # noqa: ARG002
        mdict: dict,
    ) -> dict:
        """
        Extract metadata from Fibics XML format.

        Parameters
        ----------
        root
            XML root element
        img
            PIL Image object
        filename
            Path to the file (unused)
        mdict
            Metadata dictionary to update

        Returns
        -------
        dict
            Updated metadata dictionary
        """
        # Set image dimensions
        width, height = img.size
        set_nested_dict_value(
            mdict, ["nx_meta", "Data Dimensions"], str((width, height))
        )

        # Define Fibics metadata fields using FD
        # Note: factor=-1 is a sentinel value for "strip_units" conversion
        fibics_fields = [
            # Application section
            # String
            FD("Application", "Version", ["Application", "Software Version"], 1, False),
            # String
            FD(
                "Application",
                "Date",
                ["Application", "Acquisition Date/Time"],
                1,
                False,
            ),
            # String
            FD(
                "Application",
                "SupportsTransparency",
                ["Application", "Supports Transparency"],
                1,
                False,
            ),
            # Dimensionless
            FD(
                "Application",
                "TransparentPixelValue",
                ["Application", "Transparent Pixel Value"],
                1,
                False,
            ),
            # Image section
            # Dimensionless (pixels)
            FD("Image", "Width", ["Image", "Width"], 1, False),
            # Dimensionless (pixels)
            FD("Image", "Height", ["Image", "Height"], 1, False),
            # Dimensionless
            FD("Image", "BoundingBox.Left", ["Image", "Bounding Box Left"], 1, False),
            # Dimensionless
            FD("Image", "BoundingBox.Right", ["Image", "Bounding Box Right"], 1, False),
            # Dimensionless
            FD("Image", "BoundingBox.Top", ["Image", "Bounding Box Top"], 1, False),
            # Dimensionless
            FD(
                "Image",
                "BoundingBox.Bottom",
                ["Image", "Bounding Box Bottom"],
                1,
                False,
            ),
            FD("Image", "Machine", ["Image", "Machine Name"], 1, False),  # String
            FD("Image", "Beam", ["Image", "Beam Type"], 1, False),  # String
            # String
            FD("Image", "Aperture", ["Image", "Aperture Description"], 1, False),
            FD("Image", "Detector", ["Detector", "Name"], 1, False),  # String
            # Dimensionless
            FD("Image", "Contrast", ["Detector", "Contrast"], 1, False),
            # Dimensionless
            FD("Image", "Brightness", ["Detector", "Brightness"], 1, False),
            # Scan section
            # Convert ns to μs
            FD(
                "Scan",
                "Dwell",
                ["dwell_time"],
                1e-3,
                False,
                target_unit="microsecond",
            ),
            # Dimensionless
            FD("Scan", "LineAvg", ["Scan", "Line Averaging"], 1, False),
            FD(
                "Scan",
                "FOV_X",
                ["horizontal_field_width"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "Scan",
                "FOV_Y",
                ["vertical_field_width"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "Scan",
                "ScanRot",
                ["scan_rotation"],
                1,
                False,
                target_unit="degree",
            ),
            FD("Scan", "Ux", ["Scan", "Affine Ux"], 1, False),  # Dimensionless
            FD("Scan", "Uy", ["Scan", "Affine Uy"], 1, False),  # Dimensionless
            FD("Scan", "Vx", ["Scan", "Affine Vx"], 1, False),  # Dimensionless
            FD("Scan", "Vy", ["Scan", "Affine Vy"], 1, False),  # Dimensionless
            FD("Scan", "Focus", ["Scan", "Focus Value"], 1, False),  # Dimensionless
            # Dimensionless
            FD("Scan", "StigX", ["Scan", "Stigmator X Value"], 1, False),
            # Dimensionless
            FD("Scan", "StigY", ["Scan", "Stigmator Y Value"], 1, False),
            # Stage section
            FD(
                "Stage",
                "X",
                ["Stage Position", "X"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "Stage",
                "Y",
                ["Stage Position", "Y"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "Stage",
                "Z",
                ["Stage Position", "Z"],
                1,
                False,
                target_unit="micrometer",
            ),
            FD(
                "Stage",
                "Tilt",
                ["Stage Position", "Tilt"],
                1,
                False,
                target_unit="degree",
            ),
            FD(
                "Stage",
                "Rot",
                ["Stage Position", "Rotation"],
                1,
                False,
                target_unit="degree",
            ),
            FD(
                "Stage",
                "M",
                ["Stage Position", "M"],
                1,
                False,
                target_unit="millimeter",
            ),
            # BeamInfo section
            FD(
                "BeamInfo",
                "BeamI",
                ["beam_current"],
                1,
                False,
                target_unit="picoampere",
            ),
            FD(
                "BeamInfo",
                "AccV",
                ["acceleration_voltage"],
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD("BeamInfo", "Aperture", ["Beam", "Aperture"], 1, False),  # Dimensionless
            FD("BeamInfo", "GFISGas", ["Beam", "GFIS Gas Type"], 1, False),  # String
            # Dimensionless (or unknown unit)
            FD("BeamInfo", "GunGasPressure", ["Beam", "Gun Gas Pressure"], 1, False),
            # Dimensionless
            FD("BeamInfo", "SpotControl", ["Beam", "Spot Control"], 1, False),
            # DetectorInfo section - using -1 as sentinel for "strip_units"
            FD(
                "DetectorInfo",
                "Collector",
                ["Detector", "Collector Voltage"],
                -1,
                False,
                target_unit="volt",
            ),
            FD(
                "DetectorInfo",
                "Stage Bias",
                ["Detector", "Stage Bias Voltage"],
                -1,
                False,
                target_unit="volt",
            ),
        ]

        # Many fields share a section, so look each section up only once.
        sections: dict[str, ET.Element | None] = {}

        # Extract fields from each section
        for field in fibics_fields:
            if field.section not in sections:
                sections[field.section] = self._find_fibics_section(
                    root, field.section
                )
            section = sections[field.section]
            if section is None:
                continue

            # Use -1 as sentinel for "strip_units" conversion
            conversion_factor = "strip_units" if field.factor == -1 else field.factor
            value = self._parse_fibics_value(
                section, field.source_key, conversion_factor, field.target_unit
            )
            if value is not None:
                set_nested_dict_value(
                    mdict, self._nx_meta_path(field.output_key), value
                )

        return mdict

    def _parse_zeiss_field(  # noqa: PLR0913
        self,
        root: ET.Element,
        field_path: str,
        output_key: str | list,
        mdict: dict,
        conversion_factor: float = 1.0,
        unit: str | None = None,
    ) -> None:
        """
        Parse a field from Zeiss XML and set it in the metadata dictionary.

        Nothing is written when the field (or its ``Value`` child) is missing or
        empty. Errors are logged at debug level and never raised.

        Parameters
        ----------
        root
            XML root element
        field_path
            Path to the field. Can be a simple tag name (e.g.,
            "AccelerationVoltage"), a tag name with dots (e.g.,
            "GFIS.AccelerationVoltage"), or a nested path (e.g., "System.Name").
            First tries to find as a direct tag name, then falls back to nested
            navigation.
        output_key
            Key path in nx_meta (e.g., "Voltage" or ["Stage Position", "X"])
        mdict
            Metadata dictionary to update
        conversion_factor
            Factor to multiply the value by for unit conversion
        unit
            Unit name for Pint Quantity. If None, stores as numeric or string
            value.
        """
        try:
            # First try to find as a direct tag
            # (handles dotted names like "GFIS.AccelerationVoltage")
            node = root.find(field_path)

            # If not found as direct tag, try nested path navigation
            if node is None:
                node = root
                for part in field_path.split("."):
                    node = next((child for child in node if child.tag == part), None)
                    if node is None:
                        return

            # Only the <Value> child is read; a sibling <Units> element exists in
            # the XML but is currently ignored.
            value = node.find("Value")
            if value is None or not value.text:
                return

            try:
                numeric_value = Decimal(value.text) * Decimal(str(conversion_factor))
                # Create Pint Quantity if unit is specified
                final_value = (
                    ureg.Quantity(numeric_value, unit)
                    if unit is not None
                    else float(numeric_value)
                )
            except Exception:  # noqa: BLE001
                # Decimal raises InvalidOperation (not ValueError) for
                # non-numeric text and the unit registry may raise its own
                # errors; in every such case keep the raw string instead.
                final_value = value.text

            set_nested_dict_value(mdict, self._nx_meta_path(output_key), final_value)
        except Exception as e:
            # Log parsing errors for individual fields
            _logger.debug(
                "Error parsing Zeiss field %s: %s", field_path, e, exc_info=True
            )

    def _find_fibics_section(
        self, root: ET.Element, section_name: str
    ) -> ET.Element | None:
        """
        Find a section in Fibics XML.

        Only direct children of *root* are considered.

        Parameters
        ----------
        root
            XML root element
        section_name
            Name of section to find (e.g., "BeamInfo", "Scan")

        Returns
        -------
        ET.Element | None
            First child whose tag equals *section_name*, None otherwise
        """
        try:
            return next((child for child in root if child.tag == section_name), None)
        except (TypeError, AttributeError):
            # root is not an iterable of elements (e.g. None)
            return None

    def _parse_fibics_value(
        self,
        section: ET.Element,
        field_name: str,
        conversion_factor: float | str = 1.0,
        unit: str | None = None,
    ) -> float | str | None:
        """
        Parse a value from a Fibics XML section.

        Parameters
        ----------
        section
            XML section element
        field_name
            Name of field to parse. First tries to find an element with this tag
            name. If not found, searches for an "item" element with a "name"
            attribute matching field_name.
        conversion_factor
            Factor to multiply the value by for unit conversion, or
            "strip_units" to remove unit suffixes (e.g., "=500.0 V" becomes
            500.0)
        unit
            Unit name for Pint Quantity. If None, returns numeric or string
            value.

        Returns
        -------
        Quantity | float | str | None
            Parsed value (as Quantity if unit specified). If the text is not
            numeric, the text itself is returned: for "strip_units" that is the
            full text minus any leading "=", otherwise the stripped text. None
            if the field is not found, has no text, or parsing failed
            unexpectedly.
        """
        try:
            # First try to find field as direct element
            field = section.find(field_name)

            # If not found, try to find an "item" element with matching "name"
            if field is None:
                field = next(
                    (
                        item
                        for item in section.findall("item")
                        if item.get("name") == field_name
                    ),
                    None,
                )

            if field is None or not field.text:
                return None

            text = field.text.strip()
            strip_units = conversion_factor == "strip_units"

            if strip_units:
                # Remove leading symbols like "=" and trailing units like " V"
                # (e.g., "=500.0 V" -> "500.0"). The complete cleaned text is
                # kept as the fallback so a multi-word, non-numeric value is not
                # truncated to its first word.
                fallback = text.lstrip("=").strip()
                tokens = fallback.split()
                numeric_text = tokens[0] if tokens else fallback
            else:
                fallback = numeric_text = text

            try:
                numeric_value = Decimal(numeric_text)
                if not strip_units:
                    numeric_value *= Decimal(str(conversion_factor))
                # Create Pint Quantity if unit is specified
                if unit is not None:
                    return ureg.Quantity(numeric_value, unit)
                return float(numeric_value)
            except Exception:  # noqa: BLE001
                # Decimal raises InvalidOperation (not ValueError) for
                # non-numeric text; fall back to the string value.
                return fallback
        except Exception:  # noqa: BLE001
            # Best effort: an unexpected failure on one field yields "no value"
            return None

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Reorganizes metadata to conform to type-specific Pydantic schemas:
        - Extracts core EM Glossary fields to top level with standardized names
        - Moves vendor-specific nested dictionaries to extensions section
        - Preserves existing extensions from instrument profiles

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            The same dictionary, with ``nx_meta`` replaced by the reorganized,
            schema-compliant structure
        """
        nx_meta = mdict.get("nx_meta", {})

        # Preserve existing extensions from instrument profiles
        extensions = nx_meta.get("extensions", {}).copy()

        # Field mappings from display names to EM Glossary names
        field_mappings = {
            "Acceleration Voltage": "acceleration_voltage",
            "Working Distance": "working_distance",
            "Beam Current": "beam_current",
            "Emission Current": "emission_current",
            "Dwell Time": "dwell_time",
            "Field of View": "horizontal_field_width",
            "Pixel Width": "pixel_width",
            "Pixel Height": "pixel_height",
        }

        # Get all EM Glossary field names from the metadata schema
        # These should remain at top level (not moved to extensions)
        emg_field_names = set(em_glossary.get_all_mapped_fields())

        # Zeiss/Fibics-specific vendor sections that ALWAYS go to extensions
        extension_top_level_keys = {
            "Beam",
            "GFIS",
            "Detector",
            "Stage Position",
            "Image",
            "Display",
            "Flood Gun",
            "Calibration",
            "System",
            "Application",
            "Sample",
            "Scan",
            "ScanSettings",
            "Optics",
            "Zeiss",
            "Fibics",
        }

        # Keys that are copied explicitly (before or after the main loop) and
        # must therefore be skipped while categorizing the remaining fields
        leading_keys = ("DatasetType", "Data Type", "Creation Time", "Instrument ID")
        trailing_keys = ("warnings", "Extractor Warnings")
        handled_keys = {*leading_keys, *trailing_keys, "extensions"}

        # Build new nx_meta with proper field organization:
        # required fields and instrument identification come first
        new_nx_meta = {key: nx_meta[key] for key in leading_keys if key in nx_meta}

        # Process all fields and categorize
        for old_name, value in nx_meta.items():
            if old_name in handled_keys:
                continue

            if old_name in extension_top_level_keys:
                # Top-level vendor sections go to extensions
                extensions[old_name] = value
            elif old_name in field_mappings:
                # Core field that needs renaming
                new_nx_meta[field_mappings[old_name]] = value
            elif old_name in emg_field_names:
                # EM Glossary fields stay at top level (already correct names)
                new_nx_meta[old_name] = value
            else:
                # Everything else goes to extensions (vendor-specific by
                # default); safer than the top level where schema validation
                # will reject it
                extensions[old_name] = value

        # Copy warnings / Extractor Warnings if present (the latter will be
        # moved to NexusLIMS Extraction by add_extraction_details)
        for key in trailing_keys:
            if key in nx_meta:
                new_nx_meta[key] = nx_meta[key]

        # Add extensions section if we have any
        for key, value in extensions.items():
            add_to_extensions(new_nx_meta, key, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict