# Source code for nexusLIMS.extractors.plugins.tescan_tif

# ruff: noqa: N817, FBT003
"""Tescan (P)FIB/SEM TIFF extractor plugin."""

import configparser
import contextlib
import io
import logging
from decimal import Decimal, InvalidOperation
from pathlib import Path
from typing import Any, ClassVar

from PIL import Image

from nexusLIMS.extractors.base import ExtractionContext
from nexusLIMS.extractors.base import FieldDefinition as FD
from nexusLIMS.extractors.utils import _set_instr_name_and_time, add_to_extensions
from nexusLIMS.schemas.units import ureg
from nexusLIMS.utils.dicts import set_nested_dict_value, sort_dict

TESCAN_TIFF_TAG = 50431
"""
TIFF tag ID where Tescan stores INI-style metadata in TIFF files.
The tag holds instrument configuration, beam parameters, stage position,
detector settings, and other acquisition metadata.
"""

_MAX_ASCII_VALUE = 128
"""
Exclusive upper bound on ASCII code points (valid ASCII is 0-127).
Characters at or above this value are filtered out as non-ASCII binary data.
"""

# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)


class TescanTiffExtractor:
    """
    Extractor for Tescan FIB/SEM TIFF files.

    This extractor handles metadata extraction from .tif files saved by
    Tescan FIB and SEM instruments (e.g., AMBER X).

    Metadata is gathered from up to three sources (see ``extract``):

    1. Embedded: INI-style metadata stored in TIFF tag ``TESCAN_TIFF_TAG``
    2. Sidecar: a ``.hdr`` file next to the image, consulted only when the
       embedded metadata is missing or could not be parsed
    3. Standard TIFF tags, which are always read to fill in fields the
       sources above did not provide

    The INI-style metadata is organized in two sections, [MAIN] and [SEM],
    and is parsed using Python's configparser. Section headers are inserted
    for embedded metadata when they are absent.
    """

    name = "tescan_tif_extractor"
    priority = 150
    supported_extensions: ClassVar = {"tif", "tiff"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Performs content sniffing to verify this is a Tescan TIFF file by:

        1. Checking the file extension against ``supported_extensions``
        2. Looking for either a sidecar .hdr file or Tescan-specific TIFF tags

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if this appears to be a Tescan TIFF file
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        if extension not in self.supported_extensions:
            return False

        # Check for sidecar HDR file
        hdr_file = self._find_hdr_file(context.file_path)
        if hdr_file is not None and self._is_tescan_hdr(hdr_file):
            return True

        # Fallback: check TIFF tags for Tescan signature
        try:
            with Image.open(context.file_path) as img:
                # Check for TESCAN in Make tag (271) or Software tag (305)
                make = img.tag_v2.get(271, "")
                software = img.tag_v2.get(305, "")
                if "TESCAN" in str(make).upper() or "TESCAN" in str(software).upper():
                    return True

                # Check for custom Tescan metadata tag
                tescan_metadata = img.tag_v2.get(TESCAN_TIFF_TAG, "")
                if tescan_metadata != "":
                    return True
        except Exception as e:
            # Unreadable or non-TIFF content simply means "not supported"
            _logger.debug(
                "Could not read TIFF tags from %s: %s",
                context.file_path,
                e,
            )
            return False

        return False

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from a Tescan FIB/SEM TIFF file.

        Returns the metadata (as a list of dictionaries) from a .tif file saved
        by Tescan instruments. Uses a three-tier extraction strategy:

        1. Try to parse embedded HDR metadata from TIFF tag ``TESCAN_TIFF_TAG``
        2. If that fails, look for a sidecar .hdr file
        3. Always extract basic TIFF tags as well

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with 'nx_meta' key
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from Tescan TIFF file: %s", filename)

        mdict: dict[str, Any] = {"nx_meta": {}}
        # Assume all datasets coming from Tescan are SEM Images, originally
        mdict["nx_meta"]["DatasetType"] = "Image"
        mdict["nx_meta"]["Data Type"] = "SEM_Imaging"

        _set_instr_name_and_time(mdict, filename)

        hdr_parsed = False

        # Strategy 1: Try to parse embedded HDR metadata from the Tescan tag
        try:
            embedded_metadata = self._extract_embedded_hdr(filename)
            if embedded_metadata:
                mdict.update(embedded_metadata)
                mdict = self._parse_nx_meta(mdict)
                hdr_parsed = True
                _logger.debug("Successfully parsed embedded HDR from TIFF tag")
        except Exception as e:
            _logger.debug("Could not parse embedded HDR metadata: %s", e)

        # Strategy 2: If embedded parsing failed, try sidecar HDR file
        if not hdr_parsed:
            hdr_file = self._find_hdr_file(filename)
            if hdr_file is not None and self._is_tescan_hdr(hdr_file):
                try:
                    hdr_metadata = self._read_hdr_metadata(hdr_file)
                    mdict.update(hdr_metadata)
                    mdict = self._parse_nx_meta(mdict)
                    hdr_parsed = True
                    _logger.debug("Successfully parsed sidecar HDR file")
                except Exception as e:
                    _logger.warning(
                        "Failed to parse HDR file %s: %s",
                        hdr_file,
                        e,
                    )

        # Strategy 3: Always extract basic TIFF tags (may supplement or override)
        self._extract_from_tiff_tags(filename, mdict)

        # Migrate metadata to schema-compliant format
        mdict = self._migrate_to_schema_compliant_metadata(mdict)

        # Sort the nx_meta dictionary (recursively) for nicer display
        mdict["nx_meta"] = sort_dict(mdict["nx_meta"])

        return [mdict]

    def _find_hdr_file(self, tiff_path: Path) -> Path | None:
        """
        Find the sidecar .hdr file for a given TIFF file.

        The sidecar is expected at the TIFF path with its suffix replaced
        by ``.hdr``.

        Parameters
        ----------
        tiff_path
            Path to the TIFF file

        Returns
        -------
        Path or None
            Path to the .hdr file if it exists, None otherwise
        """
        hdr_path = tiff_path.with_suffix(".hdr")
        return hdr_path if hdr_path.exists() else None

    def _is_tescan_hdr(self, hdr_path: Path) -> bool:
        """
        Verify that an HDR file is a Tescan format file.

        Reads the first 500 characters of the file and looks for either a
        ``[MAIN]`` section header or a ``Device=TESCAN`` entry.

        Parameters
        ----------
        hdr_path
            Path to the .hdr file

        Returns
        -------
        bool
            True if this appears to be a Tescan HDR file; False if neither
            marker is found or the file cannot be read
        """
        try:
            with hdr_path.open("r", encoding="utf-8", errors="ignore") as f:
                content = f.read(500)  # Only the head of the file is needed
        except Exception as e:
            _logger.debug("Could not verify HDR file %s: %s", hdr_path, e)
            return False
        return "[MAIN]" in content or "Device=TESCAN" in content

    def _extract_embedded_hdr(
        self, tiff_path: Path
    ) -> dict[str, dict[str, str]] | None:
        """
        Extract embedded HDR metadata from TIFF tag ``TESCAN_TIFF_TAG``.

        Tescan embeds the complete HDR metadata in TIFF tag TESCAN_TIFF_TAG as
        a binary blob containing the INI-formatted text. The tag may contain
        binary garbage at the beginning before the actual metadata starts.

        Parameters
        ----------
        tiff_path
            Path to the TIFF file

        Returns
        -------
        dict or None
            Dictionary with section names as keys and key-value dicts as values,
            or None if tag is not present or cannot be parsed
        """
        try:
            # Only the tag lookup needs the file handle; release it before
            # doing the (pure string) clean-up and parsing work.
            with Image.open(tiff_path) as img:
                metadata_tag = img.tag_v2.get(TESCAN_TIFF_TAG)

            if metadata_tag is None:
                return None

            metadata_bytes = self._tag_to_bytes(metadata_tag)
            metadata_str = self._extract_metadata_string(metadata_bytes)
            metadata_str = self._clean_metadata_string(metadata_str)
            metadata_str = self._add_section_headers_if_needed(metadata_str)
            return self._parse_hdr_string(metadata_str)
        except Exception as e:
            _logger.debug("Failed to extract embedded HDR from tag 50431: %s", e)
            return None

    def _tag_to_bytes(self, metadata_tag: Any) -> bytes:
        """
        Convert TIFF tag data to bytes.

        Parameters
        ----------
        metadata_tag
            Tag data, either ``bytes`` (returned unchanged) or ``str``
            (encoded as UTF-8)

        Returns
        -------
        bytes
            Converted bytes

        Raises
        ------
        TypeError
            If tag data is not bytes or str
        """
        if isinstance(metadata_tag, bytes):
            return metadata_tag
        if isinstance(metadata_tag, str):
            return metadata_tag.encode("utf-8")
        msg = f"Unsupported metadata tag type: {type(metadata_tag)}"
        raise TypeError(msg)

    def _extract_metadata_string(self, metadata_bytes: bytes) -> str:
        """
        Extract metadata string from binary data by removing garbage.

        The tag may contain binary garbage at the beginning. This method looks
        for known keys (in priority order) and drops everything before the
        first one that is present. NUL bytes are removed and undecodable
        bytes are ignored.

        Parameters
        ----------
        metadata_bytes
            Raw binary metadata from TIFF tag

        Returns
        -------
        str
            Decoded metadata string, starting at the first known key if one
            was found, otherwise the whole decoded payload
        """
        search_keys = (b"[MAIN]", b"AccFrames=", b"AccType=", b"Company=", b"Date=")
        for search_key in search_keys:
            pos = metadata_bytes.find(search_key)
            if pos >= 0:
                metadata_bytes = metadata_bytes[pos:]
                break

        return metadata_bytes.replace(b"\x00", b"").decode("utf-8", errors="ignore")

    def _clean_metadata_string(self, metadata_str: str) -> str:
        """
        Remove non-ASCII and non-printable characters from metadata string.

        Newline, carriage return and tab are preserved so that the line
        structure of the INI text survives.

        Parameters
        ----------
        metadata_str
            Metadata string that may contain non-printable characters

        Returns
        -------
        str
            Cleaned metadata string
        """
        return "".join(
            c
            for c in metadata_str
            if ord(c) < _MAX_ASCII_VALUE and (c.isprintable() or c in "\n\r\t")
        )

    def _add_section_headers_if_needed(self, metadata_str: str) -> str:
        """
        Add [MAIN] and [SEM] section headers if missing.

        Tescan's embedded metadata doesn't include section headers, so this
        method detects where the SEM section starts and inserts headers.
        If either header is already present the string is returned unchanged.

        Parameters
        ----------
        metadata_str
            Metadata string potentially without section headers

        Returns
        -------
        str
            Metadata string with section headers
        """
        if "[MAIN]" in metadata_str or "[SEM]" in metadata_str:
            return metadata_str

        # Keys used to locate the beginning of the SEM section
        sem_keys = [
            "AcceleratorVoltage=",
            "ApertureDiameter=",
            "ApertureOptimization=",
            "ChamberPressure=",
            "CrossFree=",
            "HV=",
        ]
        sem_start_pos = self._find_sem_section_start(metadata_str, sem_keys)

        if sem_start_pos >= len(metadata_str):
            # No SEM section found
            return "[MAIN]\n" + metadata_str

        # Back up to the beginning of the line holding the first SEM key so
        # the header is inserted on a line boundary (rfind gives -1 -> 0).
        line_start = metadata_str.rfind("\n", 0, sem_start_pos) + 1
        return (
            "[MAIN]\n"
            + metadata_str[:line_start]
            + "[SEM]\n"
            + metadata_str[line_start:]
        )

    def _find_sem_section_start(self, metadata_str: str, sem_keys: list[str]) -> int:
        """
        Find the position where SEM section starts.

        Parameters
        ----------
        metadata_str
            Metadata string to search
        sem_keys
            List of keys that typically appear in SEM section

        Returns
        -------
        int
            Smallest index at which any of ``sem_keys`` occurs, or the length
            of the string if none is found
        """
        positions = [
            pos for sem_key in sem_keys if (pos := metadata_str.find(sem_key)) >= 0
        ]
        return min(positions, default=len(metadata_str))

    def _parse_hdr_string(self, hdr_string: str) -> dict[str, dict[str, str]]:
        """
        Parse HDR metadata from a string in INI format.

        Parameters
        ----------
        hdr_string
            HDR metadata as a string in INI format

        Returns
        -------
        dict
            Dictionary with section names as keys and key-value dicts as values
        """
        # Normalize line endings
        hdr_string = hdr_string.replace("\r\n", "\n").replace("\r", "\n")

        # interpolation=None: values are free text and may contain "%", which
        # the default BasicInterpolation would reject when values are read.
        # strict=False: tolerate repeated keys/sections (last one wins)
        # instead of aborting the whole parse.
        config = configparser.ConfigParser(interpolation=None, strict=False)
        # Make ConfigParser respect upper/lowercase option names
        config.optionxform = lambda option: option
        config.read_file(io.StringIO(hdr_string))

        return {section: dict(config.items(section)) for section in config.sections()}

    def _read_hdr_metadata(self, hdr_path: Path) -> dict[str, dict[str, str]]:
        """
        Read and parse a Tescan .hdr file.

        The .hdr file is in INI format with sections like [MAIN] and [SEM].

        Parameters
        ----------
        hdr_path
            Path to the .hdr file

        Returns
        -------
        dict
            Dictionary with section names as keys and key-value dicts as values
        """
        with hdr_path.open("r", encoding="utf-8", errors="ignore") as f:
            hdr_string = f.read()

        return self._parse_hdr_string(hdr_string)

    def _extract_from_tiff_tags(self, filename: Path, mdict: dict) -> None:
        """
        Extract basic metadata from TIFF tags.

        This supplements metadata from HDR files with standard TIFF tags.
        Except for ``TIFF DateTime`` (always written when the tag has a
        value), fields are only added when not already present in
        ``nx_meta``. Updates mdict in place; on failure a message is stored
        under ``Extractor Warnings``.

        Parameters
        ----------
        filename
            Path to the TIFF file
        mdict
            Metadata dictionary to update
        """
        # (TIFF tag ID, nx_meta key, overwrite an existing value?)
        tag_fields = (
            (271, "Make", False),
            (272, "Model", False),
            (305, "Software Version", False),
            (306, "TIFF DateTime", True),  # supplemental, always recorded
            (315, "Operator", False),  # Artist tag (username)
        )

        try:
            with Image.open(filename) as img:
                nx_meta = mdict["nx_meta"]

                for tag_id, key, overwrite in tag_fields:
                    if not overwrite and key in nx_meta:
                        continue
                    tag_value = img.tag_v2.get(tag_id)
                    if tag_value:
                        nx_meta[key] = tag_value

                # Only add dimensions if not already present
                if "Data Dimensions" not in nx_meta:
                    width = img.tag_v2.get(256)  # ImageWidth
                    height = img.tag_v2.get(257)  # ImageLength
                    if width and height:
                        nx_meta["Data Dimensions"] = str((width, height))

        except Exception as e:
            _logger.warning("Failed to extract TIFF tags from %s: %s", filename, e)
            mdict["nx_meta"]["Extractor Warnings"] = f"Failed to extract TIFF tags: {e}"

    def _get_field_definitions(self) -> list:
        """
        Get field definitions for metadata extraction.

        Each definition gives the source section and key, the output key
        (a string, or a list of strings for a nested location), a scaling
        factor, whether the value is kept as a string, and optionally the
        target unit.

        Returns
        -------
        list
            List of FieldDefinition tuples
        """
        return [
            # [MAIN] section - in order as they appear in HDR file
            FD("MAIN", "AccFrames", "Accumulated Frames", 1, False),
            FD("MAIN", "AccType", "Accumulation Type", 1, True),
            FD("MAIN", "Company", "Company", 1, True),
            FD("MAIN", "Date", "Acquisition Date", 1, True),
            FD("MAIN", "Description", "Description", 1, True),
            FD("MAIN", "Device", "Device", 1, True),
            FD("MAIN", "DeviceModel", "Device Model", 1, True),
            FD("MAIN", "FullUserName", "Full User Name", 1, True),
            FD("MAIN", "ImageStripSize", "Image Strip Size", 1, False),
            FD(
                "MAIN",
                "Magnification",
                "Magnification",
                1e-3,
                False,
                target_unit="kiloX",
            ),
            FD("MAIN", "MagnificationReference", "Magnification Reference", 1, False),
            FD("MAIN", "OrigFileName", "Original Filename", 1, True),
            FD(
                "MAIN", "PixelSizeX", "Pixel Width", 1e9, False, target_unit="nanometer"
            ),
            FD(
                "MAIN",
                "PixelSizeY",
                "Pixel Height",
                1e9,
                False,
                target_unit="nanometer",
            ),
            FD("MAIN", "SerialNumber", "Serial Number", 1, True),
            FD("MAIN", "Sign", "Sign", 1, True),
            FD("MAIN", "SoftwareVersion", "Software Version", 1, True),
            FD("MAIN", "Time", "Acquisition Time", 1, True),
            FD("MAIN", "UserName", "User Name", 1, True),
            FD("MAIN", "ViewFieldsCountX", "View Fields Count X", 1, False),
            FD("MAIN", "ViewFieldsCountY", "View Fields Count Y", 1, False),
            # [SEM] section - in order as they appear in HDR file
            FD(
                "SEM",
                "AcceleratorVoltage",
                "Accelerator Voltage",
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "SEM",
                "ApertureDiameter",
                "Aperture Diameter",
                1e6,
                False,
                target_unit="micrometer",
            ),
            FD("SEM", "ApertureOptimization", "Aperture Optimization", 1, False),
            FD(
                "SEM",
                "ChamberPressure",
                "Chamber Pressure",
                1e3,
                False,
                target_unit="millipascal",
            ),
            FD("SEM", "CrossFree", "Cross Free", 1, False),
            FD(
                "SEM",
                "CrossSectionShiftX",
                "Cross Section Shift X",
                1e6,
                False,
                target_unit="micrometer",
            ),
            FD(
                "SEM",
                "CrossSectionShiftY",
                "Cross Section Shift Y",
                1e6,
                False,
                target_unit="micrometer",
            ),
            FD(
                "SEM",
                "DepthOfFocus",
                "Depth of Focus",
                1e6,
                False,
                target_unit="micrometer",
            ),
            FD("SEM", "Detector", "Detector Name", 1, True),
            FD("SEM", "Detector0", "Detector 0", 1, True),
            FD("SEM", "Detector0FlatField", "Detector 0 Flat Field", 1, False),
            FD("SEM", "Detector0Gain", "Detector 0 Gain", 1, False),
            FD("SEM", "Detector0Offset", "Detector 0 Offset", 1, False),
            FD(
                "SEM",
                "DwellTime",
                "Pixel Dwell Time",
                1e6,
                False,
                target_unit="microsecond",
            ),
            FD(
                "SEM",
                "EmissionCurrent",
                "Emission Current",
                1e6,
                False,
                target_unit="microampere",
            ),
            FD("SEM", "Gun", "Gun Type", 1, True),
            FD("SEM", "GunShiftX", "Gun Shift X", 1, False),
            FD("SEM", "GunShiftY", "Gun Shift Y", 1, False),
            FD("SEM", "GunTiltX", "Gun Tilt X", 1, False),
            FD("SEM", "GunTiltY", "Gun Tilt Y", 1, False),
            FD("SEM", "HV", "HV Voltage", 1e-3, False, target_unit="kilovolt"),
            FD("SEM", "IMLCenteringX", "IML Centering X", 1, False),
            FD("SEM", "IMLCenteringY", "IML Centering Y", 1, False),
            FD(
                "SEM",
                "ImageShiftX",
                "Image Shift X",
                1e9,
                False,
                target_unit="nanometer",
            ),
            FD(
                "SEM",
                "ImageShiftY",
                "Image Shift Y",
                1e9,
                False,
                target_unit="nanometer",
            ),
            FD("SEM", "InjectedGas", "Injected Gas", 1, True),
            FD("SEM", "LUTGamma", "LUT Gamma", 1, False),
            FD("SEM", "LUTMaximum", "LUT Maximum", 1, False),
            FD("SEM", "LUTMinimum", "LUT Minimum", 1, False),
            FD("SEM", "MTDGrid", "MTD Grid", 1e-3, False, target_unit="kilovolt"),
            FD(
                "SEM",
                "MTDScintillator",
                "MTD Scintillator",
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD("SEM", "OBJCenteringX", "OBJ Centering X", 1, False),
            FD("SEM", "OBJCenteringY", "OBJ Centering Y", 1, False),
            FD("SEM", "OBJPreCenteringX", "OBJ Pre-Centering X", 1, False),
            FD("SEM", "OBJPreCenteringY", "OBJ Pre-Centering Y", 1, False),
            FD("SEM", "PotentialMode", "Potential Mode", 1, True),
            FD(
                "SEM",
                "PredictedBeamCurrent",
                "Predicted Beam Current",
                1e12,
                False,
                target_unit="picoampere",
            ),
            FD("SEM", "PrimaryDetectorGain", "Primary Detector Gain", 1, False),
            FD("SEM", "PrimaryDetectorOffset", "Primary Detector Offset", 1, False),
            FD("SEM", "SampleVoltage", "Sample Voltage", 1, False, target_unit="volt"),
            FD("SEM", "ScanID", "Scan ID", 1, False),
            FD("SEM", "ScanMode", "Scan Mode", 1, True),
            FD("SEM", "ScanRotation", "Scan Rotation", 1, False, target_unit="degree"),
            FD("SEM", "ScanSpeed", "Scan Speed", 1, False),
            FD("SEM", "SessionID", "Session ID", 1, True),
            FD(
                "SEM",
                "SpecimenCurrent",
                "Specimen Current",
                1e12,
                False,
                target_unit="picoampere",
            ),
            FD("SEM", "SpotSize", "Spot Size", 1e9, False, target_unit="nanometer"),
            FD(
                "SEM",
                "StageRotation",
                ["Stage Position", "Rotation"],
                1,
                False,
                target_unit="degree",
            ),
            FD(
                "SEM",
                "StageTilt",
                ["Stage Position", "Tilt"],
                1,
                False,
                target_unit="degree",
            ),
            FD("SEM", "StageX", ["Stage Position", "X"], 1, False, target_unit="meter"),
            FD("SEM", "StageY", ["Stage Position", "Y"], 1, False, target_unit="meter"),
            FD("SEM", "StageZ", ["Stage Position", "Z"], 1, False, target_unit="meter"),
            FD("SEM", "StigmatorX", "Stigmator X Value", 1, False),
            FD("SEM", "StigmatorY", "Stigmator Y Value", 1, False),
            FD(
                "SEM",
                "SymmetrizationVoltage",
                "Symmetrization Voltage",
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD("SEM", "SyncMains", "Sync to Mains", 1, True),
            FD("SEM", "TiltCorrection", "Tilt Correction", 1, False),
            FD(
                "SEM",
                "TubeVoltage",
                "Tube Voltage",
                1e-3,
                False,
                target_unit="kilovolt",
            ),
            FD(
                "SEM",
                "VirtualObserverDistance",
                "Virtual Observer Distance",
                1e3,
                False,
                target_unit="millimeter",
            ),
            FD("SEM", "WD", "Working Distance", 1e3, False, target_unit="millimeter"),
        ]

    @staticmethod
    def _set_nx_meta_value(mdict: dict, output_key: Any, value: Any) -> None:
        """
        Store ``value`` under ``nx_meta`` at a flat key or a nested key path.

        Parameters
        ----------
        mdict
            Metadata dictionary containing the ``nx_meta`` sub-dictionary
        output_key
            Either a single key, or a list of keys describing a nested
            location below ``nx_meta``
        value
            Value to store
        """
        if isinstance(output_key, list):
            set_nested_dict_value(mdict, ["nx_meta", *output_key], value)
        else:
            mdict["nx_meta"][output_key] = value

    def _parse_nx_meta(self, mdict: dict) -> dict:
        """
        Parse metadata into NexusLIMS format.

        Extracts important metadata from the [MAIN] and [SEM] sections of the
        HDR file and places them in standardized locations under the nx_meta
        key. Numeric fields whose raw value cannot be converted to a number
        are skipped rather than aborting the whole parse.

        Parameters
        ----------
        mdict
            Metadata dictionary with [MAIN] and [SEM] sections

        Returns
        -------
        dict
            Updated metadata dictionary with parsed nx_meta fields
        """
        nx_meta = mdict["nx_meta"]
        nx_meta.setdefault("warnings", [])

        main_section = mdict.get("MAIN", {})
        sem_section = mdict.get("SEM", {})

        # Alternate [SEM] keys to consult when the primary key is missing
        fallback_keys = {
            "HV": "AcceleratorVoltage",
            "Detector0Gain": "PrimaryDetectorGain",
            "Detector0Offset": "PrimaryDetectorOffset",
        }

        for field in self._get_field_definitions():
            section = main_section if field.section == "MAIN" else sem_section
            value = section.get(field.source_key)
            if value is None and field.source_key in fallback_keys:
                value = sem_section.get(fallback_keys[field.source_key])

            if not value:
                continue

            if field.is_string:
                self._set_nx_meta_value(mdict, field.output_key, value)
                continue

            # Decimal() signals malformed text with InvalidOperation (an
            # ArithmeticError), not ValueError, so both must be suppressed
            # for a single bad field to be skipped quietly.
            with contextlib.suppress(ValueError, InvalidOperation):
                # Convert to Decimal to preserve precision through unit
                # conversions, applying the scaling factor for the target unit.
                decimal_val = Decimal(value) * Decimal(str(field.factor))

                # Skip if suppress_zero is True and value is zero
                if field.suppress_zero and decimal_val == 0:
                    continue

                # Wrap in a Pint Quantity when a unit is specified; otherwise
                # keep the bare Decimal for precision.
                if field.target_unit:
                    parsed = ureg.Quantity(decimal_val, field.target_unit)
                else:
                    parsed = decimal_val
                self._set_nx_meta_value(mdict, field.output_key, parsed)

        # Handle user information (prefer FullUserName over UserName)
        operator = main_section.get("FullUserName") or main_section.get("UserName")
        if operator:
            nx_meta["Operator"] = operator
            nx_meta["warnings"].append(["Operator"])

        return mdict

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Reorganizes metadata to conform to type-specific Pydantic schemas:

        - Extracts core EM Glossary fields to top level with standardized names
        - Moves all other (vendor-specific) fields to the extensions section
        - Preserves existing extensions from instrument profiles

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            Metadata dictionary with schema-compliant nx_meta structure
        """
        nx_meta = mdict.get("nx_meta", {})

        # Preserve existing extensions from instrument profiles
        extensions = nx_meta.get("extensions", {}).copy()

        # Field mappings from display names to EM Glossary names
        field_mappings = {
            "HV Voltage": "acceleration_voltage",
            "Accelerator Voltage": "acceleration_voltage",
            "Working Distance": "working_distance",
            "Beam Current": "beam_current",
            "Emission Current": "emission_current",
            "Pixel Dwell Time": "dwell_time",
            "Horizontal Field Width": "horizontal_field_width",
            "Pixel Width": "pixel_width",
            "Pixel Height": "pixel_height",
        }

        # Required fields and instrument identification, copied verbatim
        passthrough_fields = (
            "DatasetType",
            "Data Type",
            "Creation Time",
            "Instrument ID",
        )
        # NOTE(review): "Extractor Warnings" is skipped here and never copied
        # to the output, so a TIFF-tag read failure is not reported
        # downstream - confirm this is intended.
        handled_fields = {
            *passthrough_fields,
            "Extractor Warnings",
            "warnings",
            "extensions",
        }

        new_nx_meta = {
            field: nx_meta[field] for field in passthrough_fields if field in nx_meta
        }

        for old_name, value in nx_meta.items():
            if old_name in handled_fields:
                continue
            if old_name in field_mappings:
                # Core field: rename to its EM Glossary name
                new_nx_meta[field_mappings[old_name]] = value
            else:
                # Everything else (Operator and other Tescan-specific fields)
                # goes to extensions
                extensions[old_name] = value

        # Copy warnings if present
        if "warnings" in nx_meta:
            new_nx_meta["warnings"] = nx_meta["warnings"]

        # Add extensions section if we have any
        for key, value in extensions.items():
            add_to_extensions(new_nx_meta, key, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict