Coverage for nexusLIMS/extractors/plugins/fei_emi.py: 100%
276 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
1"""FEI TIA (.ser/.emi) extractor plugin."""
3import contextlib
4import logging
5from datetime import datetime as dt
6from pathlib import Path
7from typing import Any, ClassVar, List, Tuple
9import numpy as np
10from hyperspy.io import load as hs_load
11from hyperspy.signal import BaseSignal
13from nexusLIMS.db.models import Instrument
14from nexusLIMS.extractors.base import ExtractionContext
15from nexusLIMS.extractors.utils import add_to_extensions
16from nexusLIMS.instruments import get_instr_from_filepath
17from nexusLIMS.schemas.units import ureg
18from nexusLIMS.utils.dicts import (
19 set_nested_dict_value,
20 sort_dict,
21 try_getting_dict_value,
22)
23from nexusLIMS.utils.time import current_system_tz
25_logger = logging.getLogger(__name__)
class SerEmiExtractor:
    """
    Extractor for FEI TIA series files (.ser with accompanying .emi).

    This extractor handles metadata extraction from files saved by FEI's
    (now Thermo Fisher Scientific) TIA (Tecnai Imaging and Analysis) software.
    The .ser files contain the actual data, while .emi files contain metadata.
    """

    name = "ser_emi_extractor"
    priority = 100
    supported_extensions: ClassVar = {"ser"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if file extension is .ser (compared case-insensitively)
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        return extension == "ser"

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:  # noqa: PLR0915
        """
        Extract metadata from a .ser file and its accompanying .emi file.

        Returns metadata (as a list of dicts) from an FEI .ser file +
        its associated .emi files, with some non-relevant information stripped.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with 'nx_meta' key.
            If files cannot be opened, at least basic metadata will be returned (
            creation time, etc.)
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from SER/EMI file: %s", filename)

        # ObjectInfo present in emi; ser_header_parameters present in .ser
        # ObjectInfo should contain all the interesting metadata,
        # while ser_header_parameters is mostly technical stuff not really of
        # interest to anyone
        warning, emi_filename, ser_error = None, None, False

        # pylint: disable=broad-exception-caught
        try:
            emi_filename, ser_index = get_emi_from_ser(filename)
            s, emi_loaded = _load_ser(emi_filename, ser_index)
        except FileNotFoundError:
            # if emi wasn't found, specifically mention that
            warning = (
                "NexusLIMS could not find a corresponding .emi metadata "
                "file for this .ser file. Metadata extraction will be "
                "limited."
            )
            _logger.warning(warning)
            emi_loaded = False
            emi_filename = None
        except Exception:
            # otherwise, HyperSpy could not load the .emi, so give generic warning
            # that .emi could not be loaded for some reason:
            warning = (
                "The .emi metadata file associated with this "
                ".ser file could not be opened by NexusLIMS. "
                "Metadata extraction will be limited."
            )
            _logger.warning(warning)
            # keep the underlying cause available for troubleshooting
            _logger.debug(
                "Error while loading .emi for %s", filename, exc_info=True
            )
            emi_loaded = False

        if not emi_loaded:
            # if we couldn't load the emi, lets at least open the .ser to pull
            # out the ser_header_info
            try:
                s = hs_load(filename, only_valid_data=True, lazy=True)
            except Exception:
                warning = (
                    "The .ser file could not be opened (perhaps file is "
                    "corrupted?); Metadata extraction is not possible."
                )
                _logger.warning(warning)
                _logger.debug("Error while loading %s", filename, exc_info=True)
                # set s to an empty signal just so we can process some basic
                # metadata using same syntax as if we had read it correctly
                s = BaseSignal(np.zeros(1))
                ser_error = True

        metadata = s.original_metadata.as_dictionary()
        metadata["nx_meta"] = {}

        # if we've already encountered a warning, add that to the metadata,
        if warning:
            metadata["nx_meta"]["Extractor Warning"] = warning
        # otherwise check to ensure we actually have some metadata read from .emi
        elif "ObjectInfo" not in metadata or (
            "ExperimentalConditions" not in metadata["ObjectInfo"]
            and "ExperimentalDescription" not in metadata["ObjectInfo"]
        ):
            warning = (
                "No experimental metadata was found in the "
                "corresponding .emi file for this .ser. "
                "Metadata extraction will be limited."
            )
            _logger.warning(warning)
            metadata["nx_meta"]["Extractor Warning"] = warning

        # if we successfully found the .emi file, add it to the metadata
        if emi_filename:
            try:
                from nexusLIMS.config import settings  # noqa: PLC0415

                rel_emi_fname = str(emi_filename).replace(
                    str(settings.NX_INSTRUMENT_DATA_PATH) + "/", ""
                )
            except Exception:
                # settings unavailable: fall back to the path as found
                rel_emi_fname = str(emi_filename)
            metadata["nx_meta"]["emi Filename"] = rel_emi_fname
        else:
            metadata["nx_meta"]["emi Filename"] = None

        # Get the instrument object associated with this file
        instr = get_instr_from_filepath(filename)

        # if we found the instrument, then store the name as string, else None
        instr_name = instr.name if instr is not None else None
        metadata["nx_meta"]["fname"] = filename

        # get the modification time:
        # Use instrument timezone if available, otherwise fall back to system timezone
        tz = instr.timezone if instr is not None else None
        if tz is None:
            tz = current_system_tz()
        # st_mtime is an absolute (epoch) timestamp, so convert it directly
        # into the target zone. Building a naive server-local datetime and
        # then labelling it with the instrument zone would shift the instant
        # whenever the server and the instrument are in different zones.
        mtime_aware_dt = dt.fromtimestamp(filename.stat().st_mtime, tz=tz)
        metadata["nx_meta"]["Creation Time"] = mtime_aware_dt.isoformat()
        metadata["nx_meta"]["Instrument ID"] = instr_name

        # we could not read the signal, so add some basic metadata and return
        if ser_error:
            metadata = _handle_ser_error_metadata(metadata)
            # Migrate to schema-compliant format (move vendor meta to extensions)
            metadata = self._migrate_to_schema_compliant_metadata(metadata)
            return [metadata]

        metadata = parse_basic_info(metadata, s.data.shape, instr)
        metadata = parse_acquire_info(metadata)
        metadata = parse_experimental_conditions(metadata)
        metadata = parse_experimental_description(metadata)

        (
            metadata["nx_meta"]["Data Type"],
            metadata["nx_meta"]["DatasetType"],
        ) = parse_data_type(s, metadata)

        # we don't need to save the filename, it's just for internal processing
        del metadata["nx_meta"]["fname"]

        # Migrate metadata to schema-compliant format
        metadata = self._migrate_to_schema_compliant_metadata(metadata)

        # sort the nx_meta dictionary (recursively) for nicer display
        metadata["nx_meta"] = sort_dict(metadata["nx_meta"])

        return [metadata]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Rebuilds ``nx_meta`` so that:

        - a fixed set of core fields stays at the top level,
        - fields listed in the core mapping are renamed to their EM Glossary
          style names,
        - every other field is handed to ``add_to_extensions``,
        - extensions that were already present are preserved.

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            The same dictionary object, with ``nx_meta`` replaced by the
            reorganized structure
        """
        nx_meta = mdict.get("nx_meta", {})
        dataset_type = nx_meta.get("DatasetType", "Image")

        # Preserve existing extensions from instrument profiles
        extensions = nx_meta["extensions"].copy() if "extensions" in nx_meta else {}

        # Field mappings from display names to EM Glossary names
        field_mappings = {
            "AccelerationVoltage": "acceleration_voltage",
            "Convergence Angle": "convergence_angle",
            "Acquisition Device": "acquisition_device",
        }
        # Camera Length is only core for Diffraction datasets
        if dataset_type == "Diffraction":
            field_mappings["Camera Length"] = "camera_length"

        # FEI TIA-specific top-level sections always go to extensions, even
        # if a name were ever to collide with a core mapping
        extension_top_level_keys = {
            "ObjectInfo",  # Main FEI metadata section
            "ser_header_parameters",  # SER file header
        }

        # Fields copied verbatim to the top level, in this order
        core_fields = (
            "DatasetType",
            "Data Type",
            "Creation Time",
            "Data Dimensions",
            "Instrument ID",
        )
        # Names the categorizing loop must not touch: the core fields above,
        # plus entries that are handled separately or intentionally not
        # carried over by the loop
        already_handled = frozenset(
            (*core_fields, "Extractor Warnings", "warnings", "extensions")
        )

        # Build new nx_meta with proper field organization
        new_nx_meta = {
            field: nx_meta[field] for field in core_fields if field in nx_meta
        }

        # Categorize the remaining fields: renamed core field or extension.
        # Anything not explicitly mapped (e.g. "emi Filename", "Extractor
        # Warning" and other vendor-specific values) goes to extensions.
        for old_name, value in nx_meta.items():
            if old_name in already_handled:
                continue
            if old_name not in extension_top_level_keys and old_name in field_mappings:
                new_nx_meta[field_mappings[old_name]] = value
            else:
                extensions[old_name] = value

        # Copy warnings if present
        if "warnings" in nx_meta:
            new_nx_meta["warnings"] = nx_meta["warnings"]

        # Add extensions section if we have any
        for key, value in extensions.items():
            add_to_extensions(new_nx_meta, key, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict
def _handle_ser_error_metadata(metadata):
    """
    Fill in placeholder values when the .ser file cannot be read.

    Marks the dataset as ``"Misc"`` / ``"Unknown"``, adds an empty
    ``warnings`` list, sorts ``nx_meta`` and removes the internal ``fname``
    entry.

    Parameters
    ----------
    metadata : dict
        Metadata dictionary whose ``nx_meta`` entry contains ``fname``

    Returns
    -------
    metadata : dict
        The same dictionary with ``nx_meta`` replaced by its sorted version
    """
    placeholders = {
        "DatasetType": "Misc",
        "Data Type": "Unknown",
        "warnings": [],
    }
    metadata["nx_meta"].update(placeholders)
    # sort the nx_meta dictionary (recursively) for nicer display
    metadata["nx_meta"] = sort_dict(metadata["nx_meta"])
    # the filename is only needed internally; KeyError if it is missing
    metadata["nx_meta"].pop("fname")
    return metadata
def _load_ser(emi_filename: Path, ser_index: int):
    """
    Load a data file given the .emi filename and an index of which signal to use.

    Parameters
    ----------
    emi_filename
        The path to an .emi file
    ser_index
        Which .ser file to load data from (1-based), given the .emi file above

    Returns
    -------
    hyperspy.signal.BaseSignal
        The signal loaded by HyperSpy
    bool
        Always True; any loading failure surfaces as an exception instead
    """
    # Load through the .emi so its metadata is attached to the signal.
    # "only_valid_data" is requested so the data shape is correct. Loading
    # the .emi with HyperSpy also loads the .ser, so a problem with the .ser
    # file makes this call fail as well.
    loaded = hs_load(emi_filename, lazy=True, only_valid_data=True)

    # With more than one dataset a list is returned; the wanted signal sits
    # at the index taken from the filename, minus one.
    if isinstance(loaded, list):
        return loaded[ser_index - 1], True

    return loaded, True
def parse_basic_info(metadata, shape, instrument: Instrument):
    """
    Parse basic metadata from file.

    Parse the metadata that is saved at specific places within
    the .emi tag structure into a consistent place in the metadata dictionary
    returned by :py:meth:`get_ser_metadata`. Specifically, this method handles
    the creation date, equipment manufacturer, and data shape/type.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`
    shape
        The shape of the dataset
    instrument : Instrument
        The instrument this file was collected on (may be None)

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    # try to set creation time to acquisition time from metadata
    acq_time = try_getting_dict_value(metadata, ["ObjectInfo", "AcquireDate"])
    if acq_time is not None:
        # Use instrument timezone if available, otherwise fall back to the
        # system timezone. An instrument without a configured timezone is
        # treated the same as no instrument.
        tz = instrument.timezone if instrument else None
        if tz is None:
            tz = current_system_tz()
        try:
            naive_dt = dt.strptime(acq_time, "%a %b %d %H:%M:%S %Y")  # noqa: DTZ007
        except (TypeError, ValueError):
            # keep whatever creation time is already present rather than
            # aborting the whole extraction over one malformed field
            _logger.warning(
                "Could not parse AcquireDate %r; keeping existing creation time",
                acq_time,
            )
        else:
            # Both instrument.timezone and current_system_tz() return pytz
            # objects, so use localize() for proper DST handling
            aware_dt = tz.localize(naive_dt)
            metadata["nx_meta"]["Creation Time"] = aware_dt.isoformat()

    # manufacturer is at high level, so parse it now
    manufacturer = try_getting_dict_value(metadata, ["ObjectInfo", "Manufacturer"])
    if manufacturer is not None:
        metadata["nx_meta"]["Manufacturer"] = manufacturer

    metadata["nx_meta"]["Data Dimensions"] = str(shape)
    metadata["nx_meta"]["warnings"] = []

    # set type to STEM Image by default (this seems to be most common)
    metadata["nx_meta"]["DatasetType"] = "Image"
    metadata["nx_meta"]["Data Type"] = "STEM_Imaging"

    return metadata
def parse_experimental_conditions(metadata):
    """
    Parse acquisition settings stored under ``ObjectInfo/AcquireInfo``.

    Copies a fixed set of terms from the ``AcquireInfo`` node of the .emi
    tag structure into ``nx_meta``, attaching a unit where one is defined.
    Note that, despite the function name, the node read here is
    ``AcquireInfo`` and not ``ExperimentalConditions``.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    node_path = ["ObjectInfo", "AcquireInfo"]

    # nothing to do when the node is absent
    if try_getting_dict_value(metadata, node_path) is None:
        return metadata

    # (input key,) -> (output name, Pint unit name); a unit of None means
    # the value is stored without a unit
    fields = {
        ("DwellTimePath",): ("Dwell Time Path", "second"),
        ("FrameTime",): ("Frame Time", "second"),
        ("CameraNamePath",): ("Camera Name Path", None),
        ("Binning",): ("Binning", None),
        ("BeamPosition",): ("Beam Position", "micrometer"),
        ("EnergyResolution",): ("Energy Resolution", "electron_volt"),
        ("IntegrationTime",): ("Integration Time", "second"),
        ("NumberSpectra",): ("Number of Spectra", None),
        ("ShapingTime",): ("Shaping Time", "second"),
        ("ScanArea",): ("Scan Area", None),
    }
    return map_keys_with_units(fields, node_path, metadata)
def parse_acquire_info(metadata):
    """
    Parse microscope conditions.

    Copies the accelerating voltage and the two tilt values from the
    ``ObjectInfo/ExperimentalConditions/MicroscopeConditions`` node of the
    .emi tag structure into ``nx_meta``. Note that, despite the function
    name, the node read here is not ``AcquireInfo``.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    node_path = ["ObjectInfo", "ExperimentalConditions", "MicroscopeConditions"]

    # nothing to do when the node is absent
    if try_getting_dict_value(metadata, node_path) is None:
        return metadata

    # (input key,) -> (output name, Pint unit name)
    fields = {
        ("AcceleratingVoltage",): ("Microscope Accelerating Voltage", "volt"),
        ("Tilt1",): ("Microscope Tilt 1", None),
        ("Tilt2",): ("Microscope Tilt 2", None),
    }
    return map_keys_with_units(fields, node_path, metadata)
def parse_experimental_description(metadata):
    """
    Parse experimental description.

    Copies every entry of the ``ObjectInfo/ExperimentalDescription`` node of
    the .emi tag structure into ``nx_meta``. Each key is split into a display
    name and an FEI unit; stage and filter entries are grouped under
    ``"Stage Position"`` and ``"Tecnai Filter"`` respectively.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    base = ["ObjectInfo", "ExperimentalDescription"]

    description = try_getting_dict_value(metadata, base)
    if isinstance(description, dict):
        term_mapping = {}
        for raw_key in metadata["ObjectInfo"]["ExperimentalDescription"]:
            label, fei_unit = split_fei_metadata_units(raw_key)

            # Determine output field name(s)
            if "Stage" in label:
                # stage values are nested under one parent entry
                destination = ["Stage Position", label.replace("Stage ", "")]
            elif "Filter " in label:
                # filter settings are nested under one parent entry
                destination = ["Tecnai Filter", label.replace("Filter ", "").title()]
            else:
                destination = label

            term_mapping[(raw_key,)] = (destination, fei_unit_to_pint(fei_unit))

        metadata = map_keys_with_units(term_mapping, base, metadata)

    # Microscope Mode often has excess spaces, so fix that if needed:
    nx_meta = metadata["nx_meta"]
    if "Mode" in nx_meta:
        nx_meta["Mode"] = nx_meta["Mode"].strip()

    return metadata
def get_emi_from_ser(ser_fname: Path) -> tuple[Path, int]:
    """
    Get the accompanying `.emi` filename from an ser filename.

    This method assumes that the `.ser` file will be the same name as the `.emi` file,
    but with an underscore and a digit appended. i.e. ``file.emi`` would
    result in `.ser` files named ``file_1.ser``, ``file_2.ser``, etc.

    Parameters
    ----------
    ser_fname
        The absolute path of an FEI TIA `.ser` data file

    Returns
    -------
    emi_fname
        The absolute path of the accompanying `.emi` metadata file
    index : int
        The number of this .ser file (i.e. 1, 2, 3, etc.)

    Raises
    ------
    FileNotFoundError
        If the accompanying .emi file cannot be resolved to be a file
    ValueError
        If the text after the last underscore of the file name is not an
        integer
    """
    # Parse only the file's own name (without extension), so underscores in
    # parent directory names cannot influence where the name is split.
    base_name, _, suffix = ser_fname.stem.rpartition("_")
    index = int(suffix)
    # everything before the last underscore, plus the .emi extension
    emi_fname = ser_fname.with_name(f"{base_name}.emi")

    if not emi_fname.is_file():
        msg = f"Could not find .emi file with expected name: {emi_fname}"
        raise FileNotFoundError(msg)
    return emi_fname, index
def fei_unit_to_pint(fei_unit):
    """
    Convert FEI unit string to Pint unit name.

    Parameters
    ----------
    fei_unit : str or None
        The unit string from FEI metadata (e.g., "kV", "uA", "um", "deg")

    Returns
    -------
    str or None
        The corresponding Pint unit name, or None if no unit was given or the
        unit is not one of the recognized abbreviations
    """
    # FEI abbreviation -> Pint unit name
    pint_names = dict(
        kV="kilovolt",
        V="volt",
        uA="microampere",
        um="micrometer",
        deg="degree",
        s="second",
        eV="electron_volt",
        keV="kiloelectron_volt",
        mm="millimeter",
        nm="nanometer",
        mrad="milliradian",
    )
    return None if fei_unit is None else pint_names.get(fei_unit)
def split_fei_metadata_units(metadata_term):
    """
    Split metadata into value and units.

    If present, separate a metadata term into its value and units.
    In the FEI metadata structure, units are indicated separated by an
    underscore at the end of the term. i.e. ``High tension_kV`` indicates that
    the `High tension` metadata value has units of `kV`.

    Only the last underscore is treated as the unit separator, so a term
    whose name itself contains an underscore keeps its full name.

    Parameters
    ----------
    metadata_term : str
        The metadata term read from the FEI tag structure

    Returns
    -------
    mdata_and_unit : :obj:`tuple` of :obj:`str`
        A length-2 tuple with the metadata value name as the first
        item and the unit as the second item (None when the term contains
        no underscore)
    """
    name, separator, unit = metadata_term.rpartition("_")
    if not separator:
        # no underscore at all: the whole term is the name
        name, unit = metadata_term, None

    # capitalize any words in metadata term that are all lowercase:
    words = [w.title() if w.islower() else w for w in name.split()]
    # replace weird "Stem" capitalization
    mdata_term = " ".join(words).replace("Stem ", "STEM ")

    return (mdata_term, unit)
def map_keys_with_units(term_mapping, base, metadata):
    """
    Map keys into NexusLIMS metadata structure with unit support.

    For every entry of ``term_mapping`` the value found below ``base`` is
    copied under ``nx_meta``. String values are cleaned and converted to a
    number where possible; numeric values become Pint Quantities when a unit
    is given.

    Parameters
    ----------
    term_mapping : dict
        Dictionary where keys are tuples of strings (the input terms),
        and values are tuples of (output_name, unit) where output_name
        is either a string or list of strings, and unit is either a string
        (Pint unit name) or None
    base : list
        The 'root' path within the metadata dictionary
    metadata : dict
        A metadata dictionary

    Returns
    -------
    metadata : dict
        The same metadata dictionary with values added to nx_meta
    """
    for source_key, (target, unit) in term_mapping.items():
        key_path = list(source_key) if isinstance(source_key, tuple) else source_key
        target_path = [target] if isinstance(target, str) else target

        found = try_getting_dict_value(metadata, base + key_path)
        if found is None:
            # term not present in this file; leave nx_meta untouched
            continue

        # Clean up string values (remove " um" etc.)
        if isinstance(found, str):
            found = found.replace(" um", "").strip()

        # Convert to numeric first (handles string numbers)
        found = _convert_to_numeric(found)

        # Create Quantity if unit specified and value is numeric; if Pint
        # rejects the value it is stored as the plain number instead
        if unit is not None and isinstance(found, (int, float)):
            with contextlib.suppress(ValueError, TypeError):
                found = ureg.Quantity(found, unit)

        set_nested_dict_value(metadata, ["nx_meta", *target_path], found)

    return metadata
def parse_data_type(s, metadata):
    """
    Parse the data type from the signal's metadata.

    Determine `"Data Type"` and `"DatasetType"` for the given .ser file based
    off of metadata and signal characteristics. This method is used to
    determine whether the image is TEM or STEM, Image or Diffraction,
    Spectrum or Spectrum Image, etc.

    Due to lack of appropriate metadata written by the FEI software,
    a heuristic of axis limits and size is used to determine whether a
    spectrum's data type is EELS or EDS. This may not be a perfect
    determination.

    Parameters
    ----------
    s : :py:class:`hyperspy.signal.BaseSignal` (or subclass)
        The HyperSpy signal that contains the data of interest
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`

    Returns
    -------
    data_type : str
        The string that should be stored at metadata['nx_meta']['Data Type']
    dataset_type : str
        The string that should be stored at metadata['nx_meta']['DatasetType'].
        This is ``"Misc"`` when the signal dimension is neither 1 nor 2, in
        which case ``data_type`` only contains the instrument configuration.
    """
    # defaults that will be overwritten if the conditions below are met;
    # instr_mod must exist even when the signal is neither an image nor a
    # spectrum, otherwise building data_type below would fail
    dataset_type = "Misc"
    instr_mod = []

    # instrument configuration
    instr_conf = []
    _set_instrument_type(instr_conf, metadata)

    axes = s.axes_manager

    # images have signal dimension of two:
    if axes.signal_dimension == 2:  # noqa: PLR2004
        instr_mod, dataset_type = _signal_dim_2(metadata)

    # if signal dimension is 1, it's a spectrum and not an image
    elif axes.signal_dimension == 1:
        instr_mod = ["Spectrum"]
        dataset_type = "Spectrum"
        if axes.navigation_dimension > 0:
            instr_mod.append("Imaging")
            dataset_type = "SpectrumImage"
        # do some basic axis value analysis to guess signal type since we
        # don't have any indication of EELS vs. EDS; assume 5 keV and above
        # is EDS
        if axes.signal_axes[0].high_value > 5000:  # noqa: PLR2004
            if "EDS" not in instr_conf:
                instr_conf.append("EDS")
        # EELS spectra are usually 2048 channels
        elif axes.signal_axes[0].size == 2048:  # noqa: PLR2004
            instr_conf.append("EELS")

    data_type = "_".join(instr_conf + instr_mod)

    return data_type, dataset_type
777def _set_instrument_type(instr_conf, metadata):
778 # sometimes there is no metadata for follow-on signals in an .emi/.ser
779 # bundle (i.e. .ser files after the first one)
780 if "Mode" in metadata["nx_meta"]:
781 if "STEM" in metadata["nx_meta"]["Mode"]:
782 instr_conf.append("STEM")
783 elif "TEM" in metadata["nx_meta"]["Mode"]:
784 instr_conf.append("TEM")
785 # if there is no metadata read from .emi, make determination
786 # off of instrument (this is really a guess)
787 elif metadata["nx_meta"]["Instrument ID"] is not None:
788 if "STEM" in metadata["nx_meta"]["Instrument ID"]:
789 instr_conf.append("STEM")
790 else:
791 instr_conf.append("TEM")
792 else:
793 # default to TEM, (since STEM is technically a sub-technique of TEM)
794 instr_conf.append("TEM")
797def _signal_dim_2(metadata) -> Tuple[List[str], str]:
798 """
799 Parse data type for a Signal with "signal dimension" of size 2.
801 Parameters
802 ----------
803 metadata
805 Returns
806 -------
807 list of str
808 The instrument mode
809 str
810 The dataset type
811 """
812 # default to an image dataset type for 2 dimensional signal
813 dataset_type = "Image"
814 # instrument modality:
815 instr_mod = ["Imaging"]
816 if "Mode" in metadata["nx_meta"]:
817 if "Image" in metadata["nx_meta"]["Mode"]:
818 instr_mod = ["Imaging"]
819 dataset_type = "Image"
820 elif "Diffraction" in metadata["nx_meta"]["Mode"]:
821 # Diffraction mode is only actually diffraction in TEM mode,
822 # In STEM, imaging happens in diffraction mode
823 if "STEM" in metadata["nx_meta"]["Mode"]:
824 instr_mod = ["Imaging"]
825 dataset_type = "Image"
826 elif "TEM" in metadata["nx_meta"]["Mode"]:
827 instr_mod = ["Diffraction"]
828 dataset_type = "Diffraction"
829 return instr_mod, dataset_type
832def _convert_to_numeric(val):
833 if isinstance(val, str):
834 if "." in val:
835 try:
836 return float(val)
837 except ValueError:
838 return val
839 else:
840 try:
841 return int(val)
842 except ValueError:
843 return val
844 else:
845 return val
848# Backward compatibility function for tests
def get_ser_metadata(filename):
    """
    Get metadata from a .ser file and its accompanying .emi file.

    .. deprecated::
        This function is deprecated. Use SerEmiExtractor class instead.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list of dict
        Whatever :py:meth:`SerEmiExtractor.extract` returns for this file
    """
    instrument = get_instr_from_filepath(filename)
    context = ExtractionContext(file_path=filename, instrument=instrument)
    return SerEmiExtractor().extract(context)