Coverage for nexusLIMS/extractors/__init__.py: 100%
188 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
1"""
2Extract metadata from various electron microscopy file types.
4Extractors should return a list of dictionaries, where each dictionary contains
5the extracted metadata under the key ``nx_meta``. The ``nx_meta`` structure is
6validated against the :class:`~nexusLIMS.schemas.metadata.NexusMetadata` Pydantic
7schema to ensure consistency across all extractors.
9Required Fields
10---------------
11All extractors must include these fields in ``nx_meta``:
13* ``'Creation Time'`` - ISO-8601 timestamp string **with timezone** (e.g.,
14 ``"2024-01-15T10:30:00-05:00"`` or ``"2024-01-15T15:30:00Z"``)
15* ``'Data Type'`` - Human-readable description using underscores (e.g.,
16 ``"STEM_Imaging"``, ``"TEM_EDS"``, ``"SEM_Imaging"``)
17* ``'DatasetType'`` - Schema-defined category, must be one of: ``"Image"``,
18 ``"Spectrum"``, ``"SpectrumImage"``, ``"Diffraction"``, ``"Misc"``, or ``"Unknown"``
20Optional Fields
21---------------
22Common optional fields include:
24* ``'Data Dimensions'`` - Dataset shape as string (e.g., ``"(1024, 1024)"``)
25* ``'Instrument ID'`` - Instrument PID from database (e.g., ``"FEI-Titan-TEM-635816"``)
26* ``'warnings'`` - List of warning messages or [message, context] pairs
28Additional instrument-specific fields are allowed beyond these standard fields.
30Schema Validation
31-----------------
32The ``nx_meta`` structure is validated using Pydantic strict mode. Validation occurs
33after default values are set (e.g., missing ``DatasetType`` defaults to ``"Misc"``).
34If validation fails, a ``pydantic.ValidationError`` is raised with detailed information
35about which fields are invalid.
37For complete schema details, see :class:`~nexusLIMS.schemas.metadata.NexusMetadata`.
38"""
40import base64
41import inspect
42import json
43import logging
44import shutil
45from datetime import datetime as dt
46from decimal import Decimal
47from pathlib import Path
48from typing import Any, Callable, Dict, Tuple
50import hyperspy.api as hs
51import numpy as np
52from benedict import benedict
53from pydantic import ValidationError
55from nexusLIMS.extractors.base import ExtractionContext
56from nexusLIMS.extractors.registry import get_registry
57from nexusLIMS.instruments import get_instr_from_filepath
58from nexusLIMS.schemas.metadata import (
59 DiffractionMetadata,
60 ImageMetadata,
61 NexusMetadata,
62 SpectrumImageMetadata,
63 SpectrumMetadata,
64)
65from nexusLIMS.schemas.units import ureg
66from nexusLIMS.utils.paths import replace_instrument_data_path
67from nexusLIMS.utils.time import current_system_tz
68from nexusLIMS.version import __version__
70from . import utils
71from .plugins.preview_generators.hyperspy_preview import sig_to_thumbnail
72from .plugins.preview_generators.image_preview import (
73 down_sample_image,
74 image_to_square_thumbnail,
75)
76from .plugins.preview_generators.text_preview import text_to_thumbnail
78_logger = logging.getLogger(__name__)
def _config_available() -> bool:
    """Check whether the NexusLIMS settings can be loaded without error."""
    try:
        # Imported lazily so that importing this module never requires a
        # valid NexusLIMS configuration to be present.
        from nexusLIMS.config import settings  # noqa: PLC0415

        _ = settings.NX_DATA_PATH
    except Exception:
        # Any failure (missing module, bad/absent config values) means the
        # configuration is unusable; callers use this to skip file output.
        return False
    return True
# Shipped placeholder image; copied into place when preview generation fails.
PLACEHOLDER_PREVIEW = Path(__file__).parent / "assets" / "extractor_error.png"
"""Path to placeholder preview image used when preview generation fails."""

# Names exported by ``from nexusLIMS.extractors import *``; this also serves
# as the explicit public API declaration for the module (note that
# ``_logger`` is included here despite its leading underscore).
__all__ = [
    "PLACEHOLDER_PREVIEW",
    "_logger",
    "create_preview",
    "down_sample_image",
    "flatten_dict",
    "get_instr_from_filepath",
    "get_registry",
    "image_to_square_thumbnail",
    "parse_metadata",
    "sig_to_thumbnail",
    "text_to_thumbnail",
    "unextracted_preview_map",
    "utils",
    "validate_nx_meta",
]
# All common raster-image formats share the same square-thumbnail generator;
# plain text gets its own renderer. Ordering matches the original mapping.
_IMAGE_PREVIEW_EXTENSIONS = ("png", "tiff", "bmp", "gif", "jpg", "jpeg")

unextracted_preview_map = {
    "txt": text_to_thumbnail,
    **{ext: image_to_square_thumbnail for ext in _IMAGE_PREVIEW_EXTENSIONS},
}
"""Filetypes that will only have basic metadata extracted but will nonetheless
have a custom preview image generated"""
def _add_extraction_details(
    nx_meta: Dict,
    extractor_module: Callable,
) -> Dict[str, Any]:
    """
    Add extraction details to the NexusLIMS metadata.

    Adds metadata about the extraction process, given an extractor module,
    to the ``nx_meta`` metadata dictionary under the ``'NexusLIMS Extraction'``
    sub-key. The ``'Module'`` metadata key will contain the fully qualified
    path of a given extractor, e.g. ``nexusLIMS.extractors.basic_metadata``.

    Note
    ----
    If the ``'NexusLIMS Extraction'`` key already exists in the ``nx_meta``
    metadata dictionary, this method *will* overwrite its value.

    Parameters
    ----------
    nx_meta
        The metadata dictionary as returned by :py:meth:`parse_metadata`
    extractor_module
        The (callable) module for a specific metadata extractor from the
        :py:mod:`~nexusLIMS.extractors` module.

    Returns
    -------
    dict
        An updated ``nx_meta`` dictionary, containing extraction details
        under the ``'NexusLIMS Extraction'`` sub-key.
    """
    # PHASE 1 MIGRATION: handle both old-style functions and new-style
    # extractors. Prefer the __module__ attribute (works for the new extractor
    # system); fall back to inspect.getmodule(), and finally to "unknown".
    module_name = getattr(extractor_module, "__module__", None)

    if module_name is None:  # pragma: no cover
        module = inspect.getmodule(extractor_module)  # pragma: no cover
        module_name = (  # pragma: no cover
            module.__name__ if module is not None else "unknown"
        )

    # Build the extraction provenance record (timestamp is timezone-aware)
    extraction_details = {
        "Date": dt.now(tz=current_system_tz()).isoformat(),
        "Module": module_name,
        "Version": __version__,
    }

    # Move "Extractor Warnings" from nx_meta to extraction details if present.
    # Check both nx_meta and extensions (some extractors migrate it to
    # extensions); the top-level key takes precedence when both exist.
    if "Extractor Warnings" in nx_meta["nx_meta"]:
        extraction_details["Extractor Warnings"] = nx_meta["nx_meta"].pop(
            "Extractor Warnings"
        )
    elif (
        "extensions" in nx_meta["nx_meta"]
        and "Extractor Warnings" in nx_meta["nx_meta"]["extensions"]
    ):
        extraction_details["Extractor Warnings"] = nx_meta["nx_meta"]["extensions"].pop(
            "Extractor Warnings"
        )

    nx_meta["nx_meta"]["NexusLIMS Extraction"] = extraction_details

    return nx_meta
def get_schema_for_dataset_type(dataset_type: str) -> type[NexusMetadata]:
    """
    Select the appropriate schema class based on DatasetType.

    Maps a dataset type string to its corresponding type-specific metadata
    schema. Type-specific schemas (ImageMetadata, SpectrumMetadata, etc.)
    provide stricter validation of fields appropriate for each data type,
    while unrecognized types fall back to the base schema.

    Parameters
    ----------
    dataset_type : str
        The value of the 'DatasetType' field. Must be one of: 'Image',
        'Spectrum', 'SpectrumImage', 'Diffraction', 'Misc', or 'Unknown'.

    Returns
    -------
    type[NexusMetadata]
        The schema class to use for validation: a type-specific schema for
        known dataset types, or the base NexusMetadata schema for 'Misc',
        'Unknown', and any other value.

    Notes
    -----
    Schema mapping:
    - 'Image' → ImageMetadata (SEM/TEM/STEM images)
    - 'Spectrum' → SpectrumMetadata (EDS/EELS spectra)
    - 'SpectrumImage' → SpectrumImageMetadata (hyperspectral data)
    - 'Diffraction' → DiffractionMetadata (diffraction patterns)
    - 'Misc' / 'Unknown' / anything else → NexusMetadata (base schema)

    Examples
    --------
    >>> schema = get_schema_for_dataset_type("Image")
    >>> schema.__name__
    'ImageMetadata'

    >>> schema = get_schema_for_dataset_type("Unknown")
    >>> schema.__name__
    'NexusMetadata'
    """
    # Only the type-specific schemas need explicit entries; 'Misc',
    # 'Unknown', and any unrecognized value all use the base schema default.
    type_specific = {
        "Image": ImageMetadata,
        "Spectrum": SpectrumMetadata,
        "SpectrumImage": SpectrumImageMetadata,
        "Diffraction": DiffractionMetadata,
    }
    return type_specific.get(dataset_type, NexusMetadata)
def validate_nx_meta(
    metadata_dict: dict[str, Any], *, filename: Path | None = None
) -> dict[str, Any]:
    """
    Validate the nx_meta structure against type-specific metadata schemas.

    This function ensures that metadata returned by extractor plugins conforms
    to the required structure defined in the type-specific metadata schemas
    (ImageMetadata, SpectrumMetadata, etc.). The appropriate schema is selected
    based on the 'DatasetType' field. Validation is performed strictly - any
    schema violations will raise a ValidationError with detailed information
    about the failure.

    Parameters
    ----------
    metadata_dict : dict[str, Any]
        Dictionary containing an 'nx_meta' key with the metadata to validate.
        This is the format returned by all extractor plugins.
    filename : :class:`~pathlib.Path` or None, optional
        The file path being processed. Used only for error message context.
        If None, error messages will not include file path information.

    Returns
    -------
    dict[str, Any]
        The original metadata_dict, unchanged. Validation does not modify data,
        it only checks conformance to the schema.

    Raises
    ------
    pydantic.ValidationError
        If the nx_meta structure fails validation. The error message will include
        detailed information about which fields are invalid and why.

    Notes
    -----
    This function validates:

    - **Required fields**: 'Creation Time', 'Data Type', 'DatasetType' must be present
    - **ISO-8601 timestamps**: 'Creation Time' must be valid ISO-8601 with timezone
    - **Controlled vocabularies**: 'DatasetType' must be one of the allowed values
    - **Type-specific fields**: Fields appropriate for the dataset type (e.g.,
      'acceleration_voltage' for Image, 'acquisition_time' for Spectrum)
    - **Type constraints**: All fields must match their expected types
    - **Pint Quantities**: Physical measurements must use Pint Quantity objects

    The validation system uses type-specific schemas:
    - Image → ImageMetadata (SEM/TEM/STEM imaging)
    - Spectrum → SpectrumMetadata (EDS/EELS spectra)
    - SpectrumImage → SpectrumImageMetadata (hyperspectral)
    - Diffraction → DiffractionMetadata (TEM diffraction)
    - Misc/Unknown → NexusMetadata (base schema)

    All schemas support the 'extensions' section for instrument-specific
    metadata that doesn't fit the core schema.

    Examples
    --------
    Valid metadata passes without modification:

    >>> metadata = {
    ...     "nx_meta": {
    ...         "Creation Time": "2024-01-15T10:30:00-05:00",
    ...         "Data Type": "STEM_Imaging",
    ...         "DatasetType": "Image",
    ...     }
    ... }
    >>> result = validate_nx_meta(metadata)
    >>> result == metadata
    True

    Invalid metadata raises ValidationError:

    >>> bad_metadata = {
    ...     "nx_meta": {
    ...         "Creation Time": "invalid-timestamp",
    ...         "Data Type": "STEM_Imaging",
    ...         "DatasetType": "Image",
    ...     }
    ... }
    >>> validate_nx_meta(bad_metadata)  # doctest: +SKIP
    Traceback (most recent call last):
    ...
    pydantic.ValidationError: ...

    See Also
    --------
    nexusLIMS.schemas.metadata.NexusMetadata
        The base Pydantic schema model for nx_meta validation
    nexusLIMS.schemas.metadata.ImageMetadata
        Schema for Image dataset types
    nexusLIMS.schemas.metadata.SpectrumMetadata
        Schema for Spectrum dataset types
    get_schema_for_dataset_type
        Helper function that selects the appropriate schema
    parse_metadata
        Main extraction function that uses this validator
    """
    nx_meta = metadata_dict["nx_meta"]

    # Get dataset type and select appropriate schema; default matches the
    # fallback applied by parse_metadata before validation
    dataset_type = nx_meta.get("DatasetType", "Misc")
    schema_class = get_schema_for_dataset_type(dataset_type)

    try:
        schema_class.model_validate(nx_meta)
    except ValidationError as e:
        # Enhance error message with file and dataset type context.
        # BUGFIX: previously the filename branch emitted a hard-coded
        # placeholder instead of interpolating the actual file path.
        if filename:
            msg = f"Validation failed for {filename} ({dataset_type}): {e}"
        else:
            msg = f"Validation failed ({dataset_type}): {e}"
        _logger.exception(msg)
        raise

    return metadata_dict
def parse_metadata(  # noqa: PLR0912, PLR0915
    fname: Path,
    *,
    write_output: bool = True,
    generate_preview: bool = True,
    overwrite: bool = True,
) -> Tuple[list[Dict[str, Any]] | None, list[Path | None] | None]:
    """
    Parse metadata from a file and optionally generate a preview image.

    Given an input filename, read the file, determine what "type" of file (i.e.
    what instrument it came from) it is, filter the metadata (if necessary) to
    what we are interested in, and return it as a dictionary (writing to the
    NexusLIMS directory as JSON by default). Also calls the preview
    generation method, if desired.

    For files containing multiple signals (e.g., multi-signal DM3/DM4 files),
    generates one preview per signal and returns a list of preview paths.

    Parameters
    ----------
    fname
        The filename from which to read data
    write_output
        Whether to write the metadata dictionary as a json file in the NexusLIMS
        folder structure (skipped with a warning if the NexusLIMS configuration
        cannot be loaded)
    generate_preview
        Whether to generate the thumbnail preview of this dataset (that
        operation is not done in this method, it is just called from here so
        it can be done at the same time)
    overwrite
        Whether to overwrite the .json metadata file and thumbnail
        image if either exists

    Returns
    -------
    nx_meta : list[dict] or None
        A list of metadata dicts, one per signal in the file. If None,
        the file could not be opened. Single-signal files return a list
        with one dict, multi-signal files return a list with multiple dicts.
    preview_fname : list[Path or None] or None
        A list of file paths for the generated preview images, one per signal.
        For single-signal files, returns a list with one path. If preview
        generation was not requested (or the configuration is unavailable),
        returns a list of ``None`` values, one per signal.

    Raises
    ------
    pydantic.ValidationError
        If any signal's ``nx_meta`` fails schema validation.
    """
    # File extension without the leading dot (e.g. "dm3", "tif")
    extension = fname.suffix[1:]

    # Create extraction context (instrument may be None if the path does not
    # match a known instrument)
    instrument = get_instr_from_filepath(fname)
    context = ExtractionContext(file_path=fname, instrument=instrument)

    # Get extractor from registry
    registry = get_registry()
    extractor = registry.get_extractor(context)

    # Extract metadata using the selected extractor
    # All extractors now return a list of dicts (one per signal)
    nx_meta_list = extractor.extract(context)

    # Create a pseudo-module for extraction details tracking; it mimics the
    # attributes _add_extraction_details reads (__module__)
    class ExtractorMethod:
        """Pseudo-module for extraction details tracking."""

        def __init__(self, extractor_name: str):
            # Use the plugin module path for all extractors
            self.__module__ = f"nexusLIMS.extractors.plugins.{extractor_name}"
            self.__name__ = self.__module__

        def __call__(self, f: Path) -> dict:  # noqa: ARG002
            return nx_meta_list  # pragma: no cover

    # Defensive check: extractors should always return a list but handle None gracefully
    if nx_meta_list is None:
        return None, None

    extractor_method = ExtractorMethod(extractor.name)

    # Handle preview generation logic if the extractor is
    # the basic fallback and extension is not in unextracted_preview_map,
    # don't generate a preview (note: this deliberately overrides the
    # caller-supplied generate_preview value in both branches)
    if extractor.name == "basic_file_info_extractor":
        if extension not in unextracted_preview_map:
            generate_preview = False
            _logger.info(
                "No specialized extractor found for file extension; "
                "setting generate_preview to False",
            )
        else:
            generate_preview = True
            _logger.info(
                "No specialized extractor found for file extension; "
                "but file extension was in unextracted_preview_map; "
                "setting generate_preview to True",
            )

    # Add extraction details to metadata (mutates each dict in place)
    nx_meta_list = [_add_extraction_details(m, extractor_method) for m in nx_meta_list]

    signal_count = len(nx_meta_list)
    preview_fnames: list[Path | None] = []

    # Set the dataset type to Misc if it was not set by the file reader
    for nx_meta in nx_meta_list:
        if "DatasetType" not in nx_meta["nx_meta"]:
            nx_meta["nx_meta"]["DatasetType"] = "Misc"
            nx_meta["nx_meta"]["Data Type"] = "Miscellaneous"

    # Validate each metadata dict against the schema (strict mode)
    # This happens AFTER setting defaults to allow extractors to omit optional fields
    for nx_meta in nx_meta_list:
        validate_nx_meta(nx_meta, filename=fname)

    # Write output for each signal (single and multi-signal files); writing is
    # skipped (with a warning) when the NexusLIMS config cannot be loaded
    _can_write = write_output and _config_available()
    if write_output and not _can_write:
        _logger.warning(
            "NexusLIMS config unavailable; skipping metadata file write "
            "(pass write_output=False to suppress this warning)"
        )

    if _can_write:
        for i, nx_meta in enumerate(nx_meta_list):
            # For single-signal files, omit suffix for backward compatibility
            if signal_count == 1:
                out_fname = replace_instrument_data_path(fname, ".json")
            else:
                # For multi-signal files, append signal index to filename
                base_path = replace_instrument_data_path(fname, "")
                out_fname = Path(f"{base_path}_signal{i}.json")

            if not out_fname.exists() or overwrite:
                # Create the directory for the metadata file, if needed
                out_fname.parent.mkdir(parents=True, exist_ok=True)
                # Make sure that the nx_meta dict comes first in the json output
                out_dict = {"nx_meta": nx_meta["nx_meta"]}
                for k, v in nx_meta.items():
                    if k == "nx_meta":
                        pass
                    else:
                        out_dict[k] = v
                with out_fname.open(mode="w", encoding="utf-8") as f:
                    _logger.debug("Dumping metadata to %s", out_fname)
                    json.dump(
                        out_dict,
                        f,
                        sort_keys=False,
                        indent=2,
                        cls=_CustomEncoder,
                    )

    # Generate previews for each signal; like writing, this is gated on the
    # NexusLIMS configuration being loadable
    _can_preview = generate_preview and _config_available()
    if generate_preview and not _can_preview:
        _logger.warning(
            "NexusLIMS config unavailable; skipping preview generation "
            "(pass generate_preview=False to suppress this warning)"
        )

    if _can_preview:
        for i in range(signal_count):
            # For single-signal files, omit suffix for backward compatibility
            signal_idx = i if signal_count > 1 else None
            preview = create_preview(
                fname=fname,
                overwrite=overwrite,
                signal_index=signal_idx,
            )
            preview_fnames.append(preview)
    else:
        # Keep list length aligned with nx_meta_list even when skipping
        preview_fnames = [None] * signal_count

    return nx_meta_list, preview_fnames
def create_preview(  # noqa: PLR0911, PLR0912, PLR0915
    fname: Path, *, overwrite: bool, signal_index: int | None = None
) -> Path | None:
    """
    Generate a preview image for a given file using the plugin system.

    This method uses the preview generator plugin system to create thumbnail
    previews. It first tries to find a suitable preview generator plugin, and
    falls back to legacy methods if no plugin is found. Fallback order:
    plugin generator → legacy .tif downsampling → ``unextracted_preview_map``
    → HyperSpy load + ``sig_to_thumbnail`` → placeholder image.

    Parameters
    ----------
    fname
        The filename from which to read data
    overwrite
        Whether to overwrite the .json metadata file and thumbnail
        image if either exists
    signal_index
        For files with multiple signals, the index of the signal to preview.
        If None, generates a single preview (legacy behavior). If an int,
        generates preview with _signalN suffix in filename.

    Returns
    -------
    preview_fname : Optional[pathlib.Path]
        The filename of the generated preview image; if None, a preview could not be
        successfully generated. Note that when generation fails outright, the
        placeholder image is copied to the preview path and that path is
        returned (only the PIL-unreadable-image case returns None).
    """
    # Generate preview filename with signal index suffix if provided
    if signal_index is None:
        preview_fname = replace_instrument_data_path(fname, ".thumb.png")
    else:
        preview_fname = replace_instrument_data_path(
            fname, f"_signal{signal_index}.thumb.png"
        )

    # Skip if preview exists and overwrite is False
    if preview_fname.is_file() and not overwrite:
        _logger.info("Preview already exists: %s", preview_fname)
        return preview_fname

    # Create context for preview generation
    instrument = get_instr_from_filepath(fname)
    context = ExtractionContext(
        file_path=fname, instrument=instrument, signal_index=signal_index
    )

    # Try to get a preview generator from the registry
    registry = get_registry()
    generator = registry.get_preview_generator(context)

    if generator:
        # Use plugin-based preview generation
        _logger.info("Generating preview using %s: %s", generator.name, preview_fname)
        # Create the directory for the thumbnail, if needed
        preview_fname.parent.mkdir(parents=True, exist_ok=True)

        success = generator.generate(context, preview_fname)
        if success:
            return preview_fname

        _logger.warning(
            "Preview generator %s failed for %s",
            generator.name,
            fname,
        )
        # Fall through to legacy methods

    # Legacy fallback for .tif files (special case with downsampling)
    extension = fname.suffix[1:]
    if extension == "tif":
        _logger.info("Using legacy downsampling for .tif: %s", preview_fname)
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        factor = 2
        down_sample_image(fname, out_path=preview_fname, factor=factor)
        return preview_fname

    # Legacy fallback for files in unextracted_preview_map
    if extension in unextracted_preview_map:
        _logger.info("Using legacy preview map for %s: %s", extension, preview_fname)
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        preview_return = unextracted_preview_map[extension](
            f=fname,
            out_path=preview_fname,
            output_size=500,
        )

        # handle the case where PIL cannot open an image
        if preview_return is False:
            return None

        return preview_fname

    # Legacy fallback for HyperSpy-loadable files (lazy load to defer reading
    # the full dataset until compute() below)
    _logger.info("Trying legacy HyperSpy preview generation: %s", preview_fname)
    load_options: dict[str, Any] = {"lazy": True}
    if extension == "ser":
        load_options["only_valid_data"] = True

    # noinspection PyBroadException
    try:
        s = hs.load(fname, **load_options)
    except Exception:  # pylint: disable=broad-exception-caught
        _logger.warning(
            "Signal could not be loaded by HyperSpy. "
            "Using placeholder image for preview.",
        )
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(PLACEHOLDER_PREVIEW, preview_fname)
        return preview_fname

    # If s is a list of signals, select the appropriate one and annotate the
    # title so the preview indicates which signal it shows
    if isinstance(s, list):
        num_sigs = len(s)
        original_fname = s[0].metadata.General.original_filename
        if signal_index is not None:
            # Use specified signal index
            s = s[signal_index]
            s.metadata.General.title = (
                s.metadata.General.title
                + f" (signal {signal_index + 1} of "
                + f'{num_sigs} in file "{original_fname}")'
            )
        else:
            # Legacy: use first signal only
            s = s[0]
            s.metadata.General.title = (
                s.metadata.General.title
                + f' (1 of {num_sigs} total signals in file "{original_fname}")'
            )
    elif not s.metadata.General.title:
        # Derive a title from the original filename minus its extension
        s.metadata.General.title = s.metadata.General.original_filename.replace(
            extension,
            "",
        ).strip(".")

    # Generate the preview (compute() materializes the lazily-loaded data)
    _logger.info("Generating HyperSpy preview: %s", preview_fname)
    preview_fname.parent.mkdir(parents=True, exist_ok=True)
    s.compute(show_progressbar=False)
    try:
        sig_to_thumbnail(s, out_path=preview_fname)
    except Exception:  # pylint: disable=broad-exception-caught
        _logger.warning(
            "Legacy HyperSpy preview generation failed for %s. "
            "Using placeholder image for preview.",
            fname,
        )
        shutil.copyfile(PLACEHOLDER_PREVIEW, preview_fname)

    return preview_fname
def flatten_dict(_dict, parent_key="", separator=" "):  # noqa: ARG001
    """
    Flatten a nested dictionary into a single level.

    Utility method to take a nested dictionary structure and flatten it into a
    single level, separating the levels by a string as specified by
    ``separator``.

    Uses python-benedict for robust nested dictionary operations.

    Parameters
    ----------
    _dict : dict
        The dictionary to flatten
    parent_key : str
        The "root" key to add to the existing keys (unused in current implementation)
    separator : str
        The string to use to separate values in the flattened keys (i.e.
        {'a': {'b': 'c'}} would become {'a' + sep + 'b': 'c'})

    Returns
    -------
    flattened_dict : dict
        The dictionary with depth one, with nested dictionaries flattened
        into root-level keys
    """
    # Disable keypath_separator to avoid conflicts with keys containing
    # dots or other special chars
    return benedict(_dict, keypath_separator=None).flatten(separator=separator)
class _CustomEncoder(json.JSONEncoder):
    """
    Allow non-serializable types to be written in a JSON format.

    A custom JSON Encoder class that will allow certain types to be serialized that are
    not able to be by default (taken from https://stackoverflow.com/a/27050186).
    """

    def default(self, o):
        """Convert known non-serializable objects; defer everything else."""
        # Ordered (type, converter) pairs; checked in the same order as the
        # original isinstance chain so subclass relationships are preserved.
        conversions = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda v: v.tolist()),
            (np.bytes_, lambda v: v.decode()),
            # np.void array may contain arbitary binary, so base64 encode it
            (np.void, lambda v: base64.b64encode(v.tolist()).decode("utf-8")),
            # Handle Pint Quantity objects
            (
                ureg.Quantity,
                lambda v: {"value": float(v.magnitude), "unit": str(v.units)},
            ),
            # Handle Decimal objects (convert to float for JSON serialization)
            (Decimal, float),
        )
        for type_, convert in conversions:
            if isinstance(o, type_):
                return convert(o)
        return super().default(o)