Coverage for nexusLIMS/extractors/__init__.py: 100%

188 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1""" 

2Extract metadata from various electron microscopy file types. 

3 

4Extractors should return a list of dictionaries, where each dictionary contains 

5the extracted metadata under the key ``nx_meta``. The ``nx_meta`` structure is 

6validated against the :class:`~nexusLIMS.schemas.metadata.NexusMetadata` Pydantic 

7schema to ensure consistency across all extractors. 

8 

9Required Fields 

10--------------- 

11All extractors must include these fields in ``nx_meta``: 

12 

13* ``'Creation Time'`` - ISO-8601 timestamp string **with timezone** (e.g., 

14 ``"2024-01-15T10:30:00-05:00"`` or ``"2024-01-15T15:30:00Z"``) 

15* ``'Data Type'`` - Human-readable description using underscores (e.g., 

16 ``"STEM_Imaging"``, ``"TEM_EDS"``, ``"SEM_Imaging"``) 

17* ``'DatasetType'`` - Schema-defined category, must be one of: ``"Image"``, 

18 ``"Spectrum"``, ``"SpectrumImage"``, ``"Diffraction"``, ``"Misc"``, or ``"Unknown"`` 

19 

20Optional Fields 

21--------------- 

22Common optional fields include: 

23 

24* ``'Data Dimensions'`` - Dataset shape as string (e.g., ``"(1024, 1024)"``) 

25* ``'Instrument ID'`` - Instrument PID from database (e.g., ``"FEI-Titan-TEM-635816"``) 

26* ``'warnings'`` - List of warning messages or [message, context] pairs 

27 

28Additional instrument-specific fields are allowed beyond these standard fields. 

29 

30Schema Validation 

31----------------- 

32The ``nx_meta`` structure is validated using Pydantic strict mode. Validation occurs 

33after default values are set (e.g., missing ``DatasetType`` defaults to ``"Misc"``). 

34If validation fails, a ``pydantic.ValidationError`` is raised with detailed information 

35about which fields are invalid. 

36 

37For complete schema details, see :class:`~nexusLIMS.schemas.metadata.NexusMetadata`. 

38""" 

39 

40import base64 

41import inspect 

42import json 

43import logging 

44import shutil 

45from datetime import datetime as dt 

46from decimal import Decimal 

47from pathlib import Path 

48from typing import Any, Callable, Dict, Tuple 

49 

50import hyperspy.api as hs 

51import numpy as np 

52from benedict import benedict 

53from pydantic import ValidationError 

54 

55from nexusLIMS.extractors.base import ExtractionContext 

56from nexusLIMS.extractors.registry import get_registry 

57from nexusLIMS.instruments import get_instr_from_filepath 

58from nexusLIMS.schemas.metadata import ( 

59 DiffractionMetadata, 

60 ImageMetadata, 

61 NexusMetadata, 

62 SpectrumImageMetadata, 

63 SpectrumMetadata, 

64) 

65from nexusLIMS.schemas.units import ureg 

66from nexusLIMS.utils.paths import replace_instrument_data_path 

67from nexusLIMS.utils.time import current_system_tz 

68from nexusLIMS.version import __version__ 

69 

70from . import utils 

71from .plugins.preview_generators.hyperspy_preview import sig_to_thumbnail 

72from .plugins.preview_generators.image_preview import ( 

73 down_sample_image, 

74 image_to_square_thumbnail, 

75) 

76from .plugins.preview_generators.text_preview import text_to_thumbnail 

77 

# Module-level logger; handler/level configuration is left to the application
_logger = logging.getLogger(__name__)

79 

80 

81def _config_available() -> bool: 

82 """Return True if NexusLIMS settings can be loaded without error.""" 

83 try: 

84 from nexusLIMS.config import settings # noqa: PLC0415 

85 

86 _ = settings.NX_DATA_PATH 

87 except Exception: 

88 return False 

89 else: 

90 return True 

91 

92 

# Static fallback image copied into place whenever a real preview cannot be made
PLACEHOLDER_PREVIEW = Path(__file__).parent / "assets" / "extractor_error.png"
"""Path to placeholder preview image used when preview generation fails."""

# Public API of this package (also re-exports a few helpers from submodules)
__all__ = [
    "PLACEHOLDER_PREVIEW",
    "_logger",
    "create_preview",
    "down_sample_image",
    "flatten_dict",
    "get_instr_from_filepath",
    "get_registry",
    "image_to_square_thumbnail",
    "parse_metadata",
    "sig_to_thumbnail",
    "text_to_thumbnail",
    "unextracted_preview_map",
    "utils",
    "validate_nx_meta",
]

# Maps file extensions (no leading dot) to the preview-generator callable used
# for files that only receive basic metadata extraction
unextracted_preview_map = {
    "txt": text_to_thumbnail,
    "png": image_to_square_thumbnail,
    "tiff": image_to_square_thumbnail,
    "bmp": image_to_square_thumbnail,
    "gif": image_to_square_thumbnail,
    "jpg": image_to_square_thumbnail,
    "jpeg": image_to_square_thumbnail,
}
"""Filetypes that will only have basic metadata extracted but will nonetheless
have a custom preview image generated"""

124 

125 

def _add_extraction_details(
    nx_meta: Dict,
    extractor_module: Callable,
) -> Dict[str, Any]:
    """
    Add extraction details to the NexusLIMS metadata.

    Adds metadata about the extraction process, given an extractor module
    to the ``nx_meta`` metadata dictionary under the ``'NexusLIMS Extraction'``
    sub-key. The ``'Extractor Module'`` metadata key will contain the fully
    qualified path of a given extractor, e.g.
    ``nexusLIMS.extractors.basic_metadata``.

    Note
    ----
    If the ``'NexusLIMS Extraction'`` key already exists in the ``nx_meta``
    metadata dictionary, this method *will* overwrite its value.

    Parameters
    ----------
    nx_meta
        The metadata dictionary as returned by :py:meth:`parse_metadata`
    extractor_module
        The (callable) module for a specific metadata extractor from the
        :py:mod:`~nexusLIMS.extractors` module.

    Returns
    -------
    dict
        An updated ``nx_meta`` dictionary, containing extraction details

    """
    # PHASE 1 MIGRATION: Handle both old-style functions and new-style extractors
    # Try to get the module name in different ways for backward compatibility
    module_name = None

    # Try __module__ attribute first (works for new extractor system)
    if hasattr(extractor_module, "__module__"):
        module_name = extractor_module.__module__

    # Fallback to inspect.getmodule() for old-style functions
    if module_name is None:  # pragma: no cover
        module = inspect.getmodule(extractor_module)  # pragma: no cover
        # Last resort - use "unknown"
        module_name = (  # pragma: no cover
            module.__name__ if module is not None else "unknown"
        )

    # Build NexusLIMS Extraction details (timestamp is recorded in system tz)
    extraction_details = {
        "Date": dt.now(tz=current_system_tz()).isoformat(),
        "Module": module_name,
        "Version": __version__,
    }

    # Move "Extractor Warnings" from nx_meta to extraction details if present.
    # Check both nx_meta and extensions (some extractors migrate it to
    # extensions); the top-level key wins when both exist.
    if "Extractor Warnings" in nx_meta["nx_meta"]:
        extraction_details["Extractor Warnings"] = nx_meta["nx_meta"].pop(
            "Extractor Warnings"
        )
    elif (
        "extensions" in nx_meta["nx_meta"]
        and "Extractor Warnings" in nx_meta["nx_meta"]["extensions"]
    ):
        extraction_details["Extractor Warnings"] = nx_meta["nx_meta"]["extensions"].pop(
            "Extractor Warnings"
        )

    # NOTE: mutates the caller's dict in place (and also returns it)
    nx_meta["nx_meta"]["NexusLIMS Extraction"] = extraction_details

    return nx_meta

198 

199 

def get_schema_for_dataset_type(dataset_type: str) -> type[NexusMetadata]:
    """
    Return the metadata schema class used to validate a given dataset type.

    Data-specific dataset types map to their specialized schema, which
    applies stricter validation of the fields appropriate for that kind of
    data. Everything else — including ``'Misc'``, ``'Unknown'``, and any
    unrecognized value — falls back to the base
    :class:`~nexusLIMS.schemas.metadata.NexusMetadata` schema.

    Parameters
    ----------
    dataset_type : str
        The value of the 'DatasetType' field. Expected values are 'Image',
        'Spectrum', 'SpectrumImage', 'Diffraction', 'Misc', or 'Unknown'.

    Returns
    -------
    type[NexusMetadata]
        The schema class to use for validation.

    Notes
    -----
    Schema mapping:
    - 'Image' → ImageMetadata (SEM/TEM/STEM images)
    - 'Spectrum' → SpectrumMetadata (EDS/EELS spectra)
    - 'SpectrumImage' → SpectrumImageMetadata (hyperspectral data)
    - 'Diffraction' → DiffractionMetadata (diffraction patterns)
    - 'Misc' / 'Unknown' / anything else → NexusMetadata (base schema)

    Examples
    --------
    >>> get_schema_for_dataset_type("Image").__name__
    'ImageMetadata'

    >>> get_schema_for_dataset_type("Unknown").__name__
    'NexusMetadata'
    """
    # Only the data-specific types need explicit entries: the .get() default
    # already covers 'Misc', 'Unknown', and any unexpected value with the
    # base NexusMetadata schema.
    specialized: dict[str, type[NexusMetadata]] = {
        "Image": ImageMetadata,
        "Spectrum": SpectrumMetadata,
        "SpectrumImage": SpectrumImageMetadata,
        "Diffraction": DiffractionMetadata,
    }
    return specialized.get(dataset_type, NexusMetadata)

252 

253 

def validate_nx_meta(
    metadata_dict: dict[str, Any], *, filename: Path | None = None
) -> dict[str, Any]:
    """
    Validate the nx_meta structure against type-specific metadata schemas.

    This function ensures that metadata returned by extractor plugins conforms
    to the required structure defined in the type-specific metadata schemas
    (ImageMetadata, SpectrumMetadata, etc.). The appropriate schema is selected
    based on the 'DatasetType' field. Validation is performed strictly - any
    schema violations will raise a ValidationError with detailed information
    about the failure.

    Parameters
    ----------
    metadata_dict : dict[str, Any]
        Dictionary containing an 'nx_meta' key with the metadata to validate.
        This is the format returned by all extractor plugins.
    filename : :class:`~pathlib.Path` or None, optional
        The file path being processed. Used only for error message context.
        If None, error messages will not include file path information.

    Returns
    -------
    dict[str, Any]
        The original metadata_dict, unchanged. Validation does not modify data,
        it only checks conformance to the schema.

    Raises
    ------
    pydantic.ValidationError
        If the nx_meta structure fails validation. The error message will include
        detailed information about which fields are invalid and why.

    Notes
    -----
    This function validates:

    - **Required fields**: 'Creation Time', 'Data Type', 'DatasetType' must be present
    - **ISO-8601 timestamps**: 'Creation Time' must be valid ISO-8601 with timezone
    - **Controlled vocabularies**: 'DatasetType' must be one of the allowed values
    - **Type-specific fields**: Fields appropriate for the dataset type (e.g.,
      'acceleration_voltage' for Image, 'acquisition_time' for Spectrum)
    - **Type constraints**: All fields must match their expected types
    - **Pint Quantities**: Physical measurements must use Pint Quantity objects

    The validation system uses type-specific schemas:
    - Image → ImageMetadata (SEM/TEM/STEM imaging)
    - Spectrum → SpectrumMetadata (EDS/EELS spectra)
    - SpectrumImage → SpectrumImageMetadata (hyperspectral)
    - Diffraction → DiffractionMetadata (TEM diffraction)
    - Misc/Unknown → NexusMetadata (base schema)

    All schemas support the 'extensions' section for instrument-specific
    metadata that doesn't fit the core schema.

    Examples
    --------
    Valid metadata passes without modification:

    >>> metadata = {
    ...     "nx_meta": {
    ...         "Creation Time": "2024-01-15T10:30:00-05:00",
    ...         "Data Type": "STEM_Imaging",
    ...         "DatasetType": "Image",
    ...     }
    ... }
    >>> result = validate_nx_meta(metadata)
    >>> result == metadata
    True

    Invalid metadata raises ValidationError:

    >>> bad_metadata = {
    ...     "nx_meta": {
    ...         "Creation Time": "invalid-timestamp",
    ...         "Data Type": "STEM_Imaging",
    ...         "DatasetType": "Image",
    ...     }
    ... }
    >>> validate_nx_meta(bad_metadata)  # doctest: +SKIP
    Traceback (most recent call last):
    ...
    pydantic.ValidationError: ...

    See Also
    --------
    nexusLIMS.schemas.metadata.NexusMetadata
        The base Pydantic schema model for nx_meta validation
    nexusLIMS.schemas.metadata.ImageMetadata
        Schema for Image dataset types
    nexusLIMS.schemas.metadata.SpectrumMetadata
        Schema for Spectrum dataset types
    get_schema_for_dataset_type
        Helper function that selects the appropriate schema
    parse_metadata
        Main extraction function that uses this validator
    """
    nx_meta = metadata_dict["nx_meta"]

    # Get dataset type and select appropriate schema (missing type → base schema)
    dataset_type = nx_meta.get("DatasetType", "Misc")
    schema_class = get_schema_for_dataset_type(dataset_type)

    try:
        schema_class.model_validate(nx_meta)
    except ValidationError as e:
        # Enhance error message with file and dataset type context.
        # BUGFIX: previously the literal text "(unknown)" was logged instead of
        # interpolating the actual file path, defeating the `if filename` branch.
        if filename:
            msg = f"Validation failed for {filename} ({dataset_type}): {e}"
        else:
            msg = f"Validation failed ({dataset_type}): {e}"
        _logger.exception(msg)
        raise

    return metadata_dict

370 

371 

def parse_metadata(  # noqa: PLR0912, PLR0915
    fname: Path,
    *,
    write_output: bool = True,
    generate_preview: bool = True,
    overwrite: bool = True,
) -> Tuple[list[Dict[str, Any]] | None, list[Path | None] | None]:
    """
    Parse metadata from a file and optionally generate a preview image.

    Given an input filename, read the file, determine what "type" of file (i.e.
    what instrument it came from) it is, filter the metadata (if necessary) to
    what we are interested in, and return it as a dictionary (writing to the
    NexusLIMS directory as JSON by default). Also calls the preview
    generation method, if desired.

    For files containing multiple signals (e.g., multi-signal DM3/DM4 files),
    generates one preview per signal and returns a list of preview paths.

    Parameters
    ----------
    fname
        The filename from which to read data
    write_output
        Whether to write the metadata dictionary as a json file in the NexusLIMS
        folder structure
    generate_preview
        Whether to generate the thumbnail preview of this dataset (that
        operation is not done in this method, it is just called from here so
        it can be done at the same time)
    overwrite
        Whether to overwrite the .json metadata file and thumbnail
        image if either exists

    Returns
    -------
    nx_meta : list[dict] or None
        A list of metadata dicts, one per signal in the file. If None,
        the file could not be opened. Single-signal files return a list
        with one dict, multi-signal files return a list with multiple dicts.
    preview_fname : list[Path] or None
        A list of file paths for the generated preview images, one per signal.
        For single-signal files, returns a list with one path. Returns `None`
        if preview generation was not requested.
    """
    # Extension without the leading dot; used for preview-eligibility checks
    extension = fname.suffix[1:]

    # Create extraction context (instrument may be None if path is unrecognized)
    instrument = get_instr_from_filepath(fname)
    context = ExtractionContext(file_path=fname, instrument=instrument)

    # Get extractor from registry
    registry = get_registry()
    extractor = registry.get_extractor(context)

    # Extract metadata using the selected extractor
    # All extractors now return a list of dicts (one per signal)
    nx_meta_list = extractor.extract(context)

    # Create a pseudo-module for extraction details tracking
    class ExtractorMethod:
        """Pseudo-module for extraction details tracking."""

        def __init__(self, extractor_name: str):
            # Use the plugin module path for all extractors
            self.__module__ = f"nexusLIMS.extractors.plugins.{extractor_name}"
            self.__name__ = self.__module__

        def __call__(self, f: Path) -> dict:  # noqa: ARG002
            return nx_meta_list  # pragma: no cover

    # Defensive check: extractors should always return a list but handle None gracefully
    if nx_meta_list is None:
        return None, None

    extractor_method = ExtractorMethod(extractor.name)

    # Handle preview generation logic: if the extractor is the basic fallback
    # and extension is not in unextracted_preview_map, don't generate a
    # preview; if it IS in the map, force preview generation on
    if extractor.name == "basic_file_info_extractor":
        if extension not in unextracted_preview_map:
            generate_preview = False
            _logger.info(
                "No specialized extractor found for file extension; "
                "setting generate_preview to False",
            )
        else:
            generate_preview = True
            _logger.info(
                "No specialized extractor found for file extension; "
                "but file extension was in unextracted_preview_map; "
                "setting generate_preview to True",
            )

    # Add extraction details to metadata (mutates each dict in place)
    nx_meta_list = [_add_extraction_details(m, extractor_method) for m in nx_meta_list]

    signal_count = len(nx_meta_list)
    preview_fnames = []

    # Set the dataset type to Misc if it was not set by the file reader
    for nx_meta in nx_meta_list:
        if "DatasetType" not in nx_meta["nx_meta"]:
            nx_meta["nx_meta"]["DatasetType"] = "Misc"
            nx_meta["nx_meta"]["Data Type"] = "Miscellaneous"

    # Validate each metadata dict against the schema (strict mode)
    # This happens AFTER setting defaults to allow extractors to omit optional fields
    for nx_meta in nx_meta_list:
        validate_nx_meta(nx_meta, filename=fname)

    # Write output for each signal (single and multi-signal files); skip
    # gracefully (with a warning) when the NexusLIMS config cannot be loaded
    _can_write = write_output and _config_available()
    if write_output and not _can_write:
        _logger.warning(
            "NexusLIMS config unavailable; skipping metadata file write "
            "(pass write_output=False to suppress this warning)"
        )

    if _can_write:
        for i, nx_meta in enumerate(nx_meta_list):
            # For single-signal files, omit suffix for backward compatibility
            if signal_count == 1:
                out_fname = replace_instrument_data_path(fname, ".json")
            else:
                # For multi-signal files, append signal index to filename
                base_path = replace_instrument_data_path(fname, "")
                out_fname = Path(f"{base_path}_signal{i}.json")

            if not out_fname.exists() or overwrite:
                # Create the directory for the metadata file, if needed
                out_fname.parent.mkdir(parents=True, exist_ok=True)
                # Make sure that the nx_meta dict comes first in the json output
                out_dict = {"nx_meta": nx_meta["nx_meta"]}
                for k, v in nx_meta.items():
                    if k == "nx_meta":
                        pass
                    else:
                        out_dict[k] = v
                with out_fname.open(mode="w", encoding="utf-8") as f:
                    _logger.debug("Dumping metadata to %s", out_fname)
                    json.dump(
                        out_dict,
                        f,
                        sort_keys=False,
                        indent=2,
                        cls=_CustomEncoder,
                    )

    # Generate previews for each signal, same config-availability gating
    _can_preview = generate_preview and _config_available()
    if generate_preview and not _can_preview:
        _logger.warning(
            "NexusLIMS config unavailable; skipping preview generation "
            "(pass generate_preview=False to suppress this warning)"
        )

    if _can_preview:
        for i in range(signal_count):
            # For single-signal files, omit suffix for backward compatibility
            signal_idx = i if signal_count > 1 else None
            preview = create_preview(
                fname=fname,
                overwrite=overwrite,
                signal_index=signal_idx,
            )
            preview_fnames.append(preview)
    else:
        # Preview skipped: return a same-length list of None placeholders
        preview_fnames = [None] * signal_count

    return nx_meta_list, preview_fnames

544 

545 

def create_preview(  # noqa: PLR0911, PLR0912, PLR0915
    fname: Path, *, overwrite: bool, signal_index: int | None = None
) -> Path | None:
    """
    Generate a preview image for a given file using the plugin system.

    This method uses the preview generator plugin system to create thumbnail
    previews. It first tries to find a suitable preview generator plugin, and
    falls back to legacy methods if no plugin is found:

    1. Registered preview-generator plugin (if one accepts this context)
    2. Legacy downsampling for ``.tif`` files
    3. Legacy `unextracted_preview_map` handlers (txt/png/jpg/etc.)
    4. Legacy HyperSpy-based rendering, with a placeholder image on failure

    Parameters
    ----------
    fname
        The filename from which to read data
    overwrite
        Whether to overwrite the .json metadata file and thumbnail
        image if either exists
    signal_index
        For files with multiple signals, the index of the signal to preview.
        If None, generates a single preview (legacy behavior). If an int,
        generates preview with _signalN suffix in filename.

    Returns
    -------
    preview_fname : Optional[pathlib.Path]
        The filename of the generated preview image; if None, a preview could not be
        successfully generated.
    """
    # Generate preview filename with signal index suffix if provided
    if signal_index is None:
        preview_fname = replace_instrument_data_path(fname, ".thumb.png")
    else:
        preview_fname = replace_instrument_data_path(
            fname, f"_signal{signal_index}.thumb.png"
        )

    # Skip if preview exists and overwrite is False
    if preview_fname.is_file() and not overwrite:
        _logger.info("Preview already exists: %s", preview_fname)
        return preview_fname

    # Create context for preview generation
    instrument = get_instr_from_filepath(fname)
    context = ExtractionContext(
        file_path=fname, instrument=instrument, signal_index=signal_index
    )

    # Try to get a preview generator from the registry
    registry = get_registry()
    generator = registry.get_preview_generator(context)

    if generator:
        # Use plugin-based preview generation
        _logger.info("Generating preview using %s: %s", generator.name, preview_fname)
        # Create the directory for the thumbnail, if needed
        preview_fname.parent.mkdir(parents=True, exist_ok=True)

        success = generator.generate(context, preview_fname)
        if success:
            return preview_fname

        _logger.warning(
            "Preview generator %s failed for %s",
            generator.name,
            fname,
        )
        # Fall through to legacy methods

    # Legacy fallback for .tif files (special case with downsampling)
    extension = fname.suffix[1:]
    if extension == "tif":
        _logger.info("Using legacy downsampling for .tif: %s", preview_fname)
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        factor = 2
        down_sample_image(fname, out_path=preview_fname, factor=factor)
        return preview_fname

    # Legacy fallback for files in unextracted_preview_map
    if extension in unextracted_preview_map:
        _logger.info("Using legacy preview map for %s: %s", extension, preview_fname)
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        preview_return = unextracted_preview_map[extension](
            f=fname,
            out_path=preview_fname,
            output_size=500,
        )

        # handle the case where PIL cannot open an image (handler returns False)
        if preview_return is False:
            return None

        return preview_fname

    # Legacy fallback for HyperSpy-loadable files (lazy load to limit memory)
    _logger.info("Trying legacy HyperSpy preview generation: %s", preview_fname)
    load_options = {"lazy": True}
    if extension == "ser":
        # .ser files may contain incomplete acquisitions; load valid data only
        load_options["only_valid_data"] = True

    # noinspection PyBroadException
    try:
        s = hs.load(fname, **load_options)
    except Exception:  # pylint: disable=broad-exception-caught
        _logger.warning(
            "Signal could not be loaded by HyperSpy. "
            "Using placeholder image for preview.",
        )
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(PLACEHOLDER_PREVIEW, preview_fname)
        return preview_fname

    # If s is a list of signals, select the appropriate one and annotate the
    # title so the preview shows which signal (of how many) it represents
    if isinstance(s, list):
        num_sigs = len(s)
        original_fname = s[0].metadata.General.original_filename
        if signal_index is not None:
            # Use specified signal index
            s = s[signal_index]
            s.metadata.General.title = (
                s.metadata.General.title
                + f" (signal {signal_index + 1} of "
                + f'{num_sigs} in file "{original_fname}")'
            )
        else:
            # Legacy: use first signal only
            s = s[0]
            s.metadata.General.title = (
                s.metadata.General.title
                + f' (1 of {num_sigs} total signals in file "{original_fname}")'
            )
    elif not s.metadata.General.title:
        # No title in metadata: derive one from the original filename
        s.metadata.General.title = s.metadata.General.original_filename.replace(
            extension,
            "",
        ).strip(".")

    # Generate the preview (compute() materializes the lazy signal first)
    _logger.info("Generating HyperSpy preview: %s", preview_fname)
    preview_fname.parent.mkdir(parents=True, exist_ok=True)
    s.compute(show_progressbar=False)
    try:
        sig_to_thumbnail(s, out_path=preview_fname)
    except Exception:  # pylint: disable=broad-exception-caught
        _logger.warning(
            "Legacy HyperSpy preview generation failed for %s. "
            "Using placeholder image for preview.",
            fname,
        )
        shutil.copyfile(PLACEHOLDER_PREVIEW, preview_fname)

    return preview_fname

697 

698 

def flatten_dict(_dict, parent_key="", separator=" "):  # noqa: ARG001
    """
    Flatten a nested dictionary into a single level.

    Utility method to take a nested dictionary structure and flatten it into a
    single level, separating the levels by a string as specified by
    ``separator``.

    Uses python-benedict for robust nested dictionary operations.

    Parameters
    ----------
    _dict : dict
        The dictionary to flatten
    parent_key : str
        The "root" key to add to the existing keys (unused in the current
        implementation; retained only for backward compatibility with callers)
    separator : str
        The string to use to separate values in the flattened keys (i.e.
        {'a': {'b': 'c'}} would become {'a' + sep + 'b': 'c'})

    Returns
    -------
    flattened_dict : dict
        The dictionary with depth one, with nested dictionaries flattened
        into root-level keys
    """
    # Disable keypath_separator to avoid conflicts with keys containing
    # dots or other special chars
    return benedict(_dict, keypath_separator=None).flatten(separator=separator)

728 

729 

730class _CustomEncoder(json.JSONEncoder): 

731 """ 

732 Allow non-serializable types to be written in a JSON format. 

733 

734 A custom JSON Encoder class that will allow certain types to be serialized that are 

735 not able to be by default (taken from https://stackoverflow.com/a/27050186). 

736 """ 

737 

738 def default(self, o): # noqa: PLR0911 

739 if isinstance(o, np.integer): 

740 return int(o) 

741 if isinstance(o, np.floating): 

742 return float(o) 

743 if isinstance(o, np.ndarray): 

744 return o.tolist() 

745 if isinstance(o, np.bytes_): 

746 return o.decode() 

747 if isinstance(o, np.void): 

748 # np.void array may contain arbitary binary, so base64 encode it 

749 return base64.b64encode(o.tolist()).decode("utf-8") 

750 # Handle Pint Quantity objects 

751 if isinstance(o, ureg.Quantity): 

752 return {"value": float(o.magnitude), "unit": str(o.units)} 

753 # Handle Decimal objects (convert to float for JSON serialization) 

754 if isinstance(o, Decimal): 

755 return float(o) 

756 

757 return super().default(o)