Coverage for nexusLIMS/extractors/__init__.py: 100%

188 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1""" 

2Extract metadata from various electron microscopy file types. 

3 

4Extractors should return a list of dictionaries, where each dictionary contains 

5the extracted metadata under the key ``nx_meta``. The ``nx_meta`` structure is 

6validated against the :class:`~nexusLIMS.schemas.metadata.NexusMetadata` Pydantic 

7schema to ensure consistency across all extractors. 

8 

9Required Fields 

10--------------- 

11All extractors must include these fields in ``nx_meta``: 

12 

13* ``'Creation Time'`` - ISO-8601 timestamp string **with timezone** (e.g., 

14 ``"2024-01-15T10:30:00-05:00"`` or ``"2024-01-15T15:30:00Z"``) 

15* ``'Data Type'`` - Human-readable description using underscores (e.g., 

16 ``"STEM_Imaging"``, ``"TEM_EDS"``, ``"SEM_Imaging"``) 

17* ``'DatasetType'`` - Schema-defined category, must be one of: ``"Image"``, 

18 ``"Spectrum"``, ``"SpectrumImage"``, ``"Diffraction"``, ``"Misc"``, or ``"Unknown"`` 

19 

20Optional Fields 

21--------------- 

22Common optional fields include: 

23 

24* ``'Data Dimensions'`` - Dataset shape as string (e.g., ``"(1024, 1024)"``) 

25* ``'Instrument ID'`` - Instrument PID from database (e.g., ``"FEI-Titan-TEM-635816"``) 

26* ``'warnings'`` - List of warning messages or [message, context] pairs 

27 

28Additional instrument-specific fields are allowed beyond these standard fields. 

29 

30Schema Validation 

31----------------- 

32The ``nx_meta`` structure is validated using Pydantic strict mode. Validation occurs 

33after default values are set (e.g., missing ``DatasetType`` defaults to ``"Misc"``). 

34If validation fails, a ``pydantic.ValidationError`` is raised with detailed information 

35about which fields are invalid. 

36 

37For complete schema details, see :class:`~nexusLIMS.schemas.metadata.NexusMetadata`. 

38""" 

39 

40import base64 

41import inspect 

42import json 

43import logging 

44import shutil 

45from datetime import datetime as dt 

46from decimal import Decimal 

47from pathlib import Path 

48from typing import Any, Callable, Dict, Tuple 

49 

50import hyperspy.api as hs 

51import numpy as np 

52from benedict import benedict 

53from pydantic import ValidationError 

54 

55from nexusLIMS.extractors.base import ExtractionContext 

56from nexusLIMS.extractors.registry import get_registry 

57from nexusLIMS.instruments import get_instr_from_filepath 

58from nexusLIMS.schemas.metadata import ( 

59 DiffractionMetadata, 

60 ImageMetadata, 

61 NexusMetadata, 

62 SpectrumImageMetadata, 

63 SpectrumMetadata, 

64) 

65from nexusLIMS.schemas.units import ureg 

66from nexusLIMS.utils.paths import replace_instrument_data_path 

67from nexusLIMS.utils.time import current_system_tz 

68from nexusLIMS.version import __version__ 

69 

70from . import utils 

71from .plugins.preview_generators.hyperspy_preview import sig_to_thumbnail 

72from .plugins.preview_generators.image_preview import ( 

73 down_sample_image, 

74 image_to_square_thumbnail, 

75) 

76from .plugins.preview_generators.text_preview import text_to_thumbnail 

77 

# Module-level logger; handler/level configuration is left to the application
_logger = logging.getLogger(__name__)

79 

80 

81def _config_available() -> bool: 

82 """Return True if NexusLIMS settings can be loaded without error.""" 

83 try: 

84 from nexusLIMS.config import settings # noqa: PLC0415 

85 

86 _ = settings.NX_DATA_PATH 

87 except Exception: 

88 return False 

89 else: 

90 return True 

91 

92 

# Static fallback image copied into place whenever a real preview cannot be made
PLACEHOLDER_PREVIEW = Path(__file__).parent / "assets" / "extractor_error.png"
"""Path to placeholder preview image used when preview generation fails."""

# Public API of this package (also re-exports a few helpers from submodules)
__all__ = [
    "PLACEHOLDER_PREVIEW",
    "_logger",
    "create_preview",
    "down_sample_image",
    "flatten_dict",
    "get_instr_from_filepath",
    "get_registry",
    "image_to_square_thumbnail",
    "parse_metadata",
    "sig_to_thumbnail",
    "text_to_thumbnail",
    "unextracted_preview_map",
    "utils",
    "validate_nx_meta",
]

# Maps file extensions (no leading dot) to the preview-generator callable used
# for files that only receive basic metadata extraction
unextracted_preview_map = {
    "txt": text_to_thumbnail,
    "png": image_to_square_thumbnail,
    "tiff": image_to_square_thumbnail,
    "bmp": image_to_square_thumbnail,
    "gif": image_to_square_thumbnail,
    "jpg": image_to_square_thumbnail,
    "jpeg": image_to_square_thumbnail,
}
"""Filetypes that will only have basic metadata extracted but will nonetheless
have a custom preview image generated"""

124 

125 

def _add_extraction_details(
    nx_meta: Dict,
    extractor_module: Callable,
) -> Dict[str, Any]:
    """
    Add extraction details to the NexusLIMS metadata.

    Adds metadata about the extraction process, given an extractor module
    to the ``nx_meta`` metadata dictionary under the ``'NexusLIMS Extraction'``
    sub-key. The ``'Extractor Module'`` metadata key will contain the fully
    qualified path of a given extractor, e.g.
    ``nexusLIMS.extractors.basic_metadata``.

    Note
    ----
    If the ``'NexusLIMS Extraction'`` key already exists in the ``nx_meta``
    metadata dictionary, this method *will* overwrite its value.

    Parameters
    ----------
    nx_meta
        The metadata dictionary as returned by :py:meth:`parse_metadata`
    extractor_module
        The (callable) module for a specific metadata extractor from the
        :py:mod:`~nexusLIMS.extractors` module.

    Returns
    -------
    dict
        An updated ``nx_meta`` dictionary, containing extraction details

    """
    # PHASE 1 MIGRATION: Handle both old-style functions and new-style extractors
    # Try to get the module name in different ways for backward compatibility
    module_name = None

    # Try __module__ attribute first (works for new extractor system)
    if hasattr(extractor_module, "__module__"):
        module_name = extractor_module.__module__

    # Fallback to inspect.getmodule() for old-style functions
    if module_name is None:  # pragma: no cover
        module = inspect.getmodule(extractor_module)  # pragma: no cover
        # Last resort - use "unknown"
        module_name = (  # pragma: no cover
            module.__name__ if module is not None else "unknown"
        )

    # Build NexusLIMS Extraction details (timestamp is recorded in system tz)
    extraction_details = {
        "Date": dt.now(tz=current_system_tz()).isoformat(),
        "Module": module_name,
        "Version": __version__,
    }

    # Move "Extractor Warnings" from nx_meta to extraction details if present.
    # Check both nx_meta and extensions (some extractors migrate it to
    # extensions); the top-level key wins when both exist.
    if "Extractor Warnings" in nx_meta["nx_meta"]:
        extraction_details["Extractor Warnings"] = nx_meta["nx_meta"].pop(
            "Extractor Warnings"
        )
    elif (
        "extensions" in nx_meta["nx_meta"]
        and "Extractor Warnings" in nx_meta["nx_meta"]["extensions"]
    ):
        extraction_details["Extractor Warnings"] = nx_meta["nx_meta"]["extensions"].pop(
            "Extractor Warnings"
        )

    # NOTE: mutates the caller's dict in place (and also returns it)
    nx_meta["nx_meta"]["NexusLIMS Extraction"] = extraction_details

    return nx_meta

198 

199 

def get_schema_for_dataset_type(dataset_type: str) -> type[NexusMetadata]:
    """
    Return the metadata schema class used to validate a given dataset type.

    Data-specific dataset types map to their specialized schema, which
    applies stricter validation of the fields appropriate for that kind of
    data. Everything else — including ``'Misc'``, ``'Unknown'``, and any
    unrecognized value — falls back to the base
    :class:`~nexusLIMS.schemas.metadata.NexusMetadata` schema.

    Parameters
    ----------
    dataset_type : str
        The value of the 'DatasetType' field. Expected values are 'Image',
        'Spectrum', 'SpectrumImage', 'Diffraction', 'Misc', or 'Unknown'.

    Returns
    -------
    type[NexusMetadata]
        The schema class to use for validation.

    Notes
    -----
    Schema mapping:
    - 'Image' → ImageMetadata (SEM/TEM/STEM images)
    - 'Spectrum' → SpectrumMetadata (EDS/EELS spectra)
    - 'SpectrumImage' → SpectrumImageMetadata (hyperspectral data)
    - 'Diffraction' → DiffractionMetadata (diffraction patterns)
    - 'Misc' / 'Unknown' / anything else → NexusMetadata (base schema)

    Examples
    --------
    >>> get_schema_for_dataset_type("Image").__name__
    'ImageMetadata'

    >>> get_schema_for_dataset_type("Unknown").__name__
    'NexusMetadata'
    """
    # Only the data-specific types need explicit entries: the .get() default
    # already covers 'Misc', 'Unknown', and any unexpected value with the
    # base NexusMetadata schema.
    specialized: dict[str, type[NexusMetadata]] = {
        "Image": ImageMetadata,
        "Spectrum": SpectrumMetadata,
        "SpectrumImage": SpectrumImageMetadata,
        "Diffraction": DiffractionMetadata,
    }
    return specialized.get(dataset_type, NexusMetadata)

252 

253 

def validate_nx_meta(
    metadata_dict: dict[str, Any], *, filename: Path | None = None
) -> dict[str, Any]:
    """
    Validate the nx_meta structure against type-specific metadata schemas.

    This function ensures that metadata returned by extractor plugins conforms
    to the required structure defined in the type-specific metadata schemas
    (ImageMetadata, SpectrumMetadata, etc.). The appropriate schema is selected
    based on the 'DatasetType' field. Validation is performed strictly - any
    schema violations will raise a ValidationError with detailed information
    about the failure.

    Parameters
    ----------
    metadata_dict : dict[str, Any]
        Dictionary containing an 'nx_meta' key with the metadata to validate.
        This is the format returned by all extractor plugins.
    filename : :class:`~pathlib.Path` or None, optional
        The file path being processed. Used only for error message context.
        If None, error messages will not include file path information.

    Returns
    -------
    dict[str, Any]
        The original metadata_dict, unchanged. Validation does not modify data,
        it only checks conformance to the schema.

    Raises
    ------
    pydantic.ValidationError
        If the nx_meta structure fails validation. The error message will include
        detailed information about which fields are invalid and why.

    Notes
    -----
    This function validates:

    - **Required fields**: 'Creation Time', 'Data Type', 'DatasetType' must be present
    - **ISO-8601 timestamps**: 'Creation Time' must be valid ISO-8601 with timezone
    - **Controlled vocabularies**: 'DatasetType' must be one of the allowed values
    - **Type-specific fields**: Fields appropriate for the dataset type (e.g.,
      'acceleration_voltage' for Image, 'acquisition_time' for Spectrum)
    - **Type constraints**: All fields must match their expected types
    - **Pint Quantities**: Physical measurements must use Pint Quantity objects

    The validation system uses type-specific schemas:
    - Image → ImageMetadata (SEM/TEM/STEM imaging)
    - Spectrum → SpectrumMetadata (EDS/EELS spectra)
    - SpectrumImage → SpectrumImageMetadata (hyperspectral)
    - Diffraction → DiffractionMetadata (TEM diffraction)
    - Misc/Unknown → NexusMetadata (base schema)

    All schemas support the 'extensions' section for instrument-specific
    metadata that doesn't fit the core schema.

    Examples
    --------
    Valid metadata passes without modification:

    >>> metadata = {
    ...     "nx_meta": {
    ...         "Creation Time": "2024-01-15T10:30:00-05:00",
    ...         "Data Type": "STEM_Imaging",
    ...         "DatasetType": "Image",
    ...     }
    ... }
    >>> result = validate_nx_meta(metadata)
    >>> result == metadata
    True

    Invalid metadata raises ValidationError:

    >>> bad_metadata = {
    ...     "nx_meta": {
    ...         "Creation Time": "invalid-timestamp",
    ...         "Data Type": "STEM_Imaging",
    ...         "DatasetType": "Image",
    ...     }
    ... }
    >>> validate_nx_meta(bad_metadata)  # doctest: +SKIP
    Traceback (most recent call last):
    ...
    pydantic.ValidationError: ...

    See Also
    --------
    nexusLIMS.schemas.metadata.NexusMetadata
        The base Pydantic schema model for nx_meta validation
    nexusLIMS.schemas.metadata.ImageMetadata
        Schema for Image dataset types
    nexusLIMS.schemas.metadata.SpectrumMetadata
        Schema for Spectrum dataset types
    get_schema_for_dataset_type
        Helper function that selects the appropriate schema
    parse_metadata
        Main extraction function that uses this validator
    """
    nx_meta = metadata_dict["nx_meta"]

    # Get dataset type and select appropriate schema (missing type → base schema)
    dataset_type = nx_meta.get("DatasetType", "Misc")
    schema_class = get_schema_for_dataset_type(dataset_type)

    try:
        schema_class.model_validate(nx_meta)
    except ValidationError as e:
        # Enhance error message with file and dataset type context.
        # BUGFIX: previously the literal text "(unknown)" was logged instead of
        # interpolating the actual file path, defeating the `if filename` branch.
        if filename:
            msg = f"Validation failed for {filename} ({dataset_type}): {e}"
        else:
            msg = f"Validation failed ({dataset_type}): {e}"
        _logger.exception(msg)
        raise

    return metadata_dict

370 

371 

def parse_metadata(  # noqa: PLR0912, PLR0915
    fname: Path,
    *,
    write_output: bool = True,
    generate_preview: bool = True,
    overwrite: bool = True,
) -> Tuple[list[Dict[str, Any]] | None, list[Path | None] | None]:
    """
    Parse metadata from a file and optionally generate a preview image.

    Given an input filename, read the file, determine what "type" of file (i.e.
    what instrument it came from) it is, filter the metadata (if necessary) to
    what we are interested in, and return it as a dictionary (writing to the
    NexusLIMS directory as JSON by default). Also calls the preview
    generation method, if desired.

    For files containing multiple signals (e.g., multi-signal DM3/DM4 files),
    generates one preview per signal and returns a list of preview paths.

    Parameters
    ----------
    fname
        The filename from which to read data
    write_output
        Whether to write the metadata dictionary as a json file in the NexusLIMS
        folder structure
    generate_preview
        Whether to generate the thumbnail preview of this dataset (that
        operation is not done in this method, it is just called from here so
        it can be done at the same time)
    overwrite
        Whether to overwrite the .json metadata file and thumbnail
        image if either exists

    Returns
    -------
    nx_meta : list[dict] or None
        A list of metadata dicts, one per signal in the file. If None,
        the file could not be opened. Single-signal files return a list
        with one dict, multi-signal files return a list with multiple dicts.
    preview_fname : list[Path] or None
        A list of file paths for the generated preview images, one per signal.
        For single-signal files, returns a list with one path. Returns `None`
        if preview generation was not requested.
    """
    # Extension without the leading dot; used for preview-eligibility checks
    extension = fname.suffix[1:]

    # Create extraction context (instrument may be None if path is unrecognized)
    instrument = get_instr_from_filepath(fname)
    context = ExtractionContext(file_path=fname, instrument=instrument)

    # Get extractor from registry
    registry = get_registry()
    extractor = registry.get_extractor(context)

    # Extract metadata using the selected extractor
    # All extractors now return a list of dicts (one per signal)
    nx_meta_list = extractor.extract(context)

    # Create a pseudo-module for extraction details tracking
    class ExtractorMethod:
        """Pseudo-module for extraction details tracking."""

        def __init__(self, extractor_name: str):
            # Use the plugin module path for all extractors
            self.__module__ = f"nexusLIMS.extractors.plugins.{extractor_name}"
            self.__name__ = self.__module__

        def __call__(self, f: Path) -> dict:  # noqa: ARG002
            return nx_meta_list  # pragma: no cover

    # Defensive check: extractors should always return a list but handle None gracefully
    if nx_meta_list is None:
        return None, None

    extractor_method = ExtractorMethod(extractor.name)

    # Handle preview generation logic: if the extractor is the basic fallback
    # and extension is not in unextracted_preview_map, don't generate a
    # preview; if it IS in the map, force preview generation on
    if extractor.name == "basic_file_info_extractor":
        if extension not in unextracted_preview_map:
            generate_preview = False
            _logger.info(
                "No specialized extractor found for file extension; "
                "setting generate_preview to False",
            )
        else:
            generate_preview = True
            _logger.info(
                "No specialized extractor found for file extension; "
                "but file extension was in unextracted_preview_map; "
                "setting generate_preview to True",
            )

    # Add extraction details to metadata (mutates each dict in place)
    nx_meta_list = [_add_extraction_details(m, extractor_method) for m in nx_meta_list]

    signal_count = len(nx_meta_list)
    preview_fnames = []

    # Set the dataset type to Misc if it was not set by the file reader
    for nx_meta in nx_meta_list:
        if "DatasetType" not in nx_meta["nx_meta"]:
            nx_meta["nx_meta"]["DatasetType"] = "Misc"
            nx_meta["nx_meta"]["Data Type"] = "Miscellaneous"

    # Validate each metadata dict against the schema (strict mode)
    # This happens AFTER setting defaults to allow extractors to omit optional fields
    for nx_meta in nx_meta_list:
        validate_nx_meta(nx_meta, filename=fname)

    # Write output for each signal (single and multi-signal files); skip
    # gracefully (with a warning) when the NexusLIMS config cannot be loaded
    _can_write = write_output and _config_available()
    if write_output and not _can_write:
        _logger.warning(
            "NexusLIMS config unavailable; skipping metadata file write "
            "(pass write_output=False to suppress this warning)"
        )

    if _can_write:
        for i, nx_meta in enumerate(nx_meta_list):
            # For single-signal files, omit suffix for backward compatibility
            if signal_count == 1:
                out_fname = replace_instrument_data_path(fname, ".json")
            else:
                # For multi-signal files, append signal index to filename
                base_path = replace_instrument_data_path(fname, "")
                out_fname = Path(f"{base_path}_signal{i}.json")

            if not out_fname.exists() or overwrite:
                # Create the directory for the metadata file, if needed
                out_fname.parent.mkdir(parents=True, exist_ok=True)
                # Make sure that the nx_meta dict comes first in the json output
                out_dict = {"nx_meta": nx_meta["nx_meta"]}
                for k, v in nx_meta.items():
                    if k == "nx_meta":
                        pass
                    else:
                        out_dict[k] = v
                with out_fname.open(mode="w", encoding="utf-8") as f:
                    _logger.debug("Dumping metadata to %s", out_fname)
                    json.dump(
                        out_dict,
                        f,
                        sort_keys=False,
                        indent=2,
                        cls=_CustomEncoder,
                    )

    # Generate previews for each signal, same config-availability gating
    _can_preview = generate_preview and _config_available()
    if generate_preview and not _can_preview:
        _logger.warning(
            "NexusLIMS config unavailable; skipping preview generation "
            "(pass generate_preview=False to suppress this warning)"
        )

    if _can_preview:
        for i in range(signal_count):
            # For single-signal files, omit suffix for backward compatibility
            signal_idx = i if signal_count > 1 else None
            preview = create_preview(
                fname=fname,
                overwrite=overwrite,
                signal_index=signal_idx,
            )
            preview_fnames.append(preview)
    else:
        # Preview skipped: return a same-length list of None placeholders
        preview_fnames = [None] * signal_count

    return nx_meta_list, preview_fnames

544 

545 

def create_preview(  # noqa: PLR0911, PLR0912, PLR0915
    fname: Path, *, overwrite: bool, signal_index: int | None = None
) -> Path | None:
    """
    Generate a preview image for a given file using the plugin system.

    This method uses the preview generator plugin system to create thumbnail
    previews. It first tries to find a suitable preview generator plugin, and
    falls back to legacy methods if no plugin is found:

    1. Registered preview-generator plugin (if one accepts this context)
    2. Legacy downsampling for ``.tif`` files
    3. Legacy `unextracted_preview_map` handlers (txt/png/jpg/etc.)
    4. Legacy HyperSpy-based rendering, with a placeholder image on failure

    Parameters
    ----------
    fname
        The filename from which to read data
    overwrite
        Whether to overwrite the .json metadata file and thumbnail
        image if either exists
    signal_index
        For files with multiple signals, the index of the signal to preview.
        If None, generates a single preview (legacy behavior). If an int,
        generates preview with _signalN suffix in filename.

    Returns
    -------
    preview_fname : Optional[pathlib.Path]
        The filename of the generated preview image; if None, a preview could not be
        successfully generated.
    """
    # Generate preview filename with signal index suffix if provided
    if signal_index is None:
        preview_fname = replace_instrument_data_path(fname, ".thumb.png")
    else:
        preview_fname = replace_instrument_data_path(
            fname, f"_signal{signal_index}.thumb.png"
        )

    # Skip if preview exists and overwrite is False
    if preview_fname.is_file() and not overwrite:
        _logger.info("Preview already exists: %s", preview_fname)
        return preview_fname

    # Create context for preview generation
    instrument = get_instr_from_filepath(fname)
    context = ExtractionContext(
        file_path=fname, instrument=instrument, signal_index=signal_index
    )

    # Try to get a preview generator from the registry
    registry = get_registry()
    generator = registry.get_preview_generator(context)

    if generator:
        # Use plugin-based preview generation
        _logger.info("Generating preview using %s: %s", generator.name, preview_fname)
        # Create the directory for the thumbnail, if needed
        preview_fname.parent.mkdir(parents=True, exist_ok=True)

        success = generator.generate(context, preview_fname)
        if success:
            return preview_fname

        _logger.warning(
            "Preview generator %s failed for %s",
            generator.name,
            fname,
        )
        # Fall through to legacy methods

    # Legacy fallback for .tif files (special case with downsampling)
    extension = fname.suffix[1:]
    if extension == "tif":
        _logger.info("Using legacy downsampling for .tif: %s", preview_fname)
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        factor = 2
        down_sample_image(fname, out_path=preview_fname, factor=factor)
        return preview_fname

    # Legacy fallback for files in unextracted_preview_map
    if extension in unextracted_preview_map:
        _logger.info("Using legacy preview map for %s: %s", extension, preview_fname)
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        preview_return = unextracted_preview_map[extension](
            f=fname,
            out_path=preview_fname,
            output_size=500,
        )

        # handle the case where PIL cannot open an image (handler returns False)
        if preview_return is False:
            return None

        return preview_fname

    # Legacy fallback for HyperSpy-loadable files (lazy load to limit memory)
    _logger.info("Trying legacy HyperSpy preview generation: %s", preview_fname)
    load_options = {"lazy": True}
    if extension == "ser":
        # .ser files may contain incomplete acquisitions; load valid data only
        load_options["only_valid_data"] = True

    # noinspection PyBroadException
    try:
        s = hs.load(fname, **load_options)
    except Exception:  # pylint: disable=broad-exception-caught
        _logger.warning(
            "Signal could not be loaded by HyperSpy. "
            "Using placeholder image for preview.",
        )
        preview_fname.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(PLACEHOLDER_PREVIEW, preview_fname)
        return preview_fname

    # If s is a list of signals, select the appropriate one and annotate the
    # title so the preview shows which signal (of how many) it represents
    if isinstance(s, list):
        num_sigs = len(s)
        original_fname = s[0].metadata.General.original_filename
        if signal_index is not None:
            # Use specified signal index
            s = s[signal_index]
            s.metadata.General.title = (
                s.metadata.General.title
                + f" (signal {signal_index + 1} of "
                + f'{num_sigs} in file "{original_fname}")'
            )
        else:
            # Legacy: use first signal only
            s = s[0]
            s.metadata.General.title = (
                s.metadata.General.title
                + f' (1 of {num_sigs} total signals in file "{original_fname}")'
            )
    elif not s.metadata.General.title:
        # No title in metadata: derive one from the original filename
        s.metadata.General.title = s.metadata.General.original_filename.replace(
            extension,
            "",
        ).strip(".")

    # Generate the preview (compute() materializes the lazy signal first)
    _logger.info("Generating HyperSpy preview: %s", preview_fname)
    preview_fname.parent.mkdir(parents=True, exist_ok=True)
    s.compute(show_progressbar=False)
    try:
        sig_to_thumbnail(s, out_path=preview_fname)
    except Exception:  # pylint: disable=broad-exception-caught
        _logger.warning(
            "Legacy HyperSpy preview generation failed for %s. "
            "Using placeholder image for preview.",
            fname,
        )
        shutil.copyfile(PLACEHOLDER_PREVIEW, preview_fname)

    return preview_fname

697 

698 

def flatten_dict(_dict, parent_key="", separator=" "):  # noqa: ARG001
    """
    Flatten a nested dictionary into a single level.

    Utility method to take a nested dictionary structure and flatten it into a
    single level, separating the levels by a string as specified by
    ``separator``.

    Uses python-benedict for robust nested dictionary operations.

    Parameters
    ----------
    _dict : dict
        The dictionary to flatten
    parent_key : str
        The "root" key to add to the existing keys (unused in the current
        implementation; retained only for backward compatibility with callers)
    separator : str
        The string to use to separate values in the flattened keys (i.e.
        {'a': {'b': 'c'}} would become {'a' + sep + 'b': 'c'})

    Returns
    -------
    flattened_dict : dict
        The dictionary with depth one, with nested dictionaries flattened
        into root-level keys
    """
    # Disable keypath_separator to avoid conflicts with keys containing
    # dots or other special chars
    return benedict(_dict, keypath_separator=None).flatten(separator=separator)

728 

729 

730class _CustomEncoder(json.JSONEncoder): 

731 """ 

732 Allow non-serializable types to be written in a JSON format. 

733 

734 A custom JSON Encoder class that will allow certain types to be serialized that are 

735 not able to be by default (taken from https://stackoverflow.com/a/27050186). 

736 """ 

737 

738 def default(self, o): # noqa: PLR0911 

739 if isinstance(o, np.integer): 

740 return int(o) 

741 if isinstance(o, np.floating): 

742 return float(o) 

743 if isinstance(o, np.ndarray): 

744 return o.tolist() 

745 if isinstance(o, np.bytes_): 

746 return o.decode() 

747 if isinstance(o, np.void): 

748 # np.void array may contain arbitary binary, so base64 encode it 

749 return base64.b64encode(o.tolist()).decode("utf-8") 

750 # Handle Pint Quantity objects 

751 if isinstance(o, ureg.Quantity): 

752 return {"value": float(o.magnitude), "unit": str(o.units)} 

753 # Handle Decimal objects (convert to float for JSON serialization) 

754 if isinstance(o, Decimal): 

755 return float(o) 

756 

757 return super().default(o)