Coverage for nexusLIMS/extractors/plugins/fei_emi.py: 100%

276 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1"""FEI TIA (.ser/.emi) extractor plugin.""" 

2 

3import contextlib 

4import logging 

5from datetime import datetime as dt 

6from pathlib import Path 

7from typing import Any, ClassVar, List, Tuple 

8 

9import numpy as np 

10from hyperspy.io import load as hs_load 

11from hyperspy.signal import BaseSignal 

12 

13from nexusLIMS.db.models import Instrument 

14from nexusLIMS.extractors.base import ExtractionContext 

15from nexusLIMS.extractors.utils import add_to_extensions 

16from nexusLIMS.instruments import get_instr_from_filepath 

17from nexusLIMS.schemas.units import ureg 

18from nexusLIMS.utils.dicts import ( 

19 set_nested_dict_value, 

20 sort_dict, 

21 try_getting_dict_value, 

22) 

23from nexusLIMS.utils.time import current_system_tz 

24 

25_logger = logging.getLogger(__name__) 

26 

27 

class SerEmiExtractor:
    """
    Extractor for FEI TIA series files (.ser with accompanying .emi).

    This extractor handles metadata extraction from files saved by FEI's
    (now Thermo Fisher Scientific) TIA (Tecnai Imaging and Analysis) software.
    The .ser files contain the actual data, while .emi files contain metadata.
    """

    name = "ser_emi_extractor"
    priority = 100
    supported_extensions: ClassVar = {"ser"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if file extension is .ser (compared case-insensitively)
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        return extension == "ser"

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:  # noqa: PLR0915
        """
        Extract metadata from a .ser file and its accompanying .emi file.

        Returns metadata (as a list of dicts) from an FEI .ser file +
        its associated .emi files, with some non-relevant information stripped.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with 'nx_meta' key.
            If files cannot be opened, at least basic metadata will be returned (
            creation time, etc.)
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from SER/EMI file: %s", filename)

        # ObjectInfo present in emi; ser_header_parameters present in .ser
        # ObjectInfo should contain all the interesting metadata,
        # while ser_header_parameters is mostly technical stuff not really of
        # interest to anyone
        warning, emi_filename, ser_error = None, None, False

        # pylint: disable=broad-exception-caught
        try:
            emi_filename, ser_index = get_emi_from_ser(filename)
            s, emi_loaded = _load_ser(emi_filename, ser_index)

        except FileNotFoundError:
            # if emi wasn't found, specifically mention that
            warning = (
                "NexusLIMS could not find a corresponding .emi metadata "
                "file for this .ser file. Metadata extraction will be "
                "limited."
            )
            _logger.warning(warning)
            emi_loaded = False
            emi_filename = None

        except Exception:
            # otherwise, HyperSpy could not load the .emi, so give generic warning
            # that .emi could not be loaded for some reason:
            warning = (
                "The .emi metadata file associated with this "
                ".ser file could not be opened by NexusLIMS. "
                "Metadata extraction will be limited."
            )
            _logger.warning(warning)
            emi_loaded = False

        if not emi_loaded:
            # pylint: disable=broad-exception-caught

            # if we couldn't load the emi, lets at least open the .ser to pull
            # out the ser_header_info
            try:
                s = hs_load(filename, only_valid_data=True, lazy=True)
            except Exception:
                warning = (
                    "The .ser file could not be opened (perhaps file is "
                    "corrupted?); Metadata extraction is not possible."
                )
                _logger.warning(warning)
                # set s to an empty signal just so we can process some basic
                # metadata using same syntax as if we had read it correctly
                s = BaseSignal(np.zeros(1))
                ser_error = True

        metadata = s.original_metadata.as_dictionary()
        metadata["nx_meta"] = {}

        # if we've already encountered a warning, add that to the metadata,
        if warning:
            metadata["nx_meta"]["Extractor Warning"] = warning
        # otherwise check to ensure we actually have some metadata read from .emi
        elif "ObjectInfo" not in metadata or (
            "ExperimentalConditions" not in metadata["ObjectInfo"]
            and "ExperimentalDescription" not in metadata["ObjectInfo"]
        ):
            warning = (
                "No experimental metadata was found in the "
                "corresponding .emi file for this .ser. "
                "Metadata extraction will be limited."
            )
            _logger.warning(warning)
            metadata["nx_meta"]["Extractor Warning"] = warning

        # if we successfully found the .emi file, add it to the metadata
        if emi_filename:
            try:
                from nexusLIMS.config import settings  # noqa: PLC0415

                # store the path relative to the instrument data root
                rel_emi_fname = str(emi_filename).replace(
                    str(settings.NX_INSTRUMENT_DATA_PATH) + "/", ""
                )
            except Exception:
                # settings unavailable: fall back to the path as given
                rel_emi_fname = str(emi_filename)
            metadata["nx_meta"]["emi Filename"] = rel_emi_fname
        else:
            metadata["nx_meta"]["emi Filename"] = None

        # Get the instrument object associated with this file
        instr = get_instr_from_filepath(filename)

        # if we found the instrument, then store the name as string, else None
        instr_name = instr.name if instr is not None else None
        metadata["nx_meta"]["fname"] = filename

        # get the modification time:
        # Use instrument timezone if available, otherwise fall back to system timezone
        tz = instr.timezone if instr is not None else None
        tz = tz if tz is not None else current_system_tz()
        # st_mtime is an absolute POSIX timestamp, so convert it straight into
        # the target timezone. Building a naive system-local datetime first and
        # then labelling it with ``tz`` would shift the instant whenever the
        # host timezone differs from the instrument timezone.
        mtime_aware_dt = dt.fromtimestamp(filename.stat().st_mtime, tz=tz)
        metadata["nx_meta"]["Creation Time"] = mtime_aware_dt.isoformat()
        metadata["nx_meta"]["Instrument ID"] = instr_name

        # we could not read the signal, so add some basic metadata and return
        if ser_error:
            metadata = _handle_ser_error_metadata(metadata)
            # Migrate to schema-compliant format (move vendor meta to extensions)
            metadata = self._migrate_to_schema_compliant_metadata(metadata)
            return [metadata]

        metadata = parse_basic_info(metadata, s.data.shape, instr)
        metadata = parse_acquire_info(metadata)
        metadata = parse_experimental_conditions(metadata)
        metadata = parse_experimental_description(metadata)

        (
            metadata["nx_meta"]["Data Type"],
            metadata["nx_meta"]["DatasetType"],
        ) = parse_data_type(s, metadata)

        # we don't need to save the filename, it's just for internal processing
        del metadata["nx_meta"]["fname"]

        # Migrate metadata to schema-compliant format
        metadata = self._migrate_to_schema_compliant_metadata(metadata)

        # sort the nx_meta dictionary (recursively) for nicer display
        metadata["nx_meta"] = sort_dict(metadata["nx_meta"])

        return [metadata]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Reorganizes ``mdict["nx_meta"]`` as follows:

        - A fixed set of required fields and "Instrument ID" are copied as-is
        - A small set of core fields is renamed to EM Glossary style names
        - "warnings" is copied as-is
        - Every other field is passed to ``add_to_extensions``, together with
          any entries already present under "extensions"

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            The same dictionary, with ``nx_meta`` replaced by the reorganized
            version
        """
        nx_meta = mdict.get("nx_meta", {})
        dataset_type = nx_meta.get("DatasetType", "Image")

        # Preserve existing extensions (shallow copy so the input is untouched)
        extensions = nx_meta.get("extensions", {}).copy()

        # Field mappings from display names to EM Glossary names
        # NOTE(review): parse_acquire_info stores the voltage under
        # "Microscope Accelerating Voltage", so "AccelerationVoltage" may never
        # match for values produced by this extractor -- confirm intent.
        field_mappings = {
            "AccelerationVoltage": "acceleration_voltage",
            "Convergence Angle": "convergence_angle",
            "Acquisition Device": "acquisition_device",
        }

        # Camera Length is only core for Diffraction datasets
        if dataset_type == "Diffraction":
            field_mappings["Camera Length"] = "camera_length"

        required_fields = ("DatasetType", "Data Type", "Creation Time", "Data Dimensions")

        # Keys that are copied explicitly (or intentionally not categorized)
        # and must therefore be skipped by the categorization loop below.
        # NOTE(review): "Extractor Warnings" (plural) is skipped here, while
        # extract() writes "Extractor Warning" (singular), which is therefore
        # routed to extensions -- confirm that this is intended.
        already_handled = (
            *required_fields,
            "Instrument ID",
            "Extractor Warnings",
            "warnings",
            "extensions",
        )

        # Build new nx_meta with proper field organization
        new_nx_meta = {}

        # Copy required fields
        for field in required_fields:
            if field in nx_meta:
                new_nx_meta[field] = nx_meta[field]

        # Copy instrument identification
        if "Instrument ID" in nx_meta:
            new_nx_meta["Instrument ID"] = nx_meta["Instrument ID"]

        # Process all remaining fields and categorize
        for old_name, value in nx_meta.items():
            if old_name in already_handled:
                continue

            if old_name in field_mappings:
                # core field: keep at top level under its standardized name
                new_nx_meta[field_mappings[old_name]] = value
            else:
                # vendor sections ("ObjectInfo", "ser_header_parameters"),
                # "emi Filename", "Extractor Warning" and any other FEI-specific
                # field all go to extensions
                extensions[old_name] = value

        # Copy warnings if present
        if "warnings" in nx_meta:
            new_nx_meta["warnings"] = nx_meta["warnings"]

        # Add extensions section if we have any
        for key, value in extensions.items():
            add_to_extensions(new_nx_meta, key, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict

315 

316 

def _handle_ser_error_metadata(metadata):
    """
    Fill in placeholder values when the .ser file cannot be read.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary whose ``nx_meta`` entry contains an ``fname``
        key

    Returns
    -------
    metadata : dict
        The same dictionary, with ``nx_meta`` marked as a "Misc" dataset of
        "Unknown" data type with an empty ``warnings`` list, passed through
        ``sort_dict``, and with the internal ``fname`` entry removed
    """
    metadata["nx_meta"].update(
        {
            "DatasetType": "Misc",
            "Data Type": "Unknown",
            "warnings": [],
        }
    )
    # sort the nx_meta dictionary for nicer display, then drop the filename,
    # which is only needed for internal processing
    metadata["nx_meta"] = sort_dict(metadata["nx_meta"])
    metadata["nx_meta"].pop("fname")
    return metadata

326 

327 

def _load_ser(emi_filename: Path, ser_index: int):
    """
    Load a signal given the .emi filename and the index of the signal to use.

    The .emi file is loaded lazily with ``only_valid_data=True``. When the
    loader returns a list, the entry at position ``ser_index - 1`` is
    selected; otherwise the returned object is used directly.

    Parameters
    ----------
    emi_filename
        The path to an .emi file
    ser_index
        Which .ser file to load data from, given the .emi file above
        (1-based, as returned by ``get_emi_from_ser``)

    Returns
    -------
    hyperspy.signal.BaseSignal
        The signal loaded by HyperSpy
    bool
        Whether the emi file was successfully loaded; always ``True`` when
        this function returns, since failures propagate as exceptions
    """
    # For every .ser we want to examine, the metadata comes from the
    # corresponding .emi file. Loading with "only_valid_data" keeps the data
    # shape correct.
    loaded = hs_load(emi_filename, lazy=True, only_valid_data=True)

    if isinstance(loaded, list):
        # more than one dataset: pick the signal matching the index taken
        # from the .ser filename (1-based), i.e. list position index - 1
        return loaded[ser_index - 1], True

    # a single dataset is returned as-is
    return loaded, True

364 

365 

def parse_basic_info(metadata, shape, instrument: Instrument):
    """
    Parse basic metadata from file.

    Parse the metadata that is saved at specific places within
    the .emi tag structure into a consistent place in the metadata dictionary
    returned by :py:meth:`get_ser_metadata`. Specifically, this method handles
    the creation date, equipment manufacturer, and data shape/type.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`
    shape
        The shape of the dataset
    instrument : Instrument
        The instrument this file was collected on (may be ``None``)

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key

    Raises
    ------
    ValueError
        If ``ObjectInfo/AcquireDate`` is present but does not match the
        ``"%a %b %d %H:%M:%S %Y"`` format
    """
    # try to set creation time to acquisition time from metadata
    acq_time = try_getting_dict_value(metadata, ["ObjectInfo", "AcquireDate"])
    if acq_time is not None:
        # Use instrument timezone if available, otherwise fall back to the
        # system timezone. The fallback also applies when an instrument is
        # known but has no timezone set, matching SerEmiExtractor.extract.
        tz = instrument.timezone if instrument else None
        if tz is None:
            tz = current_system_tz()
        naive_dt = dt.strptime(acq_time, "%a %b %d %H:%M:%S %Y")  # noqa: DTZ007
        # Both instrument.timezone and current_system_tz() return pytz objects,
        # so use localize() for proper DST handling
        aware_dt = tz.localize(naive_dt)
        metadata["nx_meta"]["Creation Time"] = aware_dt.isoformat()

    # manufacturer is at high level, so parse it now
    manufacturer = try_getting_dict_value(metadata, ["ObjectInfo", "Manufacturer"])
    if manufacturer is not None:
        metadata["nx_meta"]["Manufacturer"] = manufacturer

    metadata["nx_meta"]["Data Dimensions"] = str(shape)
    metadata["nx_meta"]["warnings"] = []

    # set type to STEM Image by default (this seems to be most common)
    metadata["nx_meta"]["DatasetType"] = "Image"
    metadata["nx_meta"]["Data Type"] = "STEM_Imaging"

    return metadata

414 

415 

def parse_experimental_conditions(metadata):
    """
    Parse acquisition settings from the ``AcquireInfo`` node.

    Despite the function name, the values read here live under
    ``ObjectInfo/AcquireInfo`` in the .emi tag structure. Each value that is
    found is copied to a display name under the root-level ``nx_meta`` key;
    where a unit is given in the mapping, it is handed to
    ``map_keys_with_units`` along with the value.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    base = ["ObjectInfo", "AcquireInfo"]

    # nothing to do when the node is absent
    if try_getting_dict_value(metadata, base) is None:
        return metadata

    # (input field, output display name, unit); a unit of None means the
    # value is stored without a unit
    fields = [
        ("DwellTimePath", "Dwell Time Path", "second"),
        ("FrameTime", "Frame Time", "second"),
        ("CameraNamePath", "Camera Name Path", None),
        ("Binning", "Binning", None),
        ("BeamPosition", "Beam Position", "micrometer"),
        ("EnergyResolution", "Energy Resolution", "electron_volt"),
        ("IntegrationTime", "Integration Time", "second"),
        ("NumberSpectra", "Number of Spectra", None),
        ("ShapingTime", "Shaping Time", "second"),
        ("ScanArea", "Scan Area", None),
    ]
    term_mapping = {(src,): (dest, unit) for src, dest, unit in fields}

    return map_keys_with_units(term_mapping, base, metadata)

456 

457 

def parse_acquire_info(metadata):
    """
    Parse microscope conditions from the ``MicroscopeConditions`` node.

    Despite the function name, the values read here live under
    ``ObjectInfo/ExperimentalConditions/MicroscopeConditions`` in the .emi
    tag structure. The accelerating voltage and the two tilt values are
    copied to display names under the root-level ``nx_meta`` key.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    base = ["ObjectInfo", "ExperimentalConditions", "MicroscopeConditions"]

    # nothing to do when the node is absent
    if try_getting_dict_value(metadata, base) is None:
        return metadata

    # (input field, output display name, unit)
    fields = [
        ("AcceleratingVoltage", "Microscope Accelerating Voltage", "volt"),
        ("Tilt1", "Microscope Tilt 1", None),
        ("Tilt2", "Microscope Tilt 2", None),
    ]
    term_mapping = {(src,): (dest, unit) for src, dest, unit in fields}

    return map_keys_with_units(term_mapping, base, metadata)

490 

491 

def parse_experimental_description(metadata):
    """
    Parse experimental description.

    Copies every entry of the ``ObjectInfo/ExperimentalDescription`` node
    into the root-level ``nx_meta`` key. Each key is split into a display
    name and an FEI unit suffix with ``split_fei_metadata_units``, and the
    suffix is translated with ``fei_unit_to_pint``. Terms containing
    "Stage" are nested under "Stage Position" and terms containing
    "Filter " are nested under "Tecnai Filter".

    Parameters
    ----------
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    base = ["ObjectInfo", "ExperimentalDescription"]

    # only process the node when it is present and is a dictionary
    if isinstance(try_getting_dict_value(metadata, base), dict):
        term_mapping = {}
        for raw_key in metadata["ObjectInfo"]["ExperimentalDescription"]:
            term, fei_unit = split_fei_metadata_units(raw_key)
            pint_unit = fei_unit_to_pint(fei_unit)

            if "Stage" in term:
                # group stage coordinates under one nested entry
                out_name = ["Stage Position", term.replace("Stage ", "")]
            elif "Filter " in term:
                # group filter settings under one nested entry
                out_name = ["Tecnai Filter", term.replace("Filter ", "").title()]
            else:
                out_name = term

            term_mapping[(raw_key,)] = (out_name, pint_unit)

        metadata = map_keys_with_units(term_mapping, base, metadata)

    # Microscope Mode often has excess spaces, so fix that if needed:
    nx_meta = metadata["nx_meta"]
    if "Mode" in nx_meta:
        nx_meta["Mode"] = nx_meta["Mode"].strip()

    return metadata

552 

553 

def get_emi_from_ser(ser_fname: Path) -> tuple[Path, int]:
    """
    Get the accompanying `.emi` filename from an ser filename.

    This method assumes that the `.ser` file will be the same name as the `.emi` file,
    but with an underscore and a digit appended. i.e. ``file.emi`` would
    result in `.ser` files named ``file_1.ser``, ``file_2.ser``, etc.

    Parameters
    ----------
    ser_fname
        The absolute path of an FEI TIA `.ser` data file

    Returns
    -------
    emi_fname
        The absolute path of the accompanying `.emi` metadata file
    index : int
        The number of this .ser file (i.e. 1, 2, 3, etc.)

    Raises
    ------
    ValueError
        If the part of the file name after the last underscore is not an
        integer
    FileNotFoundError
        If the accompanying .emi file cannot be resolved to be a file
    """
    # Work on the stem only, so underscores in parent directory names can
    # never take part in the split. Everything before the last underscore is
    # the .emi base name; everything after it is the series index.
    emi_stem, _, index_text = ser_fname.stem.rpartition("_")
    emi_fname = ser_fname.parent / f"{emi_stem}.emi"
    index = int(index_text)

    if not emi_fname.is_file():
        msg = f"Could not find .emi file with expected name: {emi_fname}"
        raise FileNotFoundError(msg)
    return emi_fname, index

589 

590 

def fei_unit_to_pint(fei_unit):
    """
    Translate an FEI unit suffix into the matching Pint unit name.

    Parameters
    ----------
    fei_unit : str or None
        The unit string from FEI metadata (e.g., "kV", "uA", "um", "deg")

    Returns
    -------
    str or None
        The corresponding Pint unit name, or None when ``fei_unit`` is None
        or is not one of the recognized suffixes
    """
    # FEI suffix -> Pint unit name
    pint_names = dict(
        kV="kilovolt",
        V="volt",
        uA="microampere",
        um="micrometer",
        deg="degree",
        s="second",
        eV="electron_volt",
        keV="kiloelectron_volt",
        mm="millimeter",
        nm="nanometer",
        mrad="milliradian",
    )
    return None if fei_unit is None else pint_names.get(fei_unit)

624 

625 

def split_fei_metadata_units(metadata_term):
    """
    Split metadata into value and units.

    If present, separate a metadata term into its value and units.
    In the FEI metadata structure, units are indicated separated by an
    underscore at the end of the term. i.e. ``High tension_kV`` indicates that
    the `High tension` metadata value has units of `kV`.

    The split is made at the *last* underscore, so a term that itself contains
    underscores keeps its full name and only the trailing piece is treated as
    the unit.

    Parameters
    ----------
    metadata_term : str
        The metadata term read from the FEI tag structure

    Returns
    -------
    mdata_and_unit : :obj:`tuple` of :obj:`str`
        A length-2 tuple with the metadata value name as the first
        item and the unit as the second item (``None`` when the term contains
        no underscore)
    """
    name, separator, unit = metadata_term.rpartition("_")
    if not separator:
        # no underscore at all: the whole term is the name and there is no unit
        name, unit = metadata_term, None

    # capitalize any words in metadata term that are all lowercase:
    mdata_term = " ".join(w.title() if w.islower() else w for w in name.split())
    # replace weird "Stem" capitalization
    mdata_term = mdata_term.replace("Stem ", "STEM ")

    return (mdata_term, unit)

659 

660 

def map_keys_with_units(term_mapping, base, metadata):
    """
    Copy selected metadata values into ``nx_meta``, attaching units if given.

    For every entry of ``term_mapping`` the value at ``base + input terms`` is
    looked up in ``metadata``. Values that are found are cleaned up (for
    strings, any " um" text is removed and whitespace is stripped), converted
    to a number when possible, wrapped in a Pint Quantity when a unit is
    specified and the value is numeric, and finally written under
    ``nx_meta`` at the output path.

    Parameters
    ----------
    term_mapping : dict
        Dictionary where keys are tuples of strings (the input terms),
        and values are tuples of (output_name, unit) where output_name
        is either a string or list of strings, and unit is either a string
        (Pint unit name) or None
    base : list
        The 'root' path within the metadata dictionary
    metadata : dict
        A metadata dictionary

    Returns
    -------
    metadata : dict
        The same metadata dictionary with values added to nx_meta
    """
    for in_term, (out_spec, unit) in term_mapping.items():
        source_path = list(in_term) if isinstance(in_term, tuple) else in_term
        target_path = [out_spec] if isinstance(out_spec, str) else out_spec

        val = try_getting_dict_value(metadata, base + source_path)
        if val is None:
            # value not present in this file; nothing to copy
            continue

        # Clean up string values (remove " um" etc.)
        if isinstance(val, str):
            val = val.replace(" um", "").strip()

        # Convert to numeric first (handles string numbers)
        val = _convert_to_numeric(val)

        # Create Quantity if unit specified and value is numeric; on failure
        # the plain numeric value is kept
        if unit is not None and isinstance(val, (int, float)):
            with contextlib.suppress(ValueError, TypeError):
                val = ureg.Quantity(val, unit)

        set_nested_dict_value(metadata, ["nx_meta", *target_path], val)

    return metadata

714 

715 

def parse_data_type(s, metadata):
    """
    Parse the data type from the signal's metadata.

    Determine `"Data Type"` and `"DatasetType"` for the given .ser file based
    off of metadata and signal characteristics. This method is used to
    determine whether the image is TEM or STEM, Image or Diffraction,
    Spectrum or Spectrum Image, etc.

    Due to lack of appropriate metadata written by the FEI software,
    a heuristic of axis limits and size is used to determine whether a
    spectrum's data type is EELS or EDS. This may not be a perfect
    determination.

    When the signal dimension is neither 1 nor 2, the dataset type is
    ``"Misc"`` and the data type consists of the instrument configuration
    only.

    Parameters
    ----------
    s : :py:class:`hyperspy.signal.BaseSignal` (or subclass)
        The HyperSpy signal that contains the data of interest
    metadata : dict
        A metadata dictionary as returned by :py:meth:`get_ser_metadata`

    Returns
    -------
    data_type : str
        The string that should be stored at metadata['nx_meta']['Data Type']
    dataset_type : str
        The string that should be stored at metadata['nx_meta']['DatasetType']
    """
    # default values that will be overwritten if the conditions below are met.
    # instr_mod must be initialized here: without it, a signal dimension other
    # than 1 or 2 would hit an unbound local when building data_type below.
    dataset_type = "Misc"
    instr_mod = []

    # instrument configuration
    instr_conf = []
    _set_instrument_type(instr_conf, metadata)

    signal_dimension = s.axes_manager.signal_dimension

    # images have signal dimension of two:
    if signal_dimension == 2:  # noqa: PLR2004
        instr_mod, dataset_type = _signal_dim_2(metadata)

    # if signal dimension is 1, it's a spectrum and not an image
    elif signal_dimension == 1:
        instr_mod = ["Spectrum"]
        dataset_type = "Spectrum"
        if s.axes_manager.navigation_dimension > 0:
            instr_mod.append("Imaging")
            dataset_type = "SpectrumImage"

        # do some basic axis value analysis to guess signal type since we
        # don't have any indication of EELS vs. EDS; assume 5 keV and above
        # is EDS
        signal_axis = s.axes_manager.signal_axes[0]
        if signal_axis.high_value > 5000:  # noqa: PLR2004
            if "EDS" not in instr_conf:
                instr_conf.append("EDS")
        # EELS spectra are usually 2048 channels
        elif signal_axis.size == 2048:  # noqa: PLR2004
            instr_conf.append("EELS")

    data_type = "_".join(instr_conf + instr_mod)

    return data_type, dataset_type

775 

776 

def _set_instrument_type(instr_conf, metadata):
    """
    Append "STEM" or "TEM" to ``instr_conf`` based on the available metadata.

    The decision uses ``nx_meta["Mode"]`` when that key is present (nothing
    is appended if the mode mentions neither "STEM" nor "TEM"). Otherwise it
    is guessed from ``nx_meta["Instrument ID"]``, defaulting to "TEM".

    Parameters
    ----------
    instr_conf : list
        The list of instrument configuration terms; modified in place
    metadata : dict
        A metadata dictionary with an ``nx_meta`` entry
    """
    nx_meta = metadata["nx_meta"]

    # sometimes there is no metadata for follow-on signals in an .emi/.ser
    # bundle (i.e. .ser files after the first one), so "Mode" may be missing
    if "Mode" in nx_meta:
        mode = nx_meta["Mode"]
        # "STEM" is checked first because it also contains "TEM"
        for technique in ("STEM", "TEM"):
            if technique in mode:
                instr_conf.append(technique)
                break
        return

    # no mode read from the .emi: make the determination off of the
    # instrument name (this is really a guess); default to TEM, since STEM is
    # technically a sub-technique of TEM
    instrument_id = nx_meta["Instrument ID"]
    if instrument_id is not None and "STEM" in instrument_id:
        instr_conf.append("STEM")
    else:
        instr_conf.append("TEM")

795 

796 

def _signal_dim_2(metadata) -> Tuple[List[str], str]:
    """
    Parse data type for a Signal with "signal dimension" of size 2.

    The result is an image unless ``nx_meta["Mode"]`` mentions "Diffraction"
    and "TEM" without mentioning "Image" or "STEM".

    Parameters
    ----------
    metadata
        A metadata dictionary with an ``nx_meta`` entry

    Returns
    -------
    list of str
        The instrument mode
    str
        The dataset type
    """
    nx_meta = metadata["nx_meta"]

    if "Mode" in nx_meta:
        mode = nx_meta["Mode"]
        # Diffraction mode is only actually diffraction in TEM mode;
        # in STEM, imaging happens in diffraction mode
        true_diffraction = (
            "Image" not in mode
            and "Diffraction" in mode
            and "STEM" not in mode
            and "TEM" in mode
        )
        if true_diffraction:
            return ["Diffraction"], "Diffraction"

    # default to an image dataset type for 2 dimensional signal
    return ["Imaging"], "Image"

830 

831 

def _convert_to_numeric(val):
    """
    Convert a string to a number when it parses as one.

    Strings containing a "." are parsed with ``float``; all other strings
    are parsed with ``int``.

    Parameters
    ----------
    val
        The value to convert

    Returns
    -------
    The parsed number, or ``val`` unchanged when it is not a string or
    cannot be parsed
    """
    if not isinstance(val, str):
        return val

    # a decimal point selects float parsing; everything else is tried as int
    parse = float if "." in val else int
    try:
        return parse(val)
    except ValueError:
        return val

846 

847 

848# Backward compatibility function for tests 

def get_ser_metadata(filename):
    """
    Get metadata from a .ser file and its accompanying .emi file.

    .. deprecated::
        This function is deprecated. Use SerEmiExtractor class instead.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list of dict
        The value returned by ``SerEmiExtractor.extract``: a list holding the
        dictionary that describes the file's metadata.
    """
    # look up the instrument from the path, then delegate to the extractor
    instrument = get_instr_from_filepath(filename)
    context = ExtractionContext(file_path=filename, instrument=instrument)
    return SerEmiExtractor().extract(context)