Coverage for nexusLIMS/extractors/plugins/fei_emi.py

1"""FEI TIA (.ser/.emi) extractor plugin."""

3import contextlib

4import logging

5from datetime import datetime as dt

6from pathlib import Path

7from typing import Any, ClassVar, List, Tuple

9import numpy as np

10from hyperspy.io import load as hs_load

11from hyperspy.signal import BaseSignal

13from nexusLIMS.db.models import Instrument

14from nexusLIMS.extractors.base import ExtractionContext

15from nexusLIMS.extractors.utils import add_to_extensions

16from nexusLIMS.instruments import get_instr_from_filepath

17from nexusLIMS.schemas.units import ureg

18from nexusLIMS.utils.dicts import (

19 set_nested_dict_value,

20 sort_dict,

21 try_getting_dict_value,

22)

23from nexusLIMS.utils.time import current_system_tz

# Module-level logger, named after this module via ``__name__``.
_logger = logging.getLogger(__name__)

class SerEmiExtractor:
    """
    Metadata extractor plugin for FEI TIA ``.ser`` files and their ``.emi`` partner.

    TIA (Tecnai Imaging and Analysis) is the acquisition software from FEI
    (now Thermo Fisher Scientific). The ``.ser`` file holds the data and the
    ``.emi`` file holds the descriptive metadata, so extraction first tries to
    go through the ``.emi`` file and falls back to the bare ``.ser`` file when
    that is not possible.
    """

    # identifier of this plugin
    name = "ser_emi_extractor"
    # numeric priority of this plugin
    priority = 100
    # file extensions (without the leading dot) handled by this plugin
    supported_extensions: ClassVar = {"ser"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Tell whether the file described by ``context`` is a ``.ser`` file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True when the (case-insensitive) file extension is ``ser``
        """
        suffix = context.file_path.suffix
        return suffix.lower().lstrip(".") == "ser"

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:  # noqa: PLR0915
        """
        Build the metadata for a ``.ser`` file, using its ``.emi`` file if possible.

        The ``.emi`` file is loaded first; when it is missing or unreadable the
        ``.ser`` file is loaded directly, and when that also fails an empty
        placeholder signal is used so that basic metadata (creation time,
        instrument, warning text) can still be returned.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            A one-element list holding the metadata dictionary. The values
            produced by this extractor live under its ``nx_meta`` key.
        """
        ser_path = context.file_path
        _logger.debug("Extracting metadata from SER/EMI file: %s", ser_path)

        # "ObjectInfo" comes from the .emi file and carries the interesting
        # metadata; "ser_header_parameters" comes from the .ser file and is
        # mostly technical detail
        warning = None
        emi_filename = None
        ser_error = False

        # pylint: disable=broad-exception-caught
        try:
            emi_filename, ser_index = get_emi_from_ser(ser_path)
            s, emi_loaded = _load_ser(emi_filename, ser_index)
        except FileNotFoundError:
            # no .emi file: say so explicitly
            warning = (
                "NexusLIMS could not find a corresponding .emi metadata "
                "file for this .ser file. Metadata extraction will be "
                "limited."
            )
            _logger.warning(warning)
            emi_loaded = False
            emi_filename = None
        except Exception:
            # any other failure means the .emi file could not be loaded
            warning = (
                "The .emi metadata file associated with this "
                ".ser file could not be opened by NexusLIMS. "
                "Metadata extraction will be limited."
            )
            _logger.warning(warning)
            emi_loaded = False

        if not emi_loaded:
            # pylint: disable=broad-exception-caught
            # without the .emi file, open the .ser file on its own so that at
            # least its header information is available
            try:
                s = hs_load(ser_path, only_valid_data=True, lazy=True)
            except Exception:
                warning = (
                    "The .ser file could not be opened (perhaps file is "
                    "corrupted?); Metadata extraction is not possible."
                )
                _logger.warning(warning)
                # placeholder signal, so the code below can treat this case
                # like a successful read
                s = BaseSignal(np.zeros(1))
                ser_error = True

        metadata = s.original_metadata.as_dictionary()
        metadata["nx_meta"] = {}
        nx_meta = metadata["nx_meta"]

        if warning:
            # a warning raised while loading takes precedence
            nx_meta["Extractor Warning"] = warning
        elif not (
            "ObjectInfo" in metadata
            and (
                "ExperimentalConditions" in metadata["ObjectInfo"]
                or "ExperimentalDescription" in metadata["ObjectInfo"]
            )
        ):
            # loading worked, but the .emi file held no experimental metadata
            warning = (
                "No experimental metadata was found in the "
                "corresponding .emi file for this .ser. "
                "Metadata extraction will be limited."
            )
            _logger.warning(warning)
            nx_meta["Extractor Warning"] = warning

        # record the .emi file name (with the instrument data path prefix
        # removed when the settings can be read), or None if there is none
        if emi_filename:
            try:
                from nexusLIMS.config import settings  # noqa: PLC0415

                data_root = str(settings.NX_INSTRUMENT_DATA_PATH) + "/"
                rel_emi_fname = str(emi_filename).replace(data_root, "")
            except Exception:
                rel_emi_fname = str(emi_filename)
            nx_meta["emi Filename"] = rel_emi_fname
        else:
            nx_meta["emi Filename"] = None

        # look up the instrument for this file; its name is stored as a
        # string, or None when no instrument was found
        instr = get_instr_from_filepath(ser_path)
        instr_name = None if instr is None else instr.name
        nx_meta["fname"] = ser_path

        # creation time defaults to the file modification time, localized to
        # the instrument timezone if there is one, else the system timezone
        mtime_naive_dt = dt.fromtimestamp(ser_path.stat().st_mtime)  # noqa: DTZ006
        tz = None if instr is None else instr.timezone
        if tz is None:
            tz = current_system_tz()
        nx_meta["Creation Time"] = tz.localize(mtime_naive_dt).isoformat()
        nx_meta["Instrument ID"] = instr_name

        if ser_error:
            # the signal could not be read: return only the basic metadata,
            # migrated to the schema-compliant layout
            metadata = _handle_ser_error_metadata(metadata)
            metadata = self._migrate_to_schema_compliant_metadata(metadata)
            return [metadata]

        metadata = parse_basic_info(metadata, s.data.shape, instr)
        metadata = parse_acquire_info(metadata)
        metadata = parse_experimental_conditions(metadata)
        metadata = parse_experimental_description(metadata)

        data_type, dataset_type = parse_data_type(s, metadata)
        metadata["nx_meta"]["Data Type"] = data_type
        metadata["nx_meta"]["DatasetType"] = dataset_type

        # the file name was only needed for internal processing
        del metadata["nx_meta"]["fname"]

        metadata = self._migrate_to_schema_compliant_metadata(metadata)

        # sort nx_meta (recursively) for nicer display
        metadata["nx_meta"] = sort_dict(metadata["nx_meta"])

        return [metadata]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Rearrange ``nx_meta`` into core fields plus an extensions section.

        The rules applied to each entry of ``nx_meta`` are:

        - ``DatasetType``, ``Data Type``, ``Creation Time``, ``Data Dimensions``,
          ``Instrument ID`` and ``warnings`` are copied unchanged
        - ``AccelerationVoltage``, ``Convergence Angle`` and ``Acquisition Device``
          (plus ``Camera Length`` for "Diffraction" datasets) are copied under
          their snake_case names
        - ``Extractor Warnings`` is dropped
        - every other entry is handed to ``add_to_extensions``, after any
          entries already present under ``extensions``

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            The same dictionary object, with ``nx_meta`` replaced by the
            reorganized version
        """
        source = mdict.get("nx_meta", {})

        # keep extensions that are already there (shallow copy)
        vendor_fields = source["extensions"].copy() if "extensions" in source else {}

        # display name -> core field name
        core_names = {
            "AccelerationVoltage": "acceleration_voltage",
            "Convergence Angle": "convergence_angle",
            "Acquisition Device": "acquisition_device",
        }
        # camera length only counts as a core field for diffraction data
        if source.get("DatasetType", "Image") == "Diffraction":
            core_names["Camera Length"] = "camera_length"

        copied_verbatim = (
            "DatasetType",
            "Data Type",
            "Creation Time",
            "Data Dimensions",
            "Instrument ID",
        )
        result = {key: source[key] for key in copied_verbatim if key in source}

        # keys that must not be routed by the loop below
        already_handled = {
            *copied_verbatim,
            "Extractor Warnings",
            "warnings",
            "extensions",
        }
        for key, value in source.items():
            if key in already_handled:
                continue
            if key in core_names:
                result[core_names[key]] = value
            else:
                # vendor-specific entries such as "emi Filename" and
                # "Extractor Warning", and anything else not listed above
                vendor_fields[key] = value

        if "warnings" in source:
            result["warnings"] = source["warnings"]

        for key, value in vendor_fields.items():
            add_to_extensions(result, key, value)

        mdict["nx_meta"] = result
        return mdict

315

316

def _handle_ser_error_metadata(metadata):
    """
    Fill in placeholder values for a .ser file that could not be read.

    Sets the dataset type to "Misc", the data type to "Unknown" and the
    warnings list to empty, sorts ``nx_meta`` with ``sort_dict`` and removes
    the internal ``fname`` entry.

    Parameters
    ----------
    metadata : dict
        Metadata dictionary whose ``nx_meta`` entry contains an ``fname`` key

    Returns
    -------
    metadata : dict
        The same dictionary, with ``nx_meta`` replaced by the sorted version
    """
    metadata["nx_meta"].update(
        {"DatasetType": "Misc", "Data Type": "Unknown", "warnings": []},
    )
    # sort (recursively) for nicer display, then drop the internal file name
    ordered = sort_dict(metadata["nx_meta"])
    metadata["nx_meta"] = ordered
    del metadata["nx_meta"]["fname"]
    return metadata

326

327

def _load_ser(emi_filename: Path, ser_index: int):
    """
    Load one signal through its .emi file.

    Parameters
    ----------
    emi_filename
        The path to an .emi file
    ser_index
        The 1-based number of the .ser file whose signal is wanted

    Returns
    -------
    hyperspy.signal.BaseSignal
        The signal loaded by HyperSpy
    bool
        Always True; the function only returns if loading did not raise
    """
    # "only_valid_data" keeps the data shape correct. Loading the .emi file
    # also loads the .ser data, so a problem with the .ser file surfaces here.
    loaded = hs_load(emi_filename, lazy=True, only_valid_data=True)

    # several datasets come back as a list; the wanted one sits at the
    # file-name index minus one
    if isinstance(loaded, list):
        return loaded[ser_index - 1], True
    return loaded, True

364

365

def parse_basic_info(metadata, shape, instrument: Instrument):
    """
    Parse basic metadata from file.

    Handles the values that live directly under ``ObjectInfo``: the
    acquisition date (which replaces the creation time when present) and the
    manufacturer. It also records the data shape, resets the warnings list and
    sets the default dataset type and data type.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary with an ``nx_meta`` entry, as built by
        ``SerEmiExtractor.extract``
    shape
        The shape of the dataset
    instrument : Instrument
        The instrument this file was collected on; may be None (or falsy), in
        which case the system timezone is used

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key

    Raises
    ------
    ValueError
        If the acquisition date is present but does not match the format
        ``"%a %b %d %H:%M:%S %Y"``
    """
    # prefer the acquisition time from the metadata as creation time
    acq_time = try_getting_dict_value(metadata, ["ObjectInfo", "AcquireDate"])
    if acq_time is not None:
        # Use the instrument timezone when there is one; fall back to the
        # system timezone when there is no instrument OR the instrument has
        # no timezone set (same fallback as in SerEmiExtractor.extract)
        tz = instrument.timezone if instrument else None
        if tz is None:
            tz = current_system_tz()
        naive_dt = dt.strptime(acq_time, "%a %b %d %H:%M:%S %Y")  # noqa: DTZ007
        # localize() (pytz-style API) is used for proper DST handling
        aware_dt = tz.localize(naive_dt)
        metadata["nx_meta"]["Creation Time"] = aware_dt.isoformat()

    # manufacturer is at high level, so parse it now
    manufacturer = try_getting_dict_value(metadata, ["ObjectInfo", "Manufacturer"])
    if manufacturer is not None:
        metadata["nx_meta"]["Manufacturer"] = manufacturer

    metadata["nx_meta"]["Data Dimensions"] = str(shape)
    metadata["nx_meta"]["warnings"] = []

    # set type to STEM Image by default (this seems to be most common)
    metadata["nx_meta"]["DatasetType"] = "Image"
    metadata["nx_meta"]["Data Type"] = "STEM_Imaging"

    return metadata

414

415

def parse_experimental_conditions(metadata):
    """
    Copy acquisition settings into ``nx_meta``.

    NOTE: despite its name, this function reads the ``ObjectInfo`` ->
    ``AcquireInfo`` node of the metadata. Each known term found there is
    stored under a display name in ``nx_meta`` (see ``map_keys_with_units``),
    with a unit attached where one is listed below.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary with an ``nx_meta`` entry

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key; returned untouched when the node is absent
    """
    base = ["ObjectInfo", "AcquireInfo"]
    if try_getting_dict_value(metadata, base) is None:
        return metadata

    # (input term, output name, Pint unit name or None for "store as-is")
    fields = (
        ("DwellTimePath", "Dwell Time Path", "second"),
        ("FrameTime", "Frame Time", "second"),
        ("CameraNamePath", "Camera Name Path", None),
        ("Binning", "Binning", None),
        ("BeamPosition", "Beam Position", "micrometer"),
        ("EnergyResolution", "Energy Resolution", "electron_volt"),
        ("IntegrationTime", "Integration Time", "second"),
        ("NumberSpectra", "Number of Spectra", None),
        ("ShapingTime", "Shaping Time", "second"),
        ("ScanArea", "Scan Area", None),
    )
    term_mapping = {(term,): (out_name, unit) for term, out_name, unit in fields}

    return map_keys_with_units(term_mapping, base, metadata)

456

457

def parse_acquire_info(metadata):
    """
    Copy microscope conditions into ``nx_meta``.

    NOTE: despite its name, this function reads the ``ObjectInfo`` ->
    ``ExperimentalConditions`` -> ``MicroscopeConditions`` node of the
    metadata. The accelerating voltage (in volts) and the two tilt values
    found there are stored under display names in ``nx_meta``.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary with an ``nx_meta`` entry

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key; returned untouched when the node is absent
    """
    base = ["ObjectInfo", "ExperimentalConditions", "MicroscopeConditions"]
    if try_getting_dict_value(metadata, base) is None:
        return metadata

    # (input term, output name, Pint unit name or None for "store as-is")
    fields = (
        ("AcceleratingVoltage", "Microscope Accelerating Voltage", "volt"),
        ("Tilt1", "Microscope Tilt 1", None),
        ("Tilt2", "Microscope Tilt 2", None),
    )
    term_mapping = {(term,): (out_name, unit) for term, out_name, unit in fields}

    return map_keys_with_units(term_mapping, base, metadata)

490

491

def parse_experimental_description(metadata):
    """
    Copy the experimental description into ``nx_meta``.

    Every key of the ``ObjectInfo`` -> ``ExperimentalDescription`` node is
    copied. The key is split into a display name and an FEI unit with
    ``split_fei_metadata_units``; the unit is translated with
    ``fei_unit_to_pint``. Terms containing "Stage" are grouped under
    "Stage Position" and terms containing "Filter " under "Tecnai Filter".
    Finally, surrounding whitespace is removed from the "Mode" value.

    Parameters
    ----------
    metadata : dict
        A metadata dictionary with an ``nx_meta`` entry

    Returns
    -------
    metadata : dict
        The same metadata dictionary with some values added under the
        root-level ``nx_meta`` key
    """
    base = ["ObjectInfo", "ExperimentalDescription"]

    section = try_getting_dict_value(metadata, base)
    if isinstance(section, dict):
        term_mapping = {}
        for raw_key in metadata["ObjectInfo"]["ExperimentalDescription"]:
            label, fei_unit = split_fei_metadata_units(raw_key)

            if "Stage" in label:
                # stage values are nested under "Stage Position"
                destination = ["Stage Position", label.replace("Stage ", "")]
            elif "Filter " in label:
                # filter settings are nested under "Tecnai Filter"
                destination = ["Tecnai Filter", label.replace("Filter ", "").title()]
            else:
                destination = label

            term_mapping[(raw_key,)] = (destination, fei_unit_to_pint(fei_unit))

        metadata = map_keys_with_units(term_mapping, base, metadata)

    # the microscope mode often has excess spaces
    nx_meta = metadata["nx_meta"]
    if "Mode" in nx_meta:
        nx_meta["Mode"] = nx_meta["Mode"].strip()

    return metadata

552

553

def get_emi_from_ser(ser_fname: Path) -> Tuple[Path, int]:
    """
    Get the accompanying `.emi` filename from an ser filename.

    This method assumes that the `.ser` file will be the same name as the `.emi` file,
    but with an underscore and a digit appended. i.e. ``file.emi`` would
    result in `.ser` files named ``file_1.ser``, ``file_2.ser``, etc.

    Only the file name itself is inspected, so underscores in the names of
    parent directories do not influence the result.

    Parameters
    ----------
    ser_fname
        The path of an FEI TIA `.ser` data file

    Returns
    -------
    emi_fname
        The path of the accompanying `.emi` metadata file (in the same
        directory as ``ser_fname``)
    index : int
        The number of this .ser file (i.e. 1, 2, 3, etc.)

    Raises
    ------
    FileNotFoundError
        If the accompanying .emi file cannot be resolved to be a file
    ValueError
        If the text after the last underscore of the file name (or the whole
        name, when it has no underscore) is not an integer
    """
    # split "<name>_<index>" on the LAST underscore of the stem only
    base_name, _, index_text = ser_fname.stem.rpartition("_")
    index = int(index_text)
    emi_fname = ser_fname.parent / f"{base_name}.emi"

    if not emi_fname.is_file():
        msg = f"Could not find .emi file with expected name: {emi_fname}"
        raise FileNotFoundError(msg)
    return emi_fname, index

589

590

def fei_unit_to_pint(fei_unit):
    """
    Translate an FEI unit abbreviation into a Pint unit name.

    Parameters
    ----------
    fei_unit : str or None
        The unit string from FEI metadata (e.g., "kV", "uA", "um", "deg")

    Returns
    -------
    str or None
        The matching Pint unit name; None when ``fei_unit`` is None or is not
        one of the known abbreviations
    """
    if fei_unit is None:
        return None

    # FEI abbreviation -> Pint unit name
    known_units = {
        "kV": "kilovolt",
        "V": "volt",
        "uA": "microampere",
        "um": "micrometer",
        "deg": "degree",
        "s": "second",
        "eV": "electron_volt",
        "keV": "kiloelectron_volt",
        "mm": "millimeter",
        "nm": "nanometer",
        "mrad": "milliradian",
    }
    pint_name = known_units.get(fei_unit)
    return pint_name

624

625

def split_fei_metadata_units(metadata_term):
    """
    Split a metadata term into its name and its units.

    In the FEI metadata structure, units are indicated separated by an
    underscore at the end of the term. i.e. ``High tension_kV`` indicates that
    the `High tension` metadata value has units of `kV`. The term is split on
    its LAST underscore, so a name that itself contains underscores is kept
    intact.

    Words of the name that are entirely lowercase are capitalized, and a
    leading-word "Stem " is rewritten as "STEM ".

    Parameters
    ----------
    metadata_term : str
        The metadata term read from the FEI tag structure

    Returns
    -------
    mdata_and_unit : :obj:`tuple` of :obj:`str`
        A length-2 tuple with the metadata value name as the first
        item and the unit as the second item (None when the term contains
        no underscore)
    """
    raw_name, separator, unit = metadata_term.rpartition("_")
    if not separator:
        # no underscore at all: the whole term is the name
        raw_name, unit = metadata_term, None

    # capitalize any words in metadata term that are all lowercase:
    mdata_term = " ".join(
        word.title() if word.islower() else word for word in raw_name.split()
    )
    # replace weird "Stem" capitalization
    mdata_term = mdata_term.replace("Stem ", "STEM ")

    return (mdata_term, unit)

659

660

def map_keys_with_units(term_mapping, base, metadata):
    """
    Copy selected values below ``base`` into ``nx_meta``, attaching units.

    For each entry of ``term_mapping`` the value is looked up below ``base``.
    Values that are found are cleaned (for strings, `` um`` is removed and
    whitespace stripped), converted to a number where possible and, when a
    unit is given and the value is numeric, wrapped in a Pint Quantity.

    Parameters
    ----------
    term_mapping : dict
        Dictionary where keys are tuples of strings (the input terms),
        and values are tuples of (output_name, unit) where output_name
        is either a string or list of strings, and unit is either a string
        (Pint unit name) or None
    base : list
        The 'root' path within the metadata dictionary
    metadata : dict
        A metadata dictionary

    Returns
    -------
    metadata : dict
        The same metadata dictionary with values added to nx_meta
    """
    for source_key, (destination, unit) in term_mapping.items():
        key_path = list(source_key) if isinstance(source_key, tuple) else source_key
        out_path = [destination] if isinstance(destination, str) else destination

        val = try_getting_dict_value(metadata, base + key_path)
        if val is None:
            # nothing stored for this term
            continue

        # strip a trailing " um" and surrounding whitespace from strings
        if isinstance(val, str):
            val = val.replace(" um", "").strip()

        # numeric strings become numbers
        val = _convert_to_numeric(val)

        # attach the unit to numeric values; if building the Quantity fails
        # with ValueError/TypeError, the plain number is kept
        if unit is not None and isinstance(val, (int, float)):
            with contextlib.suppress(ValueError, TypeError):
                val = ureg.Quantity(val, unit)

        set_nested_dict_value(metadata, ["nx_meta", *out_path], val)

    return metadata

714

715

def parse_data_type(s, metadata):
    """
    Parse the data type from the signal's metadata.

    Determine `"Data Type"` and `"DatasetType"` for the given .ser file based
    off of metadata and signal characteristics. This method is used to
    determine whether the image is TEM or STEM, Image or Diffraction,
    Spectrum or Spectrum Image, etc.

    Due to lack of appropriate metadata written by the FEI software,
    a heuristic of axis limits and size is used to determine whether a
    spectrum's data type is EELS or EDS. This may not be a perfect
    determination.

    A signal whose signal dimension is neither 1 nor 2 yields the dataset
    type "Misc" and a data type made of the instrument configuration only.

    Parameters
    ----------
    s : :py:class:`hyperspy.signal.BaseSignal` (or subclass)
        The HyperSpy signal that contains the data of interest
    metadata : dict
        A metadata dictionary whose ``nx_meta`` entry contains at least
        ``Mode`` or ``Instrument ID``

    Returns
    -------
    data_type : str
        The string that should be stored at metadata['nx_meta']['Data Type']
    dataset_type : str
        The string that should be stored at metadata['nx_meta']['DatasetType']
    """
    # defaults, used when the signal dimension is neither 1 nor 2
    dataset_type = "Misc"
    instr_mod = []

    # instrument configuration
    instr_conf = []
    _set_instrument_type(instr_conf, metadata)

    signal_dimension = s.axes_manager.signal_dimension

    # images have signal dimension of two:
    if signal_dimension == 2:  # noqa: PLR2004
        instr_mod, dataset_type = _signal_dim_2(metadata)

    # if signal dimension is 1, it's a spectrum and not an image
    elif signal_dimension == 1:
        instr_mod = ["Spectrum"]
        dataset_type = "Spectrum"
        if s.axes_manager.navigation_dimension > 0:
            instr_mod.append("Imaging")
            dataset_type = "SpectrumImage"

        # there is no indication of EELS vs. EDS, so guess from the axis:
        # a high value above 5000 is taken to be EDS
        # (presumably eV, i.e. 5 keV -- confirm against the axis units)
        signal_axis = s.axes_manager.signal_axes[0]
        if signal_axis.high_value > 5000:  # noqa: PLR2004
            if "EDS" not in instr_conf:
                instr_conf.append("EDS")
        # EELS spectra are usually 2048 channels
        elif signal_axis.size == 2048:  # noqa: PLR2004
            instr_conf.append("EELS")

    data_type = "_".join(instr_conf + instr_mod)

    return data_type, dataset_type

775

776

777def _set_instrument_type(instr_conf, metadata):

778 # sometimes there is no metadata for follow-on signals in an .emi/.ser

779 # bundle (i.e. .ser files after the first one)

780 if "Mode" in metadata["nx_meta"]:

781 if "STEM" in metadata["nx_meta"]["Mode"]:

782 instr_conf.append("STEM")

783 elif "TEM" in metadata["nx_meta"]["Mode"]:

784 instr_conf.append("TEM")

785 # if there is no metadata read from .emi, make determination

786 # off of instrument (this is really a guess)

787 elif metadata["nx_meta"]["Instrument ID"] is not None:

788 if "STEM" in metadata["nx_meta"]["Instrument ID"]:

789 instr_conf.append("STEM")

790 else:

791 instr_conf.append("TEM")

792 else:

793 # default to TEM, (since STEM is technically a sub-technique of TEM)

794 instr_conf.append("TEM")

795

796

797def _signal_dim_2(metadata) -> Tuple[List[str], str]:

798 """

799 Parse data type for a Signal with "signal dimension" of size 2.

800

801 Parameters

802 ----------

803 metadata

804

805 Returns

806 -------

807 list of str

808 The instrument mode

809 str

810 The dataset type

811 """

812 # default to an image dataset type for 2 dimensional signal

813 dataset_type = "Image"

814 # instrument modality:

815 instr_mod = ["Imaging"]

816 if "Mode" in metadata["nx_meta"]:

817 if "Image" in metadata["nx_meta"]["Mode"]:

818 instr_mod = ["Imaging"]

819 dataset_type = "Image"

820 elif "Diffraction" in metadata["nx_meta"]["Mode"]:

821 # Diffraction mode is only actually diffraction in TEM mode,

822 # In STEM, imaging happens in diffraction mode

823 if "STEM" in metadata["nx_meta"]["Mode"]:

824 instr_mod = ["Imaging"]

825 dataset_type = "Image"

826 elif "TEM" in metadata["nx_meta"]["Mode"]:

827 instr_mod = ["Diffraction"]

828 dataset_type = "Diffraction"

829 return instr_mod, dataset_type

830

831

832def _convert_to_numeric(val):

833 if isinstance(val, str):

834 if "." in val:

835 try:

836 return float(val)

837 except ValueError:

838 return val

839 else:

840 try:

841 return int(val)

842 except ValueError:

843 return val

844 else:

845 return val

846

847

848# Backward compatibility function for tests

def get_ser_metadata(filename):
    """
    Get metadata from a .ser file and its accompanying .emi file.

    .. deprecated::
        This function is deprecated. Use SerEmiExtractor class instead.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list of dict
        Whatever ``SerEmiExtractor.extract`` returns for this file: a list
        holding the file's metadata dictionary.
    """
    instrument = get_instr_from_filepath(filename)
    context = ExtractionContext(file_path=filename, instrument=instrument)
    return SerEmiExtractor().extract(context)

Coverage for nexusLIMS/extractors/plugins/fei_emi.py: 100%

276 statements