Coverage for nexusLIMS/extractors/plugins/tescan

1# ruff: noqa: N817, FBT003

2"""Tescan (P)FIB/SEM TIFF extractor plugin."""

4import configparser

5import contextlib

6import io

7import logging

8from decimal import Decimal

9from pathlib import Path

10from typing import Any, ClassVar

12from PIL import Image

14from nexusLIMS.extractors.base import ExtractionContext

15from nexusLIMS.extractors.base import FieldDefinition as FD

16from nexusLIMS.extractors.utils import _set_instr_name_and_time, add_to_extensions

17from nexusLIMS.schemas.units import ureg

18from nexusLIMS.utils.dicts import set_nested_dict_value, sort_dict

20TESCAN_TIFF_TAG = 50431

21"""

22TIFF tag ID where Tescan stores INI-style metadata in TIFF files.

23The tag holds instrument configuration, beam parameters, stage position,

24detector settings, and other acquisition metadata.

25"""

27_MAX_ASCII_VALUE = 128

28"""Maximum value for ASCII characters. Used to filter non-ASCII binary data."""

30_logger = logging.getLogger(__name__)

33class TescanTiffExtractor:

34 """

35 Extractor for Tescan FIB/SEM TIFF files.

37 This extractor handles metadata extraction from .tif files saved by

38 Tescan FIB and SEM instruments (e.g., AMBER X). The extractor uses

39 a three-tier strategy:

41 1. Primary: Parse the INI-style metadata embedded in TIFF tag 50431

42 2. Fallback: Look for a sidecar .hdr file with full metadata in INI format

43 3. Always: Supplement with basic metadata read from standard TIFF tags

44 The .hdr file contains comprehensive acquisition parameters in two sections:

45 [MAIN] and [SEM], which are parsed using Python's configparser.

46 """

48 name = "tescan_tif_extractor"

49 priority = 150

50 supported_extensions: ClassVar = {"tif", "tiff"}

def supports(self, context: ExtractionContext) -> bool:
    """
    Report whether the file in ``context`` looks like a Tescan TIFF.

    The extension must be ``tif`` or ``tiff`` (case-insensitive). Beyond
    that, any one of the following is accepted as evidence:

    1. A sidecar .hdr file that passes ``_is_tescan_hdr``
    2. "TESCAN" appearing in the TIFF Make (271) or Software (305) tag
    3. The custom Tescan metadata tag (``TESCAN_TIFF_TAG``) holding any
       value other than the empty string

    Parameters
    ----------
    context
        The extraction context containing file information

    Returns
    -------
    bool
        True if this appears to be a Tescan TIFF file; False otherwise,
        including when the TIFF tags cannot be read
    """
    tiff_path = context.file_path
    if tiff_path.suffix.lower().lstrip(".") not in {"tif", "tiff"}:
        return False

    # A recognizable sidecar header settles the question without opening
    # the image at all
    sidecar = self._find_hdr_file(tiff_path)
    if sidecar is not None and self._is_tescan_hdr(sidecar):
        return True

    # Otherwise sniff the TIFF tags for a Tescan signature
    try:
        with Image.open(tiff_path) as img:
            make_tag = str(img.tag_v2.get(271, ""))
            software_tag = str(img.tag_v2.get(305, ""))
            if any("TESCAN" in text.upper() for text in (make_tag, software_tag)):
                return True
            # Last resort: presence of the vendor-specific metadata tag
            if img.tag_v2.get(TESCAN_TIFF_TAG, "") != "":
                return True
    except Exception as e:
        _logger.debug(
            "Could not read TIFF tags from %s: %s",
            context.file_path,
            e,
        )

    return False

100

def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
    """
    Build the NexusLIMS metadata record for a Tescan FIB/SEM TIFF file.

    Metadata is gathered in three steps:

    1. Parse the HDR metadata embedded in TIFF tag 50431
    2. Only if step 1 produced nothing (or failed), parse a sidecar .hdr file
    3. Always read the basic TIFF tags as a supplement

    The collected values are then migrated to the schema-compliant layout
    and the ``nx_meta`` dictionary is sorted for display.

    Parameters
    ----------
    context
        The extraction context containing file information

    Returns
    -------
    list[dict]
        List containing a single metadata dict with 'nx_meta' key
    """
    tiff_path = context.file_path
    _logger.debug("Extracting metadata from Tescan TIFF file: %s", tiff_path)

    # Everything coming from a Tescan instrument is assumed to be an SEM image
    mdict = {"nx_meta": {"DatasetType": "Image", "Data Type": "SEM_Imaging"}}

    # presumably fills in instrument and creation-time fields; see
    # nexusLIMS.extractors.utils for the exact keys
    _set_instr_name_and_time(mdict, tiff_path)

    # Step 1: metadata embedded in the TIFF itself
    embedded_ok = False
    try:
        embedded = self._extract_embedded_hdr(tiff_path)
        if embedded:
            mdict.update(embedded)
            mdict = self._parse_nx_meta(mdict)
            embedded_ok = True
            _logger.debug("Successfully parsed embedded HDR from TIFF tag")
    except Exception as e:
        _logger.debug("Could not parse embedded HDR metadata: %s", e)

    # Step 2: sidecar header, consulted only when step 1 did not succeed
    sidecar = None if embedded_ok else self._find_hdr_file(tiff_path)
    if sidecar is not None and self._is_tescan_hdr(sidecar):
        try:
            mdict.update(self._read_hdr_metadata(sidecar))
            mdict = self._parse_nx_meta(mdict)
            _logger.debug("Successfully parsed sidecar HDR file")
        except Exception as e:
            _logger.warning(
                "Failed to parse HDR file %s: %s",
                sidecar,
                e,
            )

    # Step 3: standard TIFF tags (updates mdict in place)
    self._extract_from_tiff_tags(tiff_path, mdict)

    mdict = self._migrate_to_schema_compliant_metadata(mdict)

    # Recursive sort purely for nicer display
    mdict["nx_meta"] = sort_dict(mdict["nx_meta"])

    return [mdict]

171

172 def _find_hdr_file(self, tiff_path: Path) -> Path | None:

173 """

174 Find the sidecar .hdr file for a given TIFF file.

175

176 Parameters

177 ----------

178 tiff_path

179 Path to the TIFF file

180

181 Returns

182 -------

183 Path or None

184 Path to the .hdr file if it exists, None otherwise

185 """

186 hdr_path = tiff_path.with_suffix(".hdr")

187 if hdr_path.exists():

188 return hdr_path

189 return None

190

191 def _is_tescan_hdr(self, hdr_path: Path) -> bool:

192 """

193 Verify that an HDR file is a Tescan format file.

194

195 Checks for the presence of [MAIN] and [SEM] sections which are

196 characteristic of Tescan HDR files.

197

198 Parameters

199 ----------

200 hdr_path

201 Path to the .hdr file

202

203 Returns

204 -------

205 bool

206 True if this appears to be a Tescan HDR file

207 """

208 try:

209 with hdr_path.open("r", encoding="utf-8", errors="ignore") as f:

210 content = f.read(500) # Read first 500 chars

211 # Look for characteristic Tescan sections

212 return "[MAIN]" in content or "Device=TESCAN" in content

213 except Exception as e:

214 _logger.debug("Could not verify HDR file %s: %s", hdr_path, e)

215 return False

216

def _extract_embedded_hdr(
    self, tiff_path: Path
) -> dict[str, dict[str, str]] | None:
    """
    Read and parse the HDR metadata stored in TIFF tag TESCAN_TIFF_TAG.

    The tag value is converted to bytes, trimmed to where the metadata
    starts, stripped of non-printable characters, given ``[MAIN]``/``[SEM]``
    section headers when they are missing, and finally parsed as INI text.

    Parameters
    ----------
    tiff_path
        Path to the TIFF file

    Returns
    -------
    dict or None
        Dictionary with section names as keys and key-value dicts as values,
        or None if the tag is not present or any step of the pipeline fails
    """
    try:
        with Image.open(tiff_path) as img:
            raw_tag = img.tag_v2.get(TESCAN_TIFF_TAG)
            if raw_tag is None:
                return None

            # bytes -> text with any leading binary garbage cut away
            ini_text = self._extract_metadata_string(self._tag_to_bytes(raw_tag))

            # drop stray binary characters, then make sure configparser
            # will find at least one section header
            ini_text = self._add_section_headers_if_needed(
                self._clean_metadata_string(ini_text)
            )

            return self._parse_hdr_string(ini_text)

    except Exception as e:
        _logger.debug("Failed to extract embedded HDR from tag 50431: %s", e)
        return None

262

263 def _tag_to_bytes(self, metadata_tag: Any) -> bytes:

264 """Convert TIFF tag data to bytes.

265

266 Parameters

267 ----------

268 metadata_tag

269 Tag data in various formats (bytes, str, etc.)

270

271 Returns

272 -------

273 bytes

274 Converted bytes

275

276 Raises

277 ------

278 TypeError

279 If tag data is not bytes or str

280 """

281 if isinstance(metadata_tag, bytes):

282 return metadata_tag

283 if isinstance(metadata_tag, str):

284 return metadata_tag.encode("utf-8")

285 msg = f"Unsupported metadata tag type: {type(metadata_tag)}"

286 raise TypeError(msg)

287

288 def _extract_metadata_string(self, metadata_bytes: bytes) -> str:

289 """Extract metadata string from binary data by removing garbage.

290

291 The tag may contain binary garbage at the beginning. This method looks

292 for known keys to find the start of actual metadata.

293

294 Parameters

295 ----------

296 metadata_bytes

297 Raw binary metadata from TIFF tag

298

299 Returns

300 -------

301 str

302 Cleaned metadata string

303 """

304 # Look for the start of metadata by searching for known keys

305 search_keys = [b"[MAIN]", b"AccFrames=", b"AccType=", b"Company=", b"Date="]

306 for search_key in search_keys:

307 pos = metadata_bytes.find(search_key)

308 if pos >= 0:

309 metadata_bytes = metadata_bytes[pos:]

310 return metadata_bytes.replace(b"\x00", b"").decode(

311 "utf-8", errors="ignore"

312 )

313

314 # Fallback: decode whole thing

315 return metadata_bytes.replace(b"\x00", b"").decode("utf-8", errors="ignore")

316

317 def _clean_metadata_string(self, metadata_str: str) -> str:

318 """Remove non-printable binary characters from metadata string.

319

320 Parameters

321 ----------

322 metadata_str

323 Metadata string that may contain non-printable characters

324

325 Returns

326 -------

327 str

328 Cleaned metadata string

329 """

330 return "".join(

331 c

332 for c in metadata_str

333 if ord(c) < _MAX_ASCII_VALUE and (c.isprintable() or c in "\n\r\t")

334 )

335

336 def _add_section_headers_if_needed(self, metadata_str: str) -> str:

337 """Add [MAIN] and [SEM] section headers if missing.

338

339 Tescan's embedded metadata doesn't include section headers, so this

340 method detects where the SEM section starts and inserts headers.

341

342 Parameters

343 ----------

344 metadata_str

345 Metadata string potentially without section headers

346

347 Returns

348 -------

349 str

350 Metadata string with section headers

351 """

352 if "[MAIN]" in metadata_str or "[SEM]" in metadata_str:

353 return metadata_str

354

355 # Find where SEM section starts by looking for known SEM keys

356 sem_keys = [

357 "AcceleratorVoltage=",

358 "ApertureDiameter=",

359 "ApertureOptimization=",

360 "ChamberPressure=",

361 "CrossFree=",

362 "HV=",

363 ]

364 sem_start_pos = self._find_sem_section_start(metadata_str, sem_keys)

365

366 # Insert section headers at line boundaries

367 if sem_start_pos < len(metadata_str):

368 line_start = metadata_str.rfind("\n", 0, sem_start_pos)

369 if line_start < 0:

370 line_start = 0

371 else:

372 line_start += 1 # Move past the \n

373 return (

374 "[MAIN]\n"

375 + metadata_str[:line_start]

376 + "[SEM]\n"

377 + metadata_str[line_start:]

378 )

379

380 # No SEM section found

381 return "[MAIN]\n" + metadata_str

382

383 def _find_sem_section_start(self, metadata_str: str, sem_keys: list[str]) -> int:

384 """Find the position where SEM section starts.

385

386 Parameters

387 ----------

388 metadata_str

389 Metadata string to search

390 sem_keys

391 List of keys that typically appear in SEM section

392

393 Returns

394 -------

395 int

396 Position of first SEM key, or length of string if not found

397 """

398 sem_start_pos = len(metadata_str)

399 for sem_key in sem_keys:

400 pos = metadata_str.find(sem_key)

401 if pos >= 0 and pos < sem_start_pos:

402 sem_start_pos = pos

403 return sem_start_pos

404

405 def _parse_hdr_string(self, hdr_string: str) -> dict[str, dict[str, str]]:

406 """

407 Parse HDR metadata from a string in INI format.

408

409 Parameters

410 ----------

411 hdr_string

412 HDR metadata as a string in INI format

413

414 Returns

415 -------

416 dict

417 Dictionary with section names as keys and key-value dicts as values

418 """

419 # Normalize line endings

420 hdr_string = hdr_string.replace("\r\n", "\n").replace("\r", "\n")

421

422 # Parse with ConfigParser

423 config = configparser.ConfigParser()

424 # Make ConfigParser respect upper/lowercase values

425 config.optionxform = lambda option: option

426

427 # Use StringIO to read from string

428 buf = io.StringIO(hdr_string)

429 config.read_file(buf)

430

431 metadata = {}

432 for section in config.sections():

433 metadata[section] = dict(config.items(section))

434

435 return metadata

436

def _read_hdr_metadata(self, hdr_path: Path) -> dict[str, dict[str, str]]:
    """
    Load a Tescan .hdr file from disk and parse it.

    The file is read as UTF-8 text with undecodable bytes ignored, and the
    resulting INI text is handed to ``_parse_hdr_string``.

    Parameters
    ----------
    hdr_path
        Path to the .hdr file

    Returns
    -------
    dict
        Dictionary with section names as keys and key-value dicts as values
    """
    return self._parse_hdr_string(
        hdr_path.read_text(encoding="utf-8", errors="ignore")
    )

457

def _extract_from_tiff_tags(self, filename: Path, mdict: dict) -> None:
    """
    Supplement ``mdict["nx_meta"]`` with values from standard TIFF tags.

    Make (271), Model (272), Software (305), Artist (315, stored as
    "Operator") and the image dimensions (256/257) are only written when
    the corresponding nx_meta key is not already set. The DateTime tag
    (306) is always stored as "TIFF DateTime" when it has a value.
    ``mdict`` is updated in place; on failure a warning is logged and an
    "Extractor Warnings" entry is written instead.

    Parameters
    ----------
    filename
        Path to the TIFF file
    mdict
        Metadata dictionary to update
    """
    # (nx_meta key, TIFF tag id) pairs that must never overwrite HDR values
    fill_if_absent = (
        ("Make", 271),
        ("Model", 272),
        ("Software Version", 305),
    )

    try:
        with Image.open(filename) as img:
            for nx_key, tag_id in fill_if_absent:
                if nx_key not in mdict["nx_meta"]:
                    tag_value = img.tag_v2.get(tag_id)
                    if tag_value:
                        mdict["nx_meta"][nx_key] = tag_value

            # 306 = DateTime, kept unconditionally as supplemental info
            tiff_timestamp = img.tag_v2.get(306)
            if tiff_timestamp:
                mdict["nx_meta"]["TIFF DateTime"] = tiff_timestamp

            # 315 = Artist (username)
            if "Operator" not in mdict["nx_meta"]:
                artist = img.tag_v2.get(315)
                if artist:
                    mdict["nx_meta"]["Operator"] = artist

            # 256 = ImageWidth, 257 = ImageLength
            if "Data Dimensions" not in mdict["nx_meta"]:
                width, height = img.tag_v2.get(256), img.tag_v2.get(257)
                if width and height:
                    mdict["nx_meta"]["Data Dimensions"] = str((width, height))

    except Exception as e:
        _logger.warning("Failed to extract TIFF tags from %s: %s", filename, e)
        mdict["nx_meta"]["Extractor Warnings"] = f"Failed to extract TIFF tags: {e}"

521

def _get_field_definitions(self) -> list:
    """
    Build the table mapping raw HDR keys onto nx_meta fields.

    Each entry is ``FD(section, source_key, output_key, factor, is_string)``
    with an optional ``target_unit`` keyword. ``_parse_nx_meta`` reads the
    value for ``source_key`` from the named section, multiplies numeric
    values by ``factor`` and stores the result under ``output_key`` (a
    list denotes a nested path below nx_meta).

    Returns
    -------
    list
        List of FieldDefinition tuples, [MAIN] entries first and then
        [SEM] entries, each group in the order the keys appear in an
        HDR file
    """

    def main(*args: Any, **kwargs: Any) -> Any:
        """Create a field definition bound to the [MAIN] section."""
        return FD("MAIN", *args, **kwargs)

    def sem(*args: Any, **kwargs: Any) -> Any:
        """Create a field definition bound to the [SEM] section."""
        return FD("SEM", *args, **kwargs)

    # NOTE(review): the scale factors suggest raw values are in SI base
    # units (m, V, A, s, Pa) - confirm against Tescan documentation
    main_fields = [
        main("AccFrames", "Accumulated Frames", 1, False),
        main("AccType", "Accumulation Type", 1, True),
        main("Company", "Company", 1, True),
        main("Date", "Acquisition Date", 1, True),
        main("Description", "Description", 1, True),
        main("Device", "Device", 1, True),
        main("DeviceModel", "Device Model", 1, True),
        main("FullUserName", "Full User Name", 1, True),
        main("ImageStripSize", "Image Strip Size", 1, False),
        main("Magnification", "Magnification", 1e-3, False, target_unit="kiloX"),
        main("MagnificationReference", "Magnification Reference", 1, False),
        main("OrigFileName", "Original Filename", 1, True),
        main("PixelSizeX", "Pixel Width", 1e9, False, target_unit="nanometer"),
        main("PixelSizeY", "Pixel Height", 1e9, False, target_unit="nanometer"),
        main("SerialNumber", "Serial Number", 1, True),
        main("Sign", "Sign", 1, True),
        main("SoftwareVersion", "Software Version", 1, True),
        main("Time", "Acquisition Time", 1, True),
        main("UserName", "User Name", 1, True),
        main("ViewFieldsCountX", "View Fields Count X", 1, False),
        main("ViewFieldsCountY", "View Fields Count Y", 1, False),
    ]

    sem_fields = [
        sem(
            "AcceleratorVoltage",
            "Accelerator Voltage",
            1e-3,
            False,
            target_unit="kilovolt",
        ),
        sem(
            "ApertureDiameter",
            "Aperture Diameter",
            1e6,
            False,
            target_unit="micrometer",
        ),
        sem("ApertureOptimization", "Aperture Optimization", 1, False),
        sem(
            "ChamberPressure",
            "Chamber Pressure",
            1e3,
            False,
            target_unit="millipascal",
        ),
        sem("CrossFree", "Cross Free", 1, False),
        sem(
            "CrossSectionShiftX",
            "Cross Section Shift X",
            1e6,
            False,
            target_unit="micrometer",
        ),
        sem(
            "CrossSectionShiftY",
            "Cross Section Shift Y",
            1e6,
            False,
            target_unit="micrometer",
        ),
        sem("DepthOfFocus", "Depth of Focus", 1e6, False, target_unit="micrometer"),
        sem("Detector", "Detector Name", 1, True),
        sem("Detector0", "Detector 0", 1, True),
        sem("Detector0FlatField", "Detector 0 Flat Field", 1, False),
        sem("Detector0Gain", "Detector 0 Gain", 1, False),
        sem("Detector0Offset", "Detector 0 Offset", 1, False),
        sem("DwellTime", "Pixel Dwell Time", 1e6, False, target_unit="microsecond"),
        sem(
            "EmissionCurrent",
            "Emission Current",
            1e6,
            False,
            target_unit="microampere",
        ),
        sem("Gun", "Gun Type", 1, True),
        sem("GunShiftX", "Gun Shift X", 1, False),
        sem("GunShiftY", "Gun Shift Y", 1, False),
        sem("GunTiltX", "Gun Tilt X", 1, False),
        sem("GunTiltY", "Gun Tilt Y", 1, False),
        sem("HV", "HV Voltage", 1e-3, False, target_unit="kilovolt"),
        sem("IMLCenteringX", "IML Centering X", 1, False),
        sem("IMLCenteringY", "IML Centering Y", 1, False),
        sem("ImageShiftX", "Image Shift X", 1e9, False, target_unit="nanometer"),
        sem("ImageShiftY", "Image Shift Y", 1e9, False, target_unit="nanometer"),
        sem("InjectedGas", "Injected Gas", 1, True),
        sem("LUTGamma", "LUT Gamma", 1, False),
        sem("LUTMaximum", "LUT Maximum", 1, False),
        sem("LUTMinimum", "LUT Minimum", 1, False),
        sem("MTDGrid", "MTD Grid", 1e-3, False, target_unit="kilovolt"),
        sem(
            "MTDScintillator",
            "MTD Scintillator",
            1e-3,
            False,
            target_unit="kilovolt",
        ),
        sem("OBJCenteringX", "OBJ Centering X", 1, False),
        sem("OBJCenteringY", "OBJ Centering Y", 1, False),
        sem("OBJPreCenteringX", "OBJ Pre-Centering X", 1, False),
        sem("OBJPreCenteringY", "OBJ Pre-Centering Y", 1, False),
        sem("PotentialMode", "Potential Mode", 1, True),
        sem(
            "PredictedBeamCurrent",
            "Predicted Beam Current",
            1e12,
            False,
            target_unit="picoampere",
        ),
        sem("PrimaryDetectorGain", "Primary Detector Gain", 1, False),
        sem("PrimaryDetectorOffset", "Primary Detector Offset", 1, False),
        sem("SampleVoltage", "Sample Voltage", 1, False, target_unit="volt"),
        sem("ScanID", "Scan ID", 1, False),
        sem("ScanMode", "Scan Mode", 1, True),
        sem("ScanRotation", "Scan Rotation", 1, False, target_unit="degree"),
        sem("ScanSpeed", "Scan Speed", 1, False),
        sem("SessionID", "Session ID", 1, True),
        sem(
            "SpecimenCurrent",
            "Specimen Current",
            1e12,
            False,
            target_unit="picoampere",
        ),
        sem("SpotSize", "Spot Size", 1e9, False, target_unit="nanometer"),
        # Stage coordinates are grouped under a nested "Stage Position" dict
        sem(
            "StageRotation",
            ["Stage Position", "Rotation"],
            1,
            False,
            target_unit="degree",
        ),
        sem(
            "StageTilt",
            ["Stage Position", "Tilt"],
            1,
            False,
            target_unit="degree",
        ),
        sem("StageX", ["Stage Position", "X"], 1, False, target_unit="meter"),
        sem("StageY", ["Stage Position", "Y"], 1, False, target_unit="meter"),
        sem("StageZ", ["Stage Position", "Z"], 1, False, target_unit="meter"),
        sem("StigmatorX", "Stigmator X Value", 1, False),
        sem("StigmatorY", "Stigmator Y Value", 1, False),
        sem(
            "SymmetrizationVoltage",
            "Symmetrization Voltage",
            1e-3,
            False,
            target_unit="kilovolt",
        ),
        sem("SyncMains", "Sync to Mains", 1, True),
        sem("TiltCorrection", "Tilt Correction", 1, False),
        sem("TubeVoltage", "Tube Voltage", 1e-3, False, target_unit="kilovolt"),
        sem(
            "VirtualObserverDistance",
            "Virtual Observer Distance",
            1e3,
            False,
            target_unit="millimeter",
        ),
        sem("WD", "Working Distance", 1e3, False, target_unit="millimeter"),
    ]

    return main_fields + sem_fields

758

def _parse_nx_meta(self, mdict: dict) -> dict:
    """
    Parse metadata into NexusLIMS format.

    Walks the field definitions from ``_get_field_definitions``, reads each
    value from the [MAIN] or [SEM] section of ``mdict`` and stores it under
    the ``nx_meta`` key. String fields are copied as-is. Numeric fields are
    converted to ``Decimal``, scaled by the field's factor and, when a
    target unit is defined, wrapped in a Pint quantity.

    A numeric field whose raw value cannot be converted is skipped, so one
    malformed entry does not abort parsing of the remaining fields.

    Parameters
    ----------
    mdict
        Metadata dictionary with [MAIN] and [SEM] sections

    Returns
    -------
    dict
        Updated metadata dictionary with parsed nx_meta fields
    """
    mdict["nx_meta"].setdefault("warnings", [])

    main_section = mdict.get("MAIN", {})
    sem_section = mdict.get("SEM", {})

    # Alternative [SEM] keys consulted when the primary key is absent
    fallback_keys = {
        "HV": "AcceleratorVoltage",
        "Detector0Gain": "PrimaryDetectorGain",
        "Detector0Offset": "PrimaryDetectorOffset",
    }

    def store(output_key: Any, parsed: Any) -> None:
        """Write ``parsed`` to a flat nx_meta key or a nested key path."""
        if isinstance(output_key, list):
            set_nested_dict_value(mdict, ["nx_meta", *output_key], parsed)
        else:
            mdict["nx_meta"][output_key] = parsed

    for field in self._get_field_definitions():
        section = main_section if field.section == "MAIN" else sem_section
        value = section.get(field.source_key)

        if value is None and field.source_key in fallback_keys:
            value = sem_section.get(fallback_keys[field.source_key])

        # Missing and empty values are both skipped
        if not value:
            continue

        if field.is_string:
            store(field.output_key, value)
            continue

        # Decimal() signals unparsable text with decimal.InvalidOperation,
        # which is an ArithmeticError and not a ValueError, so both must
        # be suppressed for a malformed value to be skipped.
        with contextlib.suppress(ValueError, ArithmeticError):
            # Decimal keeps precision through the unit conversion; the
            # factor goes through str() to avoid binary float artifacts
            scaled = Decimal(value) * Decimal(str(field.factor))

            if field.suppress_zero and scaled == 0:
                continue

            if field.target_unit:
                store(field.output_key, ureg.Quantity(scaled, field.target_unit))
            else:
                # No unit specified, keep as Decimal for precision
                store(field.output_key, scaled)

    # Handle user information (prefer FullUserName over UserName)
    operator = main_section.get("FullUserName") or main_section.get("UserName")
    if operator:
        mdict["nx_meta"]["Operator"] = operator
        # Each warnings entry is itself a list naming the flagged field
        mdict["nx_meta"]["warnings"].append(["Operator"])

    return mdict

850

def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
    """
    Reshape ``nx_meta`` into the schema-compliant layout.

    - Required and identification fields are copied through unchanged
    - Core fields are renamed from display names to EM Glossary names
    - Every other field is routed to the extensions section via
      ``add_to_extensions``, after any extensions already present

    Parameters
    ----------
    mdict
        Metadata dictionary with nx_meta containing extracted fields

    Returns
    -------
    dict
        Metadata dictionary with schema-compliant nx_meta structure
    """
    nx_meta = mdict.get("nx_meta", {})

    # Start from a copy of any extensions already present (e.g. from
    # instrument profiles) so they are preserved
    extensions = nx_meta["extensions"].copy() if "extensions" in nx_meta else {}

    # Display name -> EM Glossary name
    emg_names = {
        "HV Voltage": "acceleration_voltage",
        "Accelerator Voltage": "acceleration_voltage",
        "Working Distance": "working_distance",
        "Beam Current": "beam_current",
        "Emission Current": "emission_current",
        "Pixel Dwell Time": "dwell_time",
        "Horizontal Field Width": "horizontal_field_width",
        "Pixel Width": "pixel_width",
        "Pixel Height": "pixel_height",
    }

    # Fields copied through verbatim, in this order
    passthrough = ("DatasetType", "Data Type", "Creation Time", "Instrument ID")
    new_nx_meta = {key: nx_meta[key] for key in passthrough if key in nx_meta}

    # NOTE(review): "Extractor Warnings" is excluded here and never copied
    # anywhere, so it does not survive migration - confirm this is intended
    excluded = {*passthrough, "Extractor Warnings", "warnings", "extensions"}

    for display_name, value in nx_meta.items():
        if display_name in excluded:
            continue
        if display_name in emg_names:
            new_nx_meta[emg_names[display_name]] = value
        else:
            # Everything that is not a core field (including "Operator")
            # is treated as vendor-specific
            extensions[display_name] = value

    if "warnings" in nx_meta:
        new_nx_meta["warnings"] = nx_meta["warnings"]

    for ext_key, ext_value in extensions.items():
        add_to_extensions(new_nx_meta, ext_key, ext_value)

    mdict["nx_meta"] = new_nx_meta
    return mdict

Coverage for nexusLIMS/extractors/plugins/tescan_tif.py: 100%

247 statements