Coverage for nexusLIMS/schemas/em_glossary.py: 100%

116 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1""" 

2EM Glossary field name mappings for NexusLIMS metadata. 

3 

4This module provides mappings between NexusLIMS internal field names, display names, 

5and EM Glossary (EMG) standardized terminology. The EM Glossary is a community-driven 

6ontology for electron microscopy metadata maintained by the Helmholtz Metadata 

7Collaboration. 

8 

9The module uses RDFLib to parse the EM Glossary OWL ontology file, providing access 

10to term labels, definitions, and the full semantic structure. 

11 

12**EM Glossary Version:** v2.0.0 

13 

14**References:** 

15- EM Glossary v2.0.0: [https://purls.helmholtz-metadaten.de/emg/v2.0.0/](https://purls.helmholtz-metadaten.de/emg/v2.0.0/) 

16- OWL Ontology: Shipped with NexusLIMS at 

17 `nexusLIMS/schemas/references/em_glossary_2.0.owl` 

18- License: CC BY 4.0 [https://creativecommons.org/licenses/by/4.0/](https://creativecommons.org/licenses/by/4.0/) 

19 

20The mappings in this module enable: 

21- Standardized field names across instruments and vendors 

22- Cross-reference to EM Glossary IDs for semantic interoperability 

23- Human-readable display names for XML output 

24- Dynamic loading from the OWL ontology using [RDFLib](https://rdflib.readthedocs.io/en/stable/index.html) 

25 

26Examples 

27-------- 

28Get EM Glossary ID for a field: 

29 

30>>> from nexusLIMS.schemas.em_glossary import get_emg_id 

31>>> get_emg_id("acceleration_voltage") 

32'EMG_00000004' 

33 

34Get display name for XML: 

35 

36>>> from nexusLIMS.schemas.em_glossary import get_display_name 

37>>> get_display_name("acceleration_voltage") 

38'Acceleration Voltage'

39 

40Get EMG label from ID: 

41 

42>>> from nexusLIMS.schemas.em_glossary import get_emg_label 

43>>> get_emg_label("EMG_00000004") 

44'Acceleration Voltage' 

45 

46Get EMG definition: 

47 

48>>> from nexusLIMS.schemas.em_glossary import get_emg_definition 

49>>> defn = get_emg_definition("EMG_00000004") 

50>>> print(defn) 

51The potential difference between anode and cathode. 

52 

53Check if field has EMG mapping: 

54 

55>>> from nexusLIMS.schemas.em_glossary import has_emg_id 

56>>> has_emg_id("acceleration_voltage") 

57True 

58>>> has_emg_id("custom_vendor_field") 

59False 

60""" 

61 

62import logging 

63from functools import lru_cache 

64from pathlib import Path 

65from typing import Dict 

66 

67from rdflib import RDF, RDFS, Graph, Namespace 

68 

# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)

# Built relative to this file's directory rather than the working directory.
EMG_OWL_PATH = Path(__file__).parent / "references" / "em_glossary_2.0.owl"
"""Path to the EM Glossary OWL file shipped with NexusLIMS"""

# Interpolated into the versioned PURLs built by get_emg_uri().
EMG_VERSION = "v2.0.0"
"""Version of the packaged EM Glossary OWL file"""

# _load_emg_terms() keeps only subjects whose URI starts with this namespace.
EMG = Namespace("https://purls.helmholtz-metadaten.de/emg/")
"""RDF Namespace for the EM Glossary"""

# Supplies the IAO_0000115 property that _load_emg_terms() reads definitions from.
OBO = Namespace("http://purl.obolibrary.org/obo/")
"""RDF Namespace for OBO"""

82 

83 

@lru_cache(maxsize=1)
def _load_emg_graph() -> Graph:
    """
    Parse the packaged EM Glossary OWL file into an RDFLib graph.

    The file at ``EMG_OWL_PATH`` is read as RDF/XML. The function is wrapped
    in ``lru_cache(maxsize=1)``, so a successful parse is reused by later
    calls instead of re-reading the file.

    Returns
    -------
    rdflib.Graph
        Graph holding every triple parsed from the OWL file

    Raises
    ------
    FileNotFoundError
        If nothing exists at ``EMG_OWL_PATH``
    ValueError
        If anything goes wrong while building or parsing the graph; the
        underlying exception is chained as the cause
    """
    if EMG_OWL_PATH.exists():
        try:
            ontology = Graph()
            ontology.parse(EMG_OWL_PATH, format="xml")
            _logger.debug("Loaded EM Glossary ontology from %s", EMG_OWL_PATH)
            _logger.debug("Graph contains %s triples", len(ontology))
        except Exception as e:
            msg = f"Failed to parse EM Glossary OWL file: {e}"
            raise ValueError(msg) from e
        return ontology

    # Reaching this point means the OWL file is missing on disk.
    msg = f"EM Glossary OWL file not found at {EMG_OWL_PATH}"
    raise FileNotFoundError(msg)

117 

118 

@lru_cache(maxsize=1)
def _load_emg_terms() -> Dict[str, Dict[str, str]]:
    """
    Build a lookup table of EM Glossary terms from the ontology graph.

    Every typed subject in the graph is inspected. A subject is kept when
    its URI lies in the ``EMG`` namespace, its last path segment starts
    with ``EMG_``, and it carries at least one ``rdfs:label``. The first
    label and the first ``IAO_0000115`` definition yielded by the graph
    are recorded. The result is cached by ``lru_cache(maxsize=1)``.

    Returns
    -------
    dict[str, dict[str, str]]
        Mapping from EMG_ID -> {'label': str, 'definition': str | None}

    Raises
    ------
    ValueError
        If no EMG term at all could be extracted from the graph

    Examples
    --------
    >>> terms = _load_emg_terms()
    >>> terms['EMG_00000004']['label']
    'Acceleration Voltage'
    """
    graph = _load_emg_graph()
    emg_prefix = str(EMG)
    terms = {}

    for subject in graph.subjects(RDF.type, None):
        subject_uri = str(subject)
        # The EMG ID is the last path segment of the subject URI.
        term_id = subject_uri.rsplit("/", 1)[-1]
        in_emg = subject_uri.startswith(emg_prefix) and term_id.startswith("EMG_")
        if not in_emg:
            continue

        # Only the first label reported by the graph is used.
        first_label = next(iter(graph.objects(subject, RDFS.label)), None)
        if first_label is None:
            continue

        # IAO_0000115 is the standard definition property; it is optional.
        first_definition = next(
            iter(graph.objects(subject, OBO.IAO_0000115)), None
        )

        terms[term_id] = {
            "label": str(first_label),
            "definition": (
                None if first_definition is None else str(first_definition)
            ),
        }

    if not terms:
        msg = "No EMG terms found in OWL file. File may be corrupted."
        raise ValueError(msg)

    _logger.debug("Loaded %s EMG terms from ontology", len(terms))
    return terms

179 

180 

# Mapping from NexusLIMS internal field names to EM Glossary terms
# Format: internal_field_name -> (display_name, emg_label or None, description)
# The emg_label is used to look up the EMG_ID from the OWL file
#
# Tuple positions as consumed elsewhere in this module:
#   [0] display_name -> returned by get_display_name()
#   [1] emg_label    -> matched against ontology labels by get_emg_id();
#                       None means the field has no EMG term mapped
#   [2] description  -> returned by get_description()
NEXUSLIMS_TO_EMG_MAPPINGS: Dict[str, tuple[str, str | None, str]] = {
    # Core acquisition parameters (common to all types)
    "creation_time": (
        "Creation Time",
        None,  # No specific EMG term for timestamp
        "ISO-8601 timestamp with timezone",
    ),
    "data_type": (
        "Data Type",
        None,  # Descriptive field, not in EMG
        "Human-readable data type description",
    ),
    "dataset_type": (
        "DatasetType",
        None,  # Schema-defined category
        "Schema-defined dataset category",
    ),
    # Image acquisition parameters (SEM/TEM/STEM)
    "acceleration_voltage": (
        "Acceleration Voltage",
        "Acceleration Voltage",  # EMG label
        "Accelerating voltage of the electron/ion beam",
    ),
    "working_distance": (
        "Working Distance",
        "Working Distance",  # EMG label
        "Distance between final lens and sample surface",
    ),
    "beam_current": (
        "Beam Current",
        "Beam Current",  # EMG label
        "Electron beam current",
    ),
    "emission_current": (
        "Emission Current",
        "Emission Current",  # EMG label
        "Emission current from electron source",
    ),
    "dwell_time": (
        "Pixel Dwell Time",
        "Dwell Time",  # EMG label
        "Time the beam dwells on each pixel during scanning",
    ),
    "magnification": (
        "Magnification",
        None,  # EMG has Magnification but it's complex
        "Nominal magnification",
    ),
    "horizontal_field_width": (
        "Horizontal Field Width",
        None,  # Not in EMG v2.0.0
        "Width of the scanned area",
    ),
    "vertical_field_width": (
        "Vertical Field Width",
        None,  # Not in EMG v2.0.0
        "Height of the scanned area",
    ),
    "pixel_width": (
        "Pixel Width",
        None,  # Not in EMG v2.0.0
        "Physical width of a single pixel",
    ),
    "pixel_height": (
        "Pixel Height",
        None,  # Not in EMG v2.0.0
        "Physical height of a single pixel",
    ),
    "scan_rotation": (
        "Scan Rotation",
        None,  # Not in EMG v2.0.0
        "Rotation angle of the scan frame",
    ),
    # Detector information
    "detector_type": (
        "Detector",
        None,  # EMG has detector concepts but not simple type field
        "Type or name of detector used",
    ),
    "acquisition_device": (
        "Acquisition Device",
        None,  # Similar to detector_type
        "Name of the acquisition device or camera",
    ),
    # Stage position (common to SEM/TEM)
    "stage_x": (
        "Stage X",
        None,  # Part of complex stage position concept
        "Stage X coordinate",
    ),
    "stage_y": (
        "Stage Y",
        None,  # Part of complex stage position concept
        "Stage Y coordinate",
    ),
    "stage_z": (
        "Stage Z",
        None,  # Part of complex stage position concept
        "Stage Z coordinate",
    ),
    "stage_tilt": (
        "Stage Tilt",
        None,  # Part of complex stage position concept
        "Stage tilt angle (alpha)",
    ),
    "stage_rotation": (
        "Stage Rotation",
        None,  # Part of complex stage position concept
        "Stage rotation angle",
    ),
    "stage_alpha": (
        "Stage Alpha",
        None,  # Part of complex stage position concept
        "Stage alpha tilt angle",
    ),
    "stage_beta": (
        "Stage Beta",
        None,  # Part of complex stage position concept
        "Stage beta tilt angle",
    ),
    # Spectrum acquisition parameters (EDS/EELS)
    "acquisition_time": (
        "Acquisition Time",
        "Acquisition Time",  # EMG label
        "Total time for spectrum acquisition",
    ),
    "live_time": (
        "Live Time",
        None,  # Not in EMG v2.0.0
        "Live time (excludes dead time) for spectrum acquisition",
    ),
    "detector_energy_resolution": (
        "Energy Resolution",
        None,  # Not in EMG v2.0.0
        "Energy resolution of the detector",
    ),
    "channel_size": (
        "Channel Size",
        None,  # Not in EMG v2.0.0
        "Energy width of each channel",
    ),
    "starting_energy": (
        "Starting Energy",
        None,  # Not in EMG v2.0.0
        "Starting energy of the spectrum",
    ),
    "azimuthal_angle": (
        "Azimuthal Angle",
        None,  # Not in EMG v2.0.0
        "Azimuthal angle of the detector",
    ),
    "elevation_angle": (
        "Elevation Angle",
        None,  # Not in EMG v2.0.0
        "Elevation angle of the detector",
    ),
    "takeoff_angle": (
        "Takeoff Angle",
        None,  # Not in EMG v2.0.0
        "X-ray takeoff angle",
    ),
    # Diffraction parameters (TEM)
    "camera_length": (
        "Camera Length",
        "Camera Length",  # EMG label
        "Camera length for diffraction pattern",
    ),
    "convergence_angle": (
        "Convergence Angle",
        "Convergence Angle",  # EMG label
        "Convergence angle of the electron beam",
    ),
    "illumination_mode": (
        "Illumination Mode",
        None,  # Not in EMG v2.0.0
        "TEM illumination mode (TEM, STEM, Diffraction, etc.)",
    ),
    # Sample/metadata
    "specimen": (
        "Specimen",
        None,  # EMG has Specimen but it's complex
        "Sample or specimen description",
    ),
    "operator": (
        "Operator",
        None,  # Not in EMG (user information)
        "User who acquired the data",
    ),
    # Environmental parameters
    "temperature": (
        "Temperature",
        None,  # Not in EMG v2.0.0
        "Sample or chamber temperature",
    ),
    "pressure": (
        "Pressure",
        None,  # Not in EMG v2.0.0
        "Chamber pressure",
    ),
    "chamber_pressure": (
        "Chamber Pressure",
        None,  # Not in EMG v2.0.0
        "Vacuum chamber pressure",
    ),
    # Data dimensions
    "data_dimensions": (
        "Data Dimensions",
        None,  # Not a measurement, structural metadata
        "String representation of data shape",
    ),
    # Instrument identification
    "instrument_id": (
        "Instrument ID",
        None,  # Not in EMG (internal NexusLIMS identifier)
        "NexusLIMS persistent instrument identifier",
    ),
}
"""Mapping from NexusLIMS internal field names to EM Glossary terms
Format: `internal_field_name -> (display_name, emg_label or None, description)`
The emg_label is used to look up the EMG_ID from the OWL file"""

404 

405 

def get_emg_label(emg_id: str) -> str | None:
    """
    Return the ontology label recorded for an EM Glossary ID.

    The label comes from the term table built from the OWL file. Any
    error raised while obtaining it is logged as a warning and reported
    to the caller as ``None`` rather than propagated.

    Parameters
    ----------
    emg_id : str
        EM Glossary ID (e.g., "EMG_00000004")

    Returns
    -------
    str or None
        Label of the term, or None when the ID is unknown or the ontology
        could not be loaded

    Examples
    --------
    >>> get_emg_label("EMG_00000004")
    'Acceleration Voltage'

    >>> get_emg_label("EMG_00000050")
    'Working Distance'

    >>> get_emg_label("EMG_99999999") is None
    True
    """
    try:
        entry = _load_emg_terms().get(emg_id)
        if not entry:
            # Unknown ID: nothing to report.
            return None
        return entry["label"]
    except Exception as err:
        _logger.warning("Failed to load EMG ontology: %s", err)
        return None

440 

441 

def get_emg_definition(emg_id: str) -> str | None:
    """
    Return the formal definition recorded for an EM Glossary ID.

    The definition comes from the term table built from the OWL file. Any
    error raised while obtaining it is logged as a warning and reported
    to the caller as ``None`` rather than propagated.

    Parameters
    ----------
    emg_id : str
        EM Glossary ID (e.g., "EMG_00000004")

    Returns
    -------
    str or None
        Definition text, or None when the ID is unknown, the term carries
        no definition, or the ontology could not be loaded

    Examples
    --------
    >>> defn = get_emg_definition("EMG_00000004")
    >>> print(defn)
    The potential difference between anode and cathode.

    >>> get_emg_definition("EMG_99999999") is None
    True
    """
    try:
        entry = _load_emg_terms().get(emg_id)
        if entry:
            return entry["definition"]
    except Exception as err:
        _logger.warning("Failed to load EMG ontology: %s", err)
    # Reached for unknown IDs and after a logged load failure alike.
    return None

474 

475 

def get_emg_id(field_name: str) -> str | None:
    """
    Resolve a NexusLIMS field name to its EM Glossary ID.

    The field is looked up in NEXUSLIMS_TO_EMG_MAPPINGS to obtain its EMG
    label. The ontology term table is then searched for the first term
    whose label equals it exactly.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        EM Glossary ID string (e.g., "EMG_00000004"). None is returned
        when the field is unmapped, has no EMG label, the label is absent
        from the ontology, or the ontology could not be loaded.

    Examples
    --------
    >>> get_emg_id("acceleration_voltage")
    'EMG_00000004'

    >>> get_emg_id("working_distance")
    'EMG_00000050'

    >>> get_emg_id("custom_field") is None
    True

    Notes
    -----
    Many NexusLIMS fields intentionally carry no EMG label in the mapping
    table, so a None result is normal. A failure to load the ontology is
    logged as a warning; a label missing from the ontology is logged at
    debug level.
    """
    entry = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    wanted_label = None if entry is None else entry[1]
    if wanted_label is None:
        return None

    try:
        # Reverse lookup (label -> ID); the first matching term wins.
        found_id = next(
            (
                term_id
                for term_id, info in _load_emg_terms().items()
                if info["label"] == wanted_label
            ),
            None,
        )
    except Exception as err:
        _logger.warning("Failed to load EMG ontology: %s", err)
        return None

    if found_id is None:
        _logger.debug("EMG label '%s' not found in ontology", wanted_label)
    return found_id

529 

530 

def get_display_name(field_name: str) -> str:
    """
    Get the human-readable display name for a field.

    Returns the display name stored as the first tuple element of the
    field's NEXUSLIMS_TO_EMG_MAPPINGS entry. If the field is not in the
    mapping, returns a title-cased version of the field name with
    underscores replaced by spaces.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str
        Display name for the field

    Examples
    --------
    >>> get_display_name("acceleration_voltage")
    'Acceleration Voltage'

    >>> get_display_name("working_distance")
    'Working Distance'

    >>> get_display_name("dwell_time")
    'Pixel Dwell Time'

    >>> get_display_name("custom_field")
    'Custom Field'

    Notes
    -----
    The display name of a mapped field is not always derivable from the
    field name (e.g. "dwell_time" maps to "Pixel Dwell Time"), so mapped
    fields always take precedence over the fallback transformation.
    """
    mapping = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    if mapping is None:
        # Fallback for unmapped fields: "custom_field" -> "Custom Field"
        return field_name.replace("_", " ").title()

    return mapping[0]  # display name is the first element of the tuple

572 

573 

def get_description(field_name: str) -> str | None:
    """
    Return the short NexusLIMS description of a field.

    The description is the third tuple element of the field's entry in
    NEXUSLIMS_TO_EMG_MAPPINGS. It is distinct from the formal ontology
    definition, which is available through get_emg_definition().

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        Description text, or None when the field has no mapping entry

    Examples
    --------
    >>> desc = get_description("acceleration_voltage")
    >>> print(desc)
    Accelerating voltage of the electron/ion beam

    >>> get_description("unknown_field") is None
    True
    """
    entry = NEXUSLIMS_TO_EMG_MAPPINGS.get(field_name)
    return entry[2] if entry is not None else None

604 

605 

def has_emg_id(field_name: str) -> bool:
    """
    Report whether a field resolves to an EM Glossary ID.

    This is a thin predicate over get_emg_id(): the answer is True exactly
    when that function returns something other than None.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    bool
        True if get_emg_id() finds an ID for the field, False otherwise

    Examples
    --------
    >>> has_emg_id("acceleration_voltage")
    True

    >>> has_emg_id("magnification")
    False

    >>> has_emg_id("custom_field")
    False
    """
    return get_emg_id(field_name) is not None

636 

637 

def get_emg_uri(field_name: str) -> str | None:
    """
    Build the versioned EM Glossary PURL for a field.

    The field is first resolved to an EMG ID through get_emg_id(). The ID
    is then appended to the EM Glossary base URL together with
    ``EMG_VERSION``.

    Parameters
    ----------
    field_name : str
        Internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    str or None
        Full EMG PURL, or None if the field does not resolve to an EMG ID

    Examples
    --------
    >>> get_emg_uri("acceleration_voltage")
    'https://purls.helmholtz-metadaten.de/emg/v2.0.0/EMG_00000004'

    >>> get_emg_uri("working_distance")
    'https://purls.helmholtz-metadaten.de/emg/v2.0.0/EMG_00000050'

    >>> get_emg_uri("custom_field") is None
    True
    """
    if (emg_id := get_emg_id(field_name)) is not None:
        # Version segment sits between the namespace root and the term ID.
        return f"https://purls.helmholtz-metadaten.de/emg/{EMG_VERSION}/{emg_id}"
    return None

679 

680 

def get_all_mapped_fields() -> list[str]:
    """
    List every field name present in NEXUSLIMS_TO_EMG_MAPPINGS.

    All keys of the mapping table are included, whether or not the field
    has an EMG label attached.

    Returns
    -------
    list[str]
        Field names in ascending sorted order

    Examples
    --------
    >>> fields = get_all_mapped_fields()
    >>> "acceleration_voltage" in fields
    True
    >>> len(fields) > 0
    True
    """
    field_names = list(NEXUSLIMS_TO_EMG_MAPPINGS)
    field_names.sort()
    return field_names

702 

703 

def get_fields_with_emg_ids() -> list[str]:
    """
    List the mapped fields that resolve to an EM Glossary ID.

    Each key of NEXUSLIMS_TO_EMG_MAPPINGS is tested with has_emg_id().
    Fields that only carry a display name and description are left out.

    Returns
    -------
    list[str]
        Sorted list of field names for which has_emg_id() is True

    Examples
    --------
    >>> fields = get_fields_with_emg_ids()
    >>> "acceleration_voltage" in fields
    True
    >>> "magnification" in fields  # Has display name but no EMG ID
    False
    """
    return sorted(filter(has_emg_id, NEXUSLIMS_TO_EMG_MAPPINGS))

725 

726 

def get_all_emg_terms() -> Dict[str, Dict[str, str | None]]:
    """
    Get all EM Glossary terms from the OWL file.

    Returns the complete mapping of EMG IDs to labels and definitions
    loaded from the ontology. Useful for discovering available EMG terms.

    The returned dictionary (and each per-term dictionary inside it) is a
    fresh copy. The underlying term table is memoized and shared by every
    lookup function in this module, so handing out the cached object
    itself would let a caller's mutation silently corrupt later lookups.

    Returns
    -------
    dict[str, dict[str, str | None]]
        Mapping from EMG_ID -> {'label': str, 'definition': str | None}.
        An empty dict is returned if the ontology could not be loaded;
        the failure is logged with its traceback.

    Examples
    --------
    >>> terms = get_all_emg_terms()
    >>> "EMG_00000004" in terms
    True
    >>> terms["EMG_00000004"]["label"]
    'Acceleration Voltage'
    >>> print(terms["EMG_00000004"]["definition"])
    The potential difference between anode and cathode.
    """
    try:
        cached_terms = _load_emg_terms()
    except Exception:
        _logger.exception("Failed to load EMG ontology")
        return {}

    # Copy both levels so callers cannot mutate the cached term table.
    return {emg_id: dict(term_info) for emg_id, term_info in cached_terms.items()}