Coverage for nexusLIMS/schemas/units.py: 100%

102 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1""" 

2Pint unit registry and utilities for NexusLIMS metadata. 

3 

4This module provides a centralized Pint unit registry for handling physical quantities 

5with units in NexusLIMS metadata. It defines preferred units for different measurement 

6types and provides utilities for normalizing quantities to these preferred units. 

7 

8The module supports three-tiered unit serialization: 

9- **Tier 1 (Internal)**: Pint Quantity objects with QUDT/EMG mappings 

10- **Tier 2 (XML)**: Clean name/value/unit separation using XSD unit attribute 

11- **Tier 3 (Future)**: Optional QUDT/EMG URIs for semantic web integration 

12 

13Examples 

14-------- 

15Create and normalize quantities: 

16 

17>>> from nexusLIMS.schemas.units import ureg, normalize_quantity 

18>>> voltage = ureg.Quantity(10000, "volt") 

19>>> normalized = normalize_quantity("acceleration_voltage", voltage) 

20>>> print(normalized) 

2110.0 kilovolt 

22 

23Parse from strings: 

24 

25>>> from nexusLIMS.schemas.units import parse_quantity 

26>>> voltage = parse_quantity("acceleration_voltage", "10 kV") 

27>>> print(voltage) 

2810.0 kilovolt 

29 

30Serialize for XML: 

31 

32>>> from nexusLIMS.schemas.units import quantity_to_xml_parts 

33>>> name, value, unit = quantity_to_xml_parts("acceleration_voltage", voltage) 

34>>> print(f"<meta name='{name}' unit='{unit}'>{value}</meta>") 

35<meta name='Voltage' unit='kV'>10.0</meta> 

36""" 

37 

38import logging 

39from decimal import Decimal 

40from functools import lru_cache 

41from pathlib import Path 

42from typing import Any 

43 

44import numpy as np 

45from pint import UnitRegistry 

46from rdflib import RDFS, Graph, Namespace 

47 

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Application-wide Pint registry. Every module should use this one instance so
# that all quantities share a single set of unit definitions.
# ``non_int_type=Decimal`` makes Pint use Decimal for non-integer values, which
# avoids float round-off during conversions (e.g., 1.5625 rather than
# 1.5624999999999998).
ureg = UnitRegistry(non_int_type=Decimal)

# Handle on the registry's Quantity class, kept for isinstance checks.
_OriginalQuantity = ureg.Quantity

# The unpatched constructor. It is captured here, before ``__new__`` is
# replaced later in this module, so the replacement can delegate to it.
_original_new = _OriginalQuantity.__new__

63 

64 

def _quantity_new_with_decimal_conversion(cls, value, units=None):
    """
    Construct a Quantity, converting float magnitudes to ``Decimal`` first.

    The registry is created with ``non_int_type=Decimal``, but a float handed
    to the constructor is otherwise stored as-is, which produces mixed
    float/Decimal arithmetic that fails during unit conversion. Python floats
    and NumPy floating scalars are therefore converted through their string
    form; every other value is forwarded untouched.

    Parameters
    ----------
    cls : type
        The Quantity class being instantiated
    value : Any
        The magnitude (or whatever else the original constructor accepts)
    units : Any, optional
        Units forwarded unchanged to the original constructor

    Returns
    -------
    Any
        Whatever the original ``__new__`` returns for the given arguments
    """
    is_float_like = isinstance(value, (float, np.floating))
    # Going through str() keeps the short decimal repr (0.1 -> Decimal("0.1"))
    # instead of the float's full binary expansion.
    magnitude = Decimal(str(value)) if is_float_like else value
    return _original_new(cls, magnitude, units)

77 

78 

# Install the converting constructor on the existing class (instead of
# subclassing) so isinstance() checks against ureg.Quantity keep working.
_OriginalQuantity.__new__ = staticmethod(_quantity_new_with_decimal_conversion)

# Location of the bundled QUDT unit vocabulary (Turtle format)
QUDT_UNIT_TTL_PATH = Path(__file__).parent.joinpath("references", "qudt_unit.ttl")
QUDT_VERSION = "3.1.9"

# RDF namespace under which QUDT unit URIs live
QUDT_UNIT = Namespace("http://qudt.org/vocab/unit/")

# Custom microscopy unit: magnification in thousands (e.g., 160 kX = 160000x)
ureg.define("kiloX = 1000 = kX")

# Magnitudes whose absolute value falls outside these bounds are written in
# scientific notation when serialized for XML
_MIN_MAGNITUDE_FOR_NORMAL_NOTATION = 0.001
_MAX_MAGNITUDE_FOR_NORMAL_NOTATION = 1_000_000.0

95 

# Canonical unit per metadata field. Quantities are converted to these units
# (see normalize_quantity) before being serialized to XML or stored. Fields
# that do not appear here are left in whatever unit they arrive with.
PREFERRED_UNITS = {
    # --- Image acquisition ---
    "acceleration_voltage": ureg.kilovolt,
    "working_distance": ureg.millimeter,
    "beam_current": ureg.picoampere,
    "emission_current": ureg.microampere,
    "dwell_time": ureg.microsecond,
    "magnification": ureg.dimensionless,  # plain ratio, carries no unit
    "horizontal_field_width": ureg.micrometer,
    "pixel_width": ureg.nanometer,
    "pixel_height": ureg.nanometer,
    "scan_rotation": ureg.degree,
    # --- Stage position ---
    "stage_x": ureg.micrometer,
    "stage_y": ureg.micrometer,
    "stage_z": ureg.millimeter,
    "stage_tilt": ureg.degree,
    "stage_rotation": ureg.degree,
    "stage_alpha": ureg.degree,
    "stage_beta": ureg.degree,
    # --- Spectrum acquisition ---
    "acquisition_time": ureg.second,
    "live_time": ureg.second,
    "detector_energy_resolution": ureg.eV,
    "channel_size": ureg.eV,
    "starting_energy": ureg.keV,
    "azimuthal_angle": ureg.degree,
    "elevation_angle": ureg.degree,
    "takeoff_angle": ureg.degree,
    # --- Diffraction ---
    "camera_length": ureg.millimeter,
    "convergence_angle": ureg.milliradian,
    # --- Environment ---
    "temperature": ureg.kelvin,
    "pressure": ureg.pascal,
    "chamber_pressure": ureg.pascal,
}

136 

137 

@lru_cache(maxsize=1)
def _load_qudt_units() -> dict[str, str]:
    """
    Build the label -> URI lookup from the bundled QUDT Turtle file.

    Every ``rdfs:label`` attached to a subject inside the QUDT unit namespace
    is lower-cased, stripped of spaces and used as a key pointing at that
    subject's URI, so Pint-style unit names (e.g., "kilovolt") can be looked
    up directly.

    Returns
    -------
    dict[str, str]
        Mapping from normalized unit label -> QUDT URI. Empty when the
        vocabulary file is missing or cannot be parsed.

    Examples
    --------
    >>> units = _load_qudt_units()
    >>> units.get("kilovolt")
    'http://qudt.org/vocab/unit/KiloV'

    Notes
    -----
    The result is memoized with ``lru_cache``, so the file is parsed at most
    once per process. Failures are logged and reported as an empty mapping
    rather than raised. If two labels normalize to the same key, whichever is
    visited last wins.
    """
    if not QUDT_UNIT_TTL_PATH.exists():
        logger.warning("QUDT unit file not found at %s", QUDT_UNIT_TTL_PATH)
        return {}

    try:
        graph = Graph()
        graph.parse(QUDT_UNIT_TTL_PATH, format="turtle")
        logger.debug("Loaded QUDT unit vocabulary from %s", QUDT_UNIT_TTL_PATH)
    except Exception:
        logger.exception("Failed to parse QUDT unit file.")
        return {}

    namespace_prefix = str(QUDT_UNIT)
    uri_by_label = {}

    for subject in graph.subjects(predicate=RDFS.label):
        uri = str(subject)
        # Only subjects inside the QUDT unit namespace are of interest
        if uri.startswith(namespace_prefix):
            uri_by_label.update(
                (str(label_node).lower().replace(" ", ""), uri)
                for label_node in graph.objects(subject, RDFS.label)
            )

    logger.debug("Loaded %s QUDT unit mappings", len(uri_by_label))
    return uri_by_label

191 

192 

# Accessor for the QUDT label -> URI table; nothing is loaded until first use
@lru_cache(maxsize=1)
def _get_qudt_uri_mapping() -> dict[str, str]:
    """Return the QUDT unit URI mapping, loading it on the first call."""
    mapping = _load_qudt_units()
    return mapping

198 

199 

def normalize_quantity(field_name: str, quantity: Any) -> Any:
    """
    Convert a quantity to the preferred unit registered for a field.

    Only Pint Quantity objects are touched: the preferred unit is looked up
    in PREFERRED_UNITS and the quantity is converted to it. Anything else, and
    any Quantity whose field has no preferred unit, is handed back as
    received.

    Parameters
    ----------
    field_name : str
        The metadata field name (e.g., "acceleration_voltage", "working_distance")
    quantity : Any
        The value to normalize:
        - Pint Quantity object (converted when a preferred unit exists)
        - String (returned unchanged - use parse_quantity first)
        - Numeric value (returned unchanged)
        - None (returned unchanged)

    Returns
    -------
    Any
        The quantity expressed in the preferred unit, or the original value
        when it is not a Quantity, no preferred unit is defined, or the
        conversion fails

    Examples
    --------
    >>> voltage = ureg.Quantity(10000, "volt")
    >>> normalized = normalize_quantity("acceleration_voltage", voltage)
    >>> print(normalized)
    10.0 kilovolt

    >>> current = ureg.Quantity(0.1, "nanoampere")
    >>> normalized = normalize_quantity("beam_current", current)
    >>> print(normalized)
    100.0 picoampere

    >>> # Non-Quantity values pass through
    >>> normalize_quantity("unknown_field", "some string")
    'some string'

    >>> # Fields without preferred units return unchanged
    >>> qty = ureg.Quantity(5.0, "furlong")
    >>> normalize_quantity("custom_field", qty) == qty
    True
    """
    if isinstance(quantity, ureg.Quantity):
        target_unit = PREFERRED_UNITS.get(field_name)
        if target_unit is not None:
            try:
                converted = quantity.to(target_unit)
            except Exception as exc:
                # Best effort: a failed conversion is reported, never raised
                logger.warning(
                    "Could not convert %s from %s to %s: %s. Returning original value.",
                    field_name,
                    quantity.units,
                    target_unit,
                    exc,
                )
            else:
                return converted

    # Not a Quantity, no preferred unit, or the conversion failed
    return quantity

270 

271 

def parse_quantity(field_name: str, value: Any) -> Any:
    """
    Parse a value into a Pint Quantity and normalize to preferred units.

    Accepts multiple input types:
    - Pint Quantity: Normalized to preferred units
    - String: Parsed as quantity (e.g., "10 kV", "5.2 mm")
    - Numeric (int, float, Decimal, NumPy integer/floating scalar): Assumed
      to be in preferred units for field
    - bool: Passed through unchanged (a flag is not a measurement)
    - None: Passed through unchanged

    Parameters
    ----------
    field_name : str
        The metadata field name (e.g., "acceleration_voltage")
    value : Any
        The value to parse. Can be Quantity, string, numeric, or None

    Returns
    -------
    Any
        Pint Quantity in preferred units, or original value if unparseable
        (unparseable string, unsupported type, or a numeric value for a field
        with no preferred unit)

    Examples
    --------
    >>> qty = parse_quantity("acceleration_voltage", "10 kV")
    >>> print(qty)
    10.0 kilovolt

    >>> qty = parse_quantity("working_distance", 5.2)  # Assumes mm
    >>> print(qty)
    5.2 millimeter

    >>> qty = parse_quantity("beam_current", ureg.Quantity(0.1, "nA"))
    >>> print(qty)
    100.0 picoampere

    >>> parse_quantity("operator", None) is None
    True

    >>> parse_quantity("working_distance", True)  # bool is not a number here
    True
    """
    # Pass through None
    if value is None:
        return value

    # If already a Quantity, normalize it
    if isinstance(value, ureg.Quantity):
        return normalize_quantity(field_name, value)

    # Try parsing string as quantity
    if isinstance(value, str):
        try:
            qty = ureg.Quantity(value)
        except Exception as e:
            logger.debug(
                "Could not parse '%s' as quantity for %s: %s", value, field_name, e
            )
            return value
        return normalize_quantity(field_name, qty)

    # bool is a subclass of int; without this guard True/False would be
    # wrapped as a 1/0 measurement in the field's preferred unit.
    if isinstance(value, bool):
        return value

    # NumPy integer scalars become plain ints so the magnitude mixes cleanly
    # with the registry's Decimal arithmetic. NumPy floating scalars are left
    # alone: the registry's patched constructor already converts those.
    if isinstance(value, np.integer):
        value = int(value)

    # For numeric values, assume they're in the preferred unit
    if isinstance(value, (int, float, Decimal, np.floating)):
        preferred_unit = PREFERRED_UNITS.get(field_name)
        if preferred_unit is not None:
            return ureg.Quantity(value, preferred_unit)

    # All other cases (unknown types, or no preferred unit)
    return value

337 

338 

def quantity_to_xml_parts(
    field_name: str, quantity: Any
) -> tuple[str, str, str | None]:
    """
    Convert a field name and quantity to XML serialization parts.

    Extracts the display name, numeric value, and unit string for XML
    serialization. This enables clean XML output like:
    ``<meta name="Voltage" unit="kV">10.0</meta>``

    Parameters
    ----------
    field_name : str
        The internal field name (e.g., "acceleration_voltage")
    quantity : Any
        The quantity value (Pint Quantity, string, or numeric)

    Returns
    -------
    tuple[str, str, str | None]
        A 3-tuple of (display_name, value_string, unit_string)
        - display_name: Human-readable field name for XML
        - value_string: Numeric value as string
        - unit_string: Unit abbreviation, or None when the quantity carries
          no unit at all (units are exactly ``dimensionless``) or the value
          is not a Quantity

    Examples
    --------
    >>> qty = ureg.Quantity(10.0, "kilovolt")
    >>> name, value, unit = quantity_to_xml_parts("acceleration_voltage", qty)
    >>> print(f"<meta name='{name}' unit='{unit}'>{value}</meta>")
    <meta name='Voltage' unit='kV'>10.0</meta>

    >>> qty = ureg.Quantity(5000, "dimensionless")
    >>> name, value, unit = quantity_to_xml_parts("magnification", qty)
    >>> print(f"<meta name='{name}'>{value}</meta>")  # No unit attr
    <meta name='Magnification'>5000</meta>

    Notes
    -----
    For non-Quantity values, the value is converted to string and unit is None.
    Display name mapping is handled by separate EM Glossary utilities.

    The unit is omitted only when the quantity's units are exactly
    ``dimensionless``. Pint also reports ``Quantity.dimensionless`` as True
    for radian-based units (degree, milliradian) and for scale-only units
    such as ``kiloX``; those keep their unit string so angles and scaled
    values are not written as bare numbers.

    Magnitudes whose absolute value lies outside the normal-notation bounds
    are written in scientific notation; a magnitude of exactly zero is always
    written in plain notation.
    """
    from nexusLIMS.schemas.em_glossary import (  # noqa: PLC0415
        get_display_name,
    )  # Import here to avoid circular imports

    display_name = get_display_name(field_name)

    # Non-Quantity value
    if not isinstance(quantity, ureg.Quantity):
        return display_name, str(quantity), None

    # Format magnitude (use scientific notation for very small/large).
    # Zero is excluded explicitly: it is below the lower bound but is not a
    # "very small" value that benefits from an exponent.
    magnitude = quantity.magnitude
    abs_magnitude = abs(magnitude)
    use_scientific = magnitude != 0 and (
        abs_magnitude < _MIN_MAGNITUDE_FOR_NORMAL_NOTATION
        or abs_magnitude > _MAX_MAGNITUDE_FOR_NORMAL_NOTATION
    )
    value_str = f"{magnitude:.6e}" if use_scientific else f"{magnitude:.6g}"

    # Compare the units themselves instead of using quantity.dimensionless,
    # which is also True for degree/radian and kiloX.
    if quantity.units == ureg.dimensionless:
        unit_str = None
    else:
        # Compact format (kV instead of kilovolt)
        unit_str = f"{quantity.units:~}"

    return display_name, value_str, unit_str

409 

410 

def get_qudt_uri(quantity: Any) -> str | None:
    """
    Look up the QUDT ontology URI for the unit of a Pint Quantity.

    The unit's full name is lower-cased, stripped of spaces and used as a key
    into the label -> URI table built from the QUDT unit vocabulary file
    (qudt_unit.ttl). This supports the Tier 3 semantic web integration
    described in the module docstring.

    Parameters
    ----------
    quantity : Any
        A Pint Quantity object

    Returns
    -------
    str or None
        QUDT URI string, or None if the value is not a Quantity or no URI
        matches its unit

    Examples
    --------
    >>> qty = ureg.Quantity(10, "kilovolt")
    >>> get_qudt_uri(qty)
    'http://qudt.org/vocab/unit/KiloV'

    >>> qty = ureg.Quantity(5.2, "millimeter")
    >>> get_qudt_uri(qty)
    'http://qudt.org/vocab/unit/MilliM'

    >>> get_qudt_uri("not a quantity")
    # Returns None
    """
    if not isinstance(quantity, ureg.Quantity):
        return None

    # Same normalization as the keys of the QUDT table: lowercase, no spaces
    lookup_key = str(quantity.units).lower().replace(" ", "")
    return _get_qudt_uri_mapping().get(lookup_key)

453 

454 

def serialize_quantity(quantity: Any) -> dict[str, Any]:
    """
    Turn a value into a plain dictionary for storage or JSON export.

    A Pint Quantity becomes a dict holding its magnitude under 'value' and
    the string form of its units under 'units'. Any other value is wrapped
    as-is under 'value'. For XML serialization, use
    :func:`quantity_to_xml_parts` instead.

    Parameters
    ----------
    quantity : Any
        A Pint Quantity object, or other value to serialize

    Returns
    -------
    dict[str, Any]
        Dictionary with 'value' and 'units' keys if Quantity,
        or {'value': quantity} for non-Quantity values

    Examples
    --------
    >>> qty = ureg.Quantity(10, "kilovolt")
    >>> serialize_quantity(qty)
    {'value': 10.0, 'units': 'kilovolt'}

    >>> serialize_quantity("some string")
    {'value': 'some string'}

    Notes
    -----
    The magnitude is stored exactly as held by the Quantity, with no type
    conversion. NOTE(review): the registry is created with
    ``non_int_type=Decimal``, so the magnitude may be a ``Decimal``, which the
    standard ``json`` encoder does not handle - confirm that callers convert
    it before dumping.
    """
    if not isinstance(quantity, ureg.Quantity):
        return {"value": quantity}

    payload = {
        "value": quantity.magnitude,
        "units": str(quantity.units),
    }
    return payload

489 

490 

def deserialize_quantity(data: dict[str, Any]) -> Any:
    """
    Rebuild a value from the dictionary form made by :func:`serialize_quantity`.

    When the dict has a 'units' key, a Pint Quantity is constructed from its
    'value' and 'units' entries. Otherwise the 'value' entry is returned
    directly.

    Parameters
    ----------
    data : dict[str, Any]
        Dictionary with 'value' and 'units' keys, or just 'value' key

    Returns
    -------
    Any
        Pint Quantity if dict has value/units, otherwise the 'value' field
        (None when that key is absent too)

    Raises
    ------
    KeyError
        If 'units' is present but 'value' is missing

    Examples
    --------
    >>> data = {'value': 10.0, 'units': 'kilovolt'}
    >>> qty = deserialize_quantity(data)
    >>> print(qty)
    10.0 kilovolt

    >>> data = {'value': 'some string'}
    >>> deserialize_quantity(data)
    'some string'
    """
    if "units" not in data:
        # Plain value: nothing to reconstruct
        return data.get("value")

    magnitude = data["value"]
    unit_name = data["units"]
    return ureg.Quantity(magnitude, unit_name)