Coverage for nexusLIMS/schemas/units.py: 100%
102 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
"""
Pint unit registry and utilities for NexusLIMS metadata.

This module provides a centralized Pint unit registry for handling physical quantities
with units in NexusLIMS metadata. It defines preferred units for different measurement
types and provides utilities for normalizing quantities to these preferred units.

The module supports three-tiered unit serialization:

- **Tier 1 (Internal)**: Pint Quantity objects with QUDT/EMG mappings
- **Tier 2 (XML)**: Clean name/value/unit separation using XSD unit attribute
- **Tier 3 (Future)**: Optional QUDT/EMG URIs for semantic web integration

Examples
--------
Create and normalize quantities:

>>> from nexusLIMS.schemas.units import ureg, normalize_quantity
>>> voltage = ureg.Quantity(10000, "volt")
>>> normalized = normalize_quantity("acceleration_voltage", voltage)
>>> print(normalized)
10.0 kilovolt

Parse from strings:

>>> from nexusLIMS.schemas.units import parse_quantity
>>> voltage = parse_quantity("acceleration_voltage", "10 kV")
>>> print(voltage)
10.0 kilovolt

Serialize for XML:

>>> from nexusLIMS.schemas.units import quantity_to_xml_parts
>>> name, value, unit = quantity_to_xml_parts("acceleration_voltage", voltage)
>>> print(f"<meta name='{name}' unit='{unit}'>{value}</meta>")
<meta name='Voltage' unit='kV'>10.0</meta>
"""
38import logging
39from decimal import Decimal
40from functools import lru_cache
41from pathlib import Path
42from typing import Any
44import numpy as np
45from pint import UnitRegistry
46from rdflib import RDFS, Graph, Namespace
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Singleton Pint unit registry for the entire application.
# Sharing one registry keeps unit definitions consistent across all modules.
# non_int_type=Decimal makes the registry use Decimal for non-integer values,
# which avoids floating-point precision artifacts when converting units
# (e.g., 1.5625 instead of 1.5624999999999998).
ureg = UnitRegistry(non_int_type=Decimal)

# Keep a reference to the registry's Quantity class; it is the class whose
# __new__ gets patched further down, and the one isinstance checks rely on.
_OriginalQuantity = ureg.Quantity

# Capture the unpatched __new__ so the float-to-Decimal wrapper defined below
# can delegate to it. The wrapper exists to prevent type errors caused by
# Quantities whose magnitudes mix float and Decimal.
_original_new = _OriginalQuantity.__new__
def _quantity_new_with_decimal_conversion(cls, value, units=None):
    """
    Construct a Quantity, converting a float magnitude to Decimal first.

    The registry is created with ``non_int_type=Decimal``, but float inputs
    are not converted automatically, which leaves float and Decimal
    magnitudes mixed and makes later unit conversions fail. Python floats and
    NumPy floating scalars are therefore converted through their string form
    before the original constructor runs; every other value is forwarded
    untouched.

    Parameters
    ----------
    cls : type
        The Quantity class being instantiated
    value : Any
        The magnitude (or any other value the original ``__new__`` accepts)
    units : Any, optional
        The units, forwarded unchanged to the original ``__new__``

    Returns
    -------
    Any
        Whatever the original ``__new__`` returns for the (possibly
        converted) value
    """
    is_float_like = isinstance(value, (float, np.floating))
    # Going through str() keeps the short decimal representation of the float
    # instead of its full binary expansion
    magnitude = Decimal(str(value)) if is_float_like else value
    return _original_new(cls, magnitude, units)
# Replace __new__ on the existing class (instead of creating a subclass) so the
# class object itself stays intact and isinstance() checks against it keep
# working. The wrapper takes ``cls`` as an explicit first argument, hence the
# staticmethod() wrapping.
_OriginalQuantity.__new__ = staticmethod(_quantity_new_with_decimal_conversion)
# Path to the QUDT unit vocabulary file (Turtle format), resolved relative to
# the directory containing this module
QUDT_UNIT_TTL_PATH = Path(__file__).parent / "references" / "qudt_unit.ttl"
# NOTE(review): presumably the version of the bundled QUDT vocabulary file;
# nothing in this module reads it - confirm against the file in references/
QUDT_VERSION = "3.1.9"

# RDF namespace for QUDT unit URIs; used to filter subjects when loading the
# vocabulary
QUDT_UNIT = Namespace("http://qudt.org/vocab/unit/")

# Define custom microscopy units
ureg.define("kiloX = 1000 = kX")  # Magnification in thousands (e.g., 160 kX = 160000x)

# Thresholds for scientific notation formatting: magnitudes whose absolute
# value is below the minimum or above the maximum are written in scientific
# notation when serialized for XML
_MIN_MAGNITUDE_FOR_NORMAL_NOTATION = 1e-3
_MAX_MAGNITUDE_FOR_NORMAL_NOTATION = 1e6
# Preferred units for each field type.
# Maps a metadata field name to the canonical Pint unit that quantities for
# that field are normalized to before serialization to XML or storage.
# Fields that are absent from this mapping are left in whatever unit they
# arrive in.
PREFERRED_UNITS = {
    # Image acquisition parameters
    "acceleration_voltage": ureg.kilovolt,
    "working_distance": ureg.millimeter,
    "beam_current": ureg.picoampere,
    "emission_current": ureg.microampere,
    "dwell_time": ureg.microsecond,
    "magnification": ureg.dimensionless,  # Magnification has no units
    "horizontal_field_width": ureg.micrometer,
    "pixel_width": ureg.nanometer,
    "pixel_height": ureg.nanometer,
    "scan_rotation": ureg.degree,
    # Stage position components (note: x/y use micrometers, z uses millimeters)
    "stage_x": ureg.micrometer,
    "stage_y": ureg.micrometer,
    "stage_z": ureg.millimeter,
    "stage_tilt": ureg.degree,
    "stage_rotation": ureg.degree,
    "stage_alpha": ureg.degree,
    "stage_beta": ureg.degree,
    # Spectrum acquisition parameters
    "acquisition_time": ureg.second,
    "live_time": ureg.second,
    "detector_energy_resolution": ureg.eV,
    "channel_size": ureg.eV,
    "starting_energy": ureg.keV,
    "azimuthal_angle": ureg.degree,
    "elevation_angle": ureg.degree,
    "takeoff_angle": ureg.degree,
    # Diffraction parameters
    "camera_length": ureg.millimeter,
    "convergence_angle": ureg.milliradian,
    # Environmental parameters
    "temperature": ureg.kelvin,
    "pressure": ureg.pascal,
    "chamber_pressure": ureg.pascal,
}
@lru_cache(maxsize=1)
def _load_qudt_units() -> dict[str, str]:
    """
    Build the label -> URI lookup from the QUDT unit vocabulary file.

    Parses the Turtle file at ``QUDT_UNIT_TTL_PATH`` and records, for every
    subject inside the QUDT unit namespace, each of its ``rdfs:label`` values
    (lowercased, spaces removed) as a key pointing at the subject's URI.

    Returns
    -------
    dict[str, str]
        Mapping from normalized unit label -> QUDT URI. Empty if the
        vocabulary file is missing or cannot be parsed.

    Examples
    --------
    >>> units = _load_qudt_units()
    >>> units.get("kilovolt")
    'http://qudt.org/vocab/unit/KiloV'

    Notes
    -----
    The result is memoized with ``lru_cache``, so the file is parsed at most
    once per process; an empty result from a failed load is memoized as well.
    When several labels normalize to the same key, the one processed last
    wins.
    """
    ttl_path = QUDT_UNIT_TTL_PATH
    if not ttl_path.exists():
        logger.warning("QUDT unit file not found at %s", ttl_path)
        return {}

    try:
        graph = Graph()
        graph.parse(ttl_path, format="turtle")
        logger.debug("Loaded QUDT unit vocabulary from %s", ttl_path)
    except Exception:
        # Best effort: a broken vocabulary file only disables URI lookups
        logger.exception("Failed to parse QUDT unit file.")
        return {}

    namespace_prefix = str(QUDT_UNIT)
    uri_by_label: dict[str, str] = {}

    # Walk every subject that carries an rdfs:label, keeping only QUDT units
    for subject in graph.subjects(predicate=RDFS.label):
        uri = str(subject)
        if uri.startswith(namespace_prefix):
            # A unit may have several labels; register each of them
            for raw_label in graph.objects(subject, RDFS.label):
                key = str(raw_label).lower().replace(" ", "")
                uri_by_label[key] = uri

    logger.debug("Loaded %s QUDT unit mappings", len(uri_by_label))
    return uri_by_label
# QUDT unit URI mapping, loaded lazily on first access and memoized by lru_cache
@lru_cache(maxsize=1)
def _get_qudt_uri_mapping() -> dict[str, str]:
    """
    Return the QUDT label -> URI mapping, loading it on first use.

    Returns
    -------
    dict[str, str]
        The mapping produced by :func:`_load_qudt_units`
    """
    mapping = _load_qudt_units()
    return mapping
def normalize_quantity(field_name: str, quantity: Any) -> Any:
    """
    Convert a quantity to the preferred unit registered for a field.

    Only Pint Quantity objects are converted, and only when ``field_name``
    has an entry in ``PREFERRED_UNITS``. Everything else (strings, plain
    numbers, ``None``, quantities for unregistered fields) is handed back
    untouched. A failed conversion is logged as a warning and the original
    quantity is returned instead of raising.

    Parameters
    ----------
    field_name : str
        The metadata field name (e.g., "acceleration_voltage", "working_distance")
    quantity : Any
        The value to normalize:
        - Pint Quantity object (converted when a preferred unit exists)
        - String (returned unchanged - use parse_quantity first)
        - Numeric value (returned unchanged)
        - None (returned unchanged)

    Returns
    -------
    Any
        The quantity expressed in the preferred unit, or the original value
        when it is not a Quantity, no preferred unit is defined, or the
        conversion failed

    Examples
    --------
    >>> voltage = ureg.Quantity(10000, "volt")
    >>> normalized = normalize_quantity("acceleration_voltage", voltage)
    >>> print(normalized)
    10.0 kilovolt

    >>> current = ureg.Quantity(0.1, "nanoampere")
    >>> normalized = normalize_quantity("beam_current", current)
    >>> print(normalized)
    100.0 picoampere

    >>> # Non-Quantity values pass through
    >>> normalize_quantity("unknown_field", "some string")
    'some string'

    >>> # Fields without preferred units return unchanged
    >>> qty = ureg.Quantity(5.0, "furlong")
    >>> normalize_quantity("custom_field", qty) == qty
    True
    """
    # The preferred unit is only looked up for genuine Pint quantities
    target_unit = (
        PREFERRED_UNITS.get(field_name)
        if isinstance(quantity, ureg.Quantity)
        else None
    )
    if target_unit is None:
        # Not a Quantity, or no preferred unit registered: nothing to do
        return quantity

    try:
        converted = quantity.to(target_unit)
    except Exception as exc:
        # Deliberately non-fatal: report the problem and keep the original
        logger.warning(
            "Could not convert %s from %s to %s: %s. Returning original value.",
            field_name,
            quantity.units,
            target_unit,
            exc,
        )
        return quantity
    return converted
def parse_quantity(field_name: str, value: Any) -> Any:
    """
    Parse a value into a Pint Quantity and normalize to preferred units.

    Accepts multiple input types:
    - Pint Quantity: Normalized to preferred units
    - String: Parsed as quantity (e.g., "10 kV", "5.2 mm"); empty or
      whitespace-only strings are passed through unchanged
    - Numeric (int, float, Decimal): Assumed to be in preferred units for field
    - bool: Passed through unchanged (never treated as a number)
    - None: Passed through unchanged

    Parameters
    ----------
    field_name : str
        The metadata field name (e.g., "acceleration_voltage")
    value : Any
        The value to parse. Can be Quantity, string, numeric, or None

    Returns
    -------
    Any
        Pint Quantity in preferred units, or original value if unparseable
        (unparseable string, unsupported type, or numeric value for a field
        without a preferred unit)

    Examples
    --------
    >>> qty = parse_quantity("acceleration_voltage", "10 kV")
    >>> print(qty)
    10.0 kilovolt

    >>> qty = parse_quantity("working_distance", 5.2)  # Assumes mm
    >>> print(qty)
    5.2 millimeter

    >>> qty = parse_quantity("beam_current", ureg.Quantity(0.1, "nA"))
    >>> print(qty)
    100.0 picoampere

    >>> parse_quantity("operator", None) is None
    True

    >>> parse_quantity("acceleration_voltage", "")
    ''

    >>> parse_quantity("acceleration_voltage", True)
    True
    """
    # Pass through None
    if value is None:
        return value

    # A blank string carries no quantity. Return it as-is rather than handing
    # it to the Pint parser, so it can never turn into a spurious quantity.
    # NOTE(review): Pint appears to parse "" as dimensionless 1 - confirm
    if isinstance(value, str) and not value.strip():
        return value

    # bool is a subclass of int; without this guard True/False would be
    # wrapped as 1/0 in the field's preferred unit by the numeric branch below
    if isinstance(value, bool):
        return value

    # If already a Quantity, normalize it
    if isinstance(value, ureg.Quantity):
        return normalize_quantity(field_name, value)

    # Try parsing string as quantity
    if isinstance(value, str):
        try:
            qty = ureg.Quantity(value)
            return normalize_quantity(field_name, qty)
        except Exception as e:
            # Best effort: an unparseable string is returned unchanged
            logger.debug(
                "Could not parse '%s' as quantity for %s: %s", value, field_name, e
            )
        return value

    # For numeric values, assume they're in the preferred unit. Decimal is
    # accepted because the registry itself uses Decimal for non-integer values.
    if isinstance(value, (int, float, Decimal)):
        preferred_unit = PREFERRED_UNITS.get(field_name)
        if preferred_unit is not None:
            return ureg.Quantity(value, preferred_unit)

    # All other cases (unknown types, or numeric with no preferred unit)
    return value
def quantity_to_xml_parts(
    field_name: str, quantity: Any
) -> tuple[str, str, str | None]:
    """
    Convert a field name and quantity to XML serialization parts.

    Extracts the display name, numeric value, and unit string for XML
    serialization. This enables clean XML output like:
    ``<meta name="Voltage" unit="kV">10.0</meta>``

    Parameters
    ----------
    field_name : str
        The internal field name (e.g., "acceleration_voltage")
    quantity : Any
        The quantity value (Pint Quantity, string, or numeric)

    Returns
    -------
    tuple[str, str, str | None]
        A 3-tuple of (display_name, value_string, unit_string)
        - display_name: Human-readable field name for XML
        - value_string: Numeric value as string
        - unit_string: Unit abbreviation, or None if the quantity carries no
          unit at all or the value is not a Quantity

    Examples
    --------
    >>> qty = ureg.Quantity(10.0, "kilovolt")
    >>> name, value, unit = quantity_to_xml_parts("acceleration_voltage", qty)
    >>> print(f"<meta name='{name}' unit='{unit}'>{value}</meta>")
    <meta name='Voltage' unit='kV'>10.0</meta>

    >>> qty = ureg.Quantity(5000, "dimensionless")
    >>> name, value, unit = quantity_to_xml_parts("magnification", qty)
    >>> print(f"<meta name='{name}'>{value}</meta>")  # No unit attr
    <meta name='Magnification'>5000</meta>

    Notes
    -----
    For non-Quantity values, the value is converted to string and unit is None.
    Display name mapping is handled by separate EM Glossary utilities.

    The unit is omitted only when the quantity has no unit at all. Pint treats
    angles (degree, radian) and pure scale factors such as the custom ``kX``
    as dimensionless, so testing ``quantity.dimensionless`` would silently
    strip the unit from angle fields and drop the factor of a ``kX`` value.

    A magnitude of exactly zero is written in plain notation; scientific
    notation is reserved for non-zero magnitudes outside the normal range.
    """
    from nexusLIMS.schemas.em_glossary import (  # noqa: PLC0415
        get_display_name,
    )  # Import here to avoid circular imports

    display_name = get_display_name(field_name)

    # Non-Quantity value: stringify, no unit
    if not isinstance(quantity, ureg.Quantity):
        return display_name, str(quantity), None

    # Format magnitude (scientific notation for very small/large, non-zero values)
    magnitude = quantity.magnitude
    abs_magnitude = abs(magnitude)
    use_scientific = magnitude != 0 and (
        abs_magnitude < _MIN_MAGNITUDE_FOR_NORMAL_NOTATION
        or abs_magnitude > _MAX_MAGNITUDE_FOR_NORMAL_NOTATION
    )
    value_str = f"{magnitude:.6e}" if use_scientific else f"{magnitude:.6g}"

    # Emit a unit unless the quantity literally has none; the compact format
    # yields the abbreviation (kV instead of kilovolt)
    if quantity.units == ureg.dimensionless:
        unit_str = None
    else:
        unit_str = f"{quantity.units:~}"

    return display_name, value_str, unit_str
def get_qudt_uri(quantity: Any) -> str | None:
    """
    Look up the QUDT ontology URI for the unit of a Pint Quantity.

    The unit's full name is lowercased, stripped of spaces, and used as the
    key into the label -> URI mapping built from the QUDT unit vocabulary
    file (qudt_unit.ttl) with RDFLib. This supports Tier 3 semantic web
    integration.

    Parameters
    ----------
    quantity : Any
        A Pint Quantity object

    Returns
    -------
    str or None
        QUDT URI string, or None if ``quantity`` is not a Quantity or its
        unit has no entry in the mapping

    Examples
    --------
    >>> qty = ureg.Quantity(10, "kilovolt")
    >>> get_qudt_uri(qty)
    'http://qudt.org/vocab/unit/KiloV'

    >>> qty = ureg.Quantity(5.2, "millimeter")
    >>> get_qudt_uri(qty)
    'http://qudt.org/vocab/unit/MilliM'

    >>> get_qudt_uri("not a quantity")
    # Returns None
    """
    if isinstance(quantity, ureg.Quantity):
        # Normalize the unit name the same way the mapping keys are normalized
        lookup_key = str(quantity.units).lower().replace(" ", "")
        return _get_qudt_uri_mapping().get(lookup_key)
    return None
def serialize_quantity(quantity: Any) -> dict[str, Any]:
    """
    Turn a value into a plain dictionary for internal storage or export.

    A Pint Quantity becomes a dict holding its magnitude under 'value' and
    the string form of its units under 'units'. Any other value is wrapped
    under 'value' alone. For XML serialization, use
    :func:`quantity_to_xml_parts` instead.

    Parameters
    ----------
    quantity : Any
        A Pint Quantity object, or other value to serialize

    Returns
    -------
    dict[str, Any]
        ``{'value': magnitude, 'units': units_string}`` for a Quantity,
        or ``{'value': quantity}`` for non-Quantity values

    Examples
    --------
    >>> qty = ureg.Quantity(10, "kilovolt")
    >>> serialize_quantity(qty)["units"]
    'kilovolt'

    >>> serialize_quantity("some string")
    {'value': 'some string'}

    Notes
    -----
    The magnitude is stored exactly as the Quantity holds it; no type
    conversion is applied here.
    """
    if not isinstance(quantity, ureg.Quantity):
        return {"value": quantity}

    magnitude = quantity.magnitude
    units_text = str(quantity.units)
    return {"value": magnitude, "units": units_text}
def deserialize_quantity(data: dict[str, Any]) -> Any:
    """
    Rebuild a value from the dictionary form of :func:`serialize_quantity`.

    When the dict has a 'units' key, a Pint Quantity is constructed from its
    'value' and 'units' entries. Otherwise the 'value' entry is returned
    as-is (``None`` when that key is absent too).

    Parameters
    ----------
    data : dict[str, Any]
        Dictionary with 'value' and 'units' keys, or just 'value' key

    Returns
    -------
    Any
        Pint Quantity if dict has value/units, otherwise the 'value' field

    Raises
    ------
    KeyError
        If 'units' is present but 'value' is missing

    Examples
    --------
    >>> data = {'value': 10.0, 'units': 'kilovolt'}
    >>> qty = deserialize_quantity(data)
    >>> print(qty)
    10.0 kilovolt

    >>> data = {'value': 'some string'}
    >>> deserialize_quantity(data)
    'some string'
    """
    # No units recorded: the payload is a plain value, not a quantity
    if "units" not in data:
        return data.get("value")

    magnitude = data["value"]
    units = data["units"]
    return ureg.Quantity(magnitude, units)