Coverage for nexusLIMS/extractors/xml_serialization.py: 100%
49 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
1"""
XML serialization utilities for NexusLIMS metadata schemas.

This module provides utilities for converting type-specific metadata schemas
(using Pint Quantities and EM Glossary terminology) into XML format compatible
with the Nexus Experiment schema.

Key Functions
-------------
- :func:`serialize_quantity_to_xml`: Convert Pint Quantities to value/unit pairs for XML
- :func:`get_xml_field_name`: Map EM Glossary field name to human-friendly display name
- :func:`prepare_metadata_for_xml`: Convert rich metadata to XML-compatible flat dict

Examples
--------
Convert a Pint Quantity to XML:

>>> from nexusLIMS.schemas.units import ureg
>>> qty = ureg.Quantity(10, "kilovolt")
>>> value, unit = serialize_quantity_to_xml(qty)
>>> value, unit
(10.0, 'kV')

Get human-readable field name for XML:

>>> get_xml_field_name("acceleration_voltage")
'Voltage'
>>> get_xml_field_name("working_distance")
'Working Distance'
30"""
32from typing import Any
34from pint import Quantity
36from nexusLIMS.schemas import em_glossary
37from nexusLIMS.schemas.units import get_qudt_uri as _get_qudt_uri
38from nexusLIMS.schemas.units import ureg
# Lookup table from internal (EM Glossary style) field names to the labels
# used in XML output. Several internal names may share one label (for example
# both "stage_tilt" and "stage_tilt_alpha" produce "Stage Tilt").
EM_GLOSSARY_TO_XML_DISPLAY_NAMES = {
    # Imaging fields (common)
    "acceleration_voltage": "Voltage",
    "working_distance": "Working Distance",
    "beam_current": "Beam Current",
    "emission_current": "Emission Current",
    "magnification": "Magnification",
    "dwell_time": "Pixel Dwell Time",
    "horizontal_field_width": "Horizontal Field Width",
    "pixel_width": "Pixel Width",
    "scan_rotation": "Scan Rotation",
    "detector_type": "Detector",
    # Spectrum fields
    "acquisition_time": "Acquisition Time",
    "live_time": "Live Time",
    "detector_energy_resolution": "Energy Resolution",
    "channel_size": "Channel Size",
    "starting_energy": "Starting Energy",
    "azimuthal_angle": "Azimuthal Angle",
    "elevation_angle": "Elevation Angle",
    "elements": "Elements",
    # Diffraction fields
    "camera_length": "Camera Length",
    "convergence_angle": "Convergence Angle",
    "diffraction_mode": "Diffraction Mode",
    # Stage position fields
    "stage_position": "Stage Position",
    "stage_x": "Stage X",
    "stage_y": "Stage Y",
    "stage_z": "Stage Z",
    "stage_tilt": "Stage Tilt",
    "stage_tilt_alpha": "Stage Tilt",  # Primary tilt axis
    "stage_tilt_beta": "Stage Tilt Beta",  # Secondary tilt axis
    "stage_rotation": "Stage Rotation",
    # Data fields (core)
    "acquisition_timestamp": "Creation Time",
    "data_type": "Data Type",
    "dataset_type": "DatasetType",
    "data_dimensions": "Data Dimensions",
    "instrument_id": "Instrument ID",
    # Legacy/compatibility fields (old schema): every old display name is
    # accepted as a key and maps to itself.
    **{
        legacy_name: legacy_name
        for legacy_name in (
            "Voltage",
            "Working Distance",
            "Beam Current",
            "Magnification",
            "Detector",
            "Creation Time",
            "Data Type",
            "DatasetType",
            "Data Dimensions",
            "Instrument ID",
        )
    },
}
"""
Mapping from EM Glossary field names to human-readable XML display names.
This maintains backward compatibility with existing XML field names.
"""
def serialize_quantity_to_xml(qty: Quantity) -> tuple[float, str]:
    """
    Split a Pint Quantity into a ``(value, unit)`` pair for XML output.

    The magnitude is coerced to a plain ``float`` and the units are rendered
    with the ``~`` format specifier, giving the two pieces needed for an XML
    meta element that carries a ``unit`` attribute.

    Parameters
    ----------
    qty : :class:`pint.Quantity`
        The quantity to serialize. Its ``magnitude`` must be convertible
        with ``float()``.

    Returns
    -------
    value : float
        ``float(qty.magnitude)``
    unit : str
        ``qty.units`` formatted with the ``~`` specifier
        (e.g., "kV", "mm", "pA")

    Examples
    --------
    >>> from nexusLIMS.schemas.units import ureg
    >>> qty = ureg.Quantity(10, "kilovolt")
    >>> value, unit = serialize_quantity_to_xml(qty)
    >>> value
    10.0
    >>> unit
    'kV'

    >>> qty = ureg.Quantity(5.2, "millimeter")
    >>> value, unit = serialize_quantity_to_xml(qty)
    >>> value
    5.2
    >>> unit
    'mm'

    Notes
    -----
    ``~`` is Pint's short-symbol unit format, so "kilovolt" is emitted as
    "kV" rather than spelled out.
    """
    # format(obj, "~") goes through the same __format__ hook as f"{obj:~}".
    return float(qty.magnitude), format(qty.units, "~")
def get_xml_field_name(field_name: str) -> str:
    """
    Translate an internal field name into the label used in XML output.

    Names listed in ``EM_GLOSSARY_TO_XML_DISPLAY_NAMES`` are returned exactly
    as mapped there, which keeps the XML field names backward compatible.
    Any other name is turned into a label by replacing underscores with
    spaces and title-casing the result.

    Parameters
    ----------
    field_name : str
        The internal EM Glossary field name (e.g., "acceleration_voltage")

    Returns
    -------
    display_name : str
        The human-readable display name for XML (e.g., "Voltage")

    Examples
    --------
    >>> get_xml_field_name("acceleration_voltage")
    'Voltage'
    >>> get_xml_field_name("working_distance")
    'Working Distance'
    >>> get_xml_field_name("detector_type")
    'Detector'

    Fields without an explicit mapping fall back to a generated label:

    >>> get_xml_field_name("some_custom_field")
    'Some Custom Field'

    Notes
    -----
    To control how a new field appears in XML, add it to
    ``EM_GLOSSARY_TO_XML_DISPLAY_NAMES`` instead of relying on the fallback.
    """
    try:
        # An explicit mapping always takes precedence.
        return EM_GLOSSARY_TO_XML_DISPLAY_NAMES[field_name]
    except KeyError:
        pass

    # Unmapped (e.g. instrument-specific) field: snake_case -> Title Case.
    # Done outside the handler so errors here are not chained to the KeyError.
    spaced_name = field_name.replace("_", " ")
    return spaced_name.title()
def prepare_metadata_for_xml(
    metadata: dict[str, Any],
) -> dict[str, str | float]:
    """
    Flatten a rich metadata dictionary into XML-ready key/value pairs.

    Each entry of ``metadata`` is processed as follows:

    1. Entries whose value is ``None`` and the keys ``"warnings"``,
       ``"schema_version"`` and ``"extensions"`` are dropped.
    2. The key is translated with :func:`get_xml_field_name`.
    3. A Pint Quantity becomes two entries: the numeric value under the
       display name and the unit string under ``"<display name>_unit"``.
    4. A ``dict`` stored under ``"stage_position"`` is flattened: every
       non-``None`` axis ``a`` is stored under the display name of
       ``"stage_<a lower-cased>"`` (Quantities split as in step 3, anything
       else stored unchanged).
    5. A ``list`` is stored as a single ``", "``-joined string.
    6. Every other value is stored unchanged.

    Parameters
    ----------
    metadata : dict[str, Any]
        Metadata dictionary from type-specific schema (ImageMetadata, etc.)
        May contain Pint Quantities, nested dicts, or simple values

    Returns
    -------
    xml_metadata : dict[str, str | float]
        Flat dictionary keyed by XML display name.
        For Quantity fields, two entries are created:
        - "<field_name>": numeric value
        - "<field_name>_unit": unit string

    Examples
    --------
    >>> from nexusLIMS.schemas.units import ureg
    >>> metadata = {
    ...     "acceleration_voltage": ureg.Quantity(10, "kilovolt"),
    ...     "magnification": 50000,
    ...     "detector_type": "ETD",
    ... }
    >>> xml_dict = prepare_metadata_for_xml(metadata)
    >>> xml_dict["Voltage"]
    10.0
    >>> xml_dict["Voltage_unit"]
    'kV'
    >>> xml_dict["Magnification"]
    50000
    >>> xml_dict["Detector"]
    'ETD'

    Notes
    -----
    Keys that are already legacy display names (e.g. "Voltage") pass through
    :func:`get_xml_field_name` unchanged, so legacy metadata dicts can be fed
    through this function as well.
    """
    flat: dict[str, Any] = {}
    skipped_keys = {"warnings", "schema_version", "extensions"}

    def _store(label: str, item: Any) -> None:
        # Quantities are split into value + "<label>_unit"; all else is kept.
        if isinstance(item, Quantity):
            number, unit_text = serialize_quantity_to_xml(item)
            flat[label] = number
            flat[f"{label}_unit"] = unit_text
        else:
            flat[label] = item

    for key, item in metadata.items():
        # Missing values and internal bookkeeping fields never reach the XML.
        if item is None or key in skipped_keys:
            continue

        label = get_xml_field_name(key)

        # Quantities take precedence over every other special case, so the
        # structural checks below only apply to non-Quantity values.
        if not isinstance(item, Quantity):
            if key == "stage_position" and isinstance(item, dict):
                # Flatten the nested stage position, one entry per axis.
                for axis, axis_item in item.items():
                    if axis_item is not None:
                        axis_label = get_xml_field_name(f"stage_{axis.lower()}")
                        _store(axis_label, axis_item)
                continue
            if isinstance(item, list):
                # Lists (e.g. elements) collapse to a comma-separated string.
                item = ", ".join(map(str, item))

        _store(label, item)

    return flat
def get_qudt_uri(field_name: str, unit: str) -> str | None:  # noqa: ARG001
    """
    Look up the QUDT URI that corresponds to a unit string.

    The unit string is parsed by building a magnitude-1 quantity with the
    shared unit registry (``ureg``); that quantity is then passed to
    ``get_qudt_uri`` from ``nexusLIMS.schemas.units`` and its result is
    returned. QUDT is the "Quantities, Units, Dimensions and Types" ontology;
    the URIs are intended for Tier 3 semantic web integration.

    Parameters
    ----------
    field_name : str
        The field name (currently unused, reserved for future context-aware
        lookups)
    unit : str
        The unit string in compact form (e.g., "kV", "mm", "pA")

    Returns
    -------
    qudt_uri : str or None
        The result of the QUDT lookup, or None if a quantity could not be
        constructed from ``unit``

    Examples
    --------
    >>> get_qudt_uri("acceleration_voltage", "kV")  # doctest: +SKIP
    'http://qudt.org/vocab/unit/KiloV'
    >>> get_qudt_uri("working_distance", "mm")  # doctest: +SKIP
    'http://qudt.org/vocab/unit/MilliM'

    Notes
    -----
    Only the construction of the quantity is guarded; an exception raised by
    the QUDT lookup itself propagates to the caller.
    """
    try:
        # Magnitude 1 is arbitrary; only the unit carried by it matters here.
        unit_quantity = ureg.Quantity(1, unit)
    except Exception:
        # Any failure to build the quantity is reported as "no mapping".
        qudt_uri = None
    else:
        qudt_uri = _get_qudt_uri(unit_quantity)
    return qudt_uri
def get_emg_id(field_name: str) -> str | None:
    """
    Return the EM Glossary term ID registered for a field name.

    This is a thin wrapper that forwards ``field_name`` to
    ``em_glossary.get_emg_id`` and returns its result unchanged. The IDs are
    meant for Tier 3 semantic web integration, where they are attached to XML
    output as attributes for semantic traceability.

    Parameters
    ----------
    field_name : str
        The internal field name (e.g., "acceleration_voltage")

    Returns
    -------
    emg_id : str or None
        The EM Glossary ID (e.g., "EMG_00000004"), or None if no mapping
        exists

    Examples
    --------
    >>> get_emg_id("acceleration_voltage")
    'EMG_00000004'
    >>> get_emg_id("working_distance")
    'EMG_00000050'
    >>> get_emg_id("some_custom_field")
    """
    emg_id = em_glossary.get_emg_id(field_name)
    return emg_id