Coverage for nexusLIMS/extractors/plugins/edax.py: 100%
106 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
1"""EDAX EDS spectrum (.spc/.msa) extractor plugin."""
3import contextlib
4import logging
5from typing import Any, ClassVar
7from hyperspy.io import load
9from nexusLIMS.extractors.base import ExtractionContext
10from nexusLIMS.extractors.utils import _set_instr_name_and_time, add_to_extensions
11from nexusLIMS.instruments import get_instr_from_filepath
12from nexusLIMS.schemas.units import ureg
13from nexusLIMS.utils.dicts import try_getting_dict_value
15_logger = logging.getLogger(__name__)
class SpcExtractor:
    """
    Extractor for EDAX .spc files.

    This extractor handles metadata extraction from .spc files saved by
    EDAX EDS software (Genesis, TEAM, etc.).
    """

    name = "spc_extractor"
    priority = 100
    supported_extensions: ClassVar = {"spc"}

    # Keys kept at the top level of ``nx_meta`` (in this order); every other
    # key is handed to ``add_to_extensions`` during migration.
    _CORE_FIELDS: ClassVar = (
        "DatasetType",
        "Data Type",
        "Creation Time",
        "Instrument ID",
        "warnings",
    )

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the lower-cased file extension (without the leading dot)
            is one of ``supported_extensions`` (i.e. ``.spc``)
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        # Consult the declared set rather than repeating the literal, so the
        # attribute and the check cannot drift apart.
        return extension in self.supported_extensions

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from a .spc file.

        Returns the metadata (as a list of dicts) from a .spc file.
        This type of file is produced by EDAX EDS software. It is read by HyperSpy's
        file reader and relevant metadata extracted and returned.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with an 'nx_meta' key and an
            'original_metadata' key. This method performs no error handling of
            its own: exceptions raised while loading the file or while looking
            up the "spc_header" node propagate to the caller.
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from SPC file: %s", filename)

        mdict: dict[str, Any] = {"nx_meta": {}}

        # assume all .spc datasets are EDS single spectra
        mdict["nx_meta"]["DatasetType"] = "Spectrum"
        mdict["nx_meta"]["Data Type"] = "EDS_Spectrum"

        _set_instr_name_and_time(mdict, filename)

        s = load(filename, lazy=True)

        # original_metadata puts the entire xml under the root node "spc_header",
        # so this will just bump that all up to the root level for ease of use.
        mdict["original_metadata"] = s.original_metadata["spc_header"].as_dictionary()

        # Map input field names to (output_name, unit) tuples
        # If unit is None, value is stored as-is; otherwise, create Pint Quantity
        term_mapping = {
            "azimuth": ("Azimuthal Angle", "degree"),
            "liveTime": ("Live Time", "second"),
            "detReso": ("Detector Energy Resolution", "electron_volt"),
            "elevation": ("Elevation Angle", "degree"),
            "evPerChan": ("Channel Size", "electron_volt"),
            "kV": ("Accelerating Voltage", "kilovolt"),
            "numPts": ("Number of Spectrum Channels", None),
            "startEnergy": ("Starting Energy", "kiloelectron_volt"),
            "endEnergy": ("Ending Energy", "kiloelectron_volt"),
            "tilt": ("Stage Tilt", "degree"),
        }

        for in_term, (out_name, unit) in term_mapping.items():
            val = try_getting_dict_value(mdict["original_metadata"], in_term)
            if val is None:
                continue
            if unit is not None:
                # Best effort: if the value cannot be turned into a Quantity,
                # keep the raw value instead of failing the extraction.
                with contextlib.suppress(ValueError, TypeError):
                    val = ureg.Quantity(val, unit)
            mdict["nx_meta"][out_name] = val

        # add any elements present:
        if "Sample" in s.metadata and "elements" in s.metadata.Sample:
            mdict["nx_meta"]["Elements"] = s.metadata.Sample.elements

        # Move vendor-specific fields to extensions
        mdict = self._migrate_to_schema_compliant_metadata(mdict)

        return [mdict]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Keeps only the core fields at the top level of ``nx_meta`` and passes
        every other field (the EDAX-specific ones) to ``add_to_extensions``.

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            The same ``mdict`` object, with its ``nx_meta`` entry replaced by
            the rebuilt dictionary
        """
        nx_meta = mdict.get("nx_meta", {})

        # Core fields first, in their canonical order.
        new_nx_meta = {
            field: nx_meta[field] for field in self._CORE_FIELDS if field in nx_meta
        }

        # Everything that is not a core field is an extension. The selection is
        # made before add_to_extensions touches new_nx_meta so that whatever
        # that helper adds cannot influence which keys are moved.
        extensions = {
            key: value
            for key, value in nx_meta.items()
            if key not in self._CORE_FIELDS
        }
        for key, value in extensions.items():
            add_to_extensions(new_nx_meta, key, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict
class MsaExtractor:
    """
    Extractor for EMSA/MAS .msa spectrum files.

    This extractor handles metadata extraction from .msa files, which may be
    saved by various EDS acquisition software packages, most commonly as exports
    from EDAX or Oxford software.
    """

    name = "msa_extractor"
    priority = 100
    supported_extensions: ClassVar = {"msa"}

    # Keys kept at the top level of ``nx_meta`` (in this order); every other
    # key is handed to ``add_to_extensions`` during migration.
    _CORE_FIELDS: ClassVar = (
        "DatasetType",
        "Data Type",
        "Creation Time",
        "Instrument ID",
        "warnings",
    )

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the lower-cased file extension (without the leading dot)
            is one of ``supported_extensions`` (i.e. ``.msa``)
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        # Consult the declared set rather than repeating the literal, so the
        # attribute and the check cannot drift apart.
        return extension in self.supported_extensions

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from an .msa file.

        Returns the metadata (as a list of dicts) from an .msa spectrum file.
        This file may be saved by a number of different EDS acquisition software, but
        most often is produced as an export from EDAX or Oxford software. This format is
        a standard, but vendors (such as EDAX) often add other values into the metadata
        header. See https://www.microscopy.org/resources/scientific_data/ for the formal
        specification.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with an 'nx_meta' key and an
            'original_metadata' key. This method performs no error handling of
            its own: exceptions raised while loading the file propagate to the
            caller.
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from MSA file: %s", filename)

        s = load(filename, lazy=False)
        mdict: dict[str, Any] = {"nx_meta": {}}
        mdict["original_metadata"] = s.original_metadata.as_dictionary()

        # assume all .msa datasets are EDS single spectra
        mdict["nx_meta"]["DatasetType"] = "Spectrum"
        mdict["nx_meta"]["Data Type"] = "EDS_Spectrum"

        _set_instr_name_and_time(mdict, filename)

        # Map input field names to (output_name, unit) tuples
        # If unit is None, value is stored as-is; otherwise, create Pint Quantity
        term_mapping = {
            "AZIMANGLE-dg": ("Azimuthal Angle", "degree"),
            "AmpTime (usec)": ("Amplifier Time", "microsecond"),
            "Analyzer Type": ("Analyzer Type", None),
            "BEAMKV   -kV": ("Beam Energy", "kiloelectron_volt"),
            "CHOFFSET": ("Channel Offset", None),
            "COMMENT": ("EDAX Comment", None),
            "DATATYPE": ("Data Format", None),
            "DATE": ("EDAX Date", None),
            "ELEVANGLE-dg": ("Elevation Angle", "degree"),
            "Elements": ("User-Selected Elements", None),
            "FILENAME": ("Originating File of MSA Export", None),
            "FORMAT": ("File Format", None),
            "FPGA Version": ("FPGA Version", None),
            "LIVETIME  -s": ("Live Time", "second"),
            "NCOLUMNS": ("Number of Data Columns", None),
            "NPOINTS": ("Number of Data Points", None),
            "OFFSET": ("Offset", None),
            "OWNER": ("EDAX Owner", None),
            "REALTIME  -s": ("Real Time", "second"),
            "RESO (MnKa)": ("Energy Resolution", "electron_volt"),
            "SIGNALTYPE": ("Signal Type", None),
            "TACTYLR  -cm": ("Active Layer Thickness", "centimeter"),
            "TBEWIND  -cm": ("Be Window Thickness", "centimeter"),
            "TDEADLYR -cm": ("Dead Layer Thickness", "centimeter"),
            "TIME": ("EDAX Time", None),
            "TITLE": ("EDAX Title", None),
            "TakeOff Angle": ("TakeOff Angle", "degree"),
            "Tilt Angle": ("Stage Tilt", "degree"),
            "VERSION": ("MSA Format Version", None),
            "XLABEL": ("X Column Label", None),
            "XPERCHAN": ("X Units Per Channel", None),
            "XUNITS": ("X Column Units", None),
            "YLABEL": ("Y Column Label", None),
            "YUNITS": ("Y Column Units", None),
        }

        for in_term, (out_name, unit) in term_mapping.items():
            val = try_getting_dict_value(mdict["original_metadata"], in_term)
            if val is None:
                continue
            if unit is not None:
                # Best effort: if the value cannot be turned into a Quantity,
                # keep the raw value instead of failing the extraction.
                with contextlib.suppress(ValueError, TypeError):
                    val = ureg.Quantity(val, unit)
            mdict["nx_meta"][out_name] = val

        # Move vendor-specific fields to extensions
        mdict = self._migrate_to_schema_compliant_metadata(mdict)

        return [mdict]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Keeps only the core fields at the top level of ``nx_meta`` and passes
        every other field (the EDAX/EMSA-specific ones) to
        ``add_to_extensions``.

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            The same ``mdict`` object, with its ``nx_meta`` entry replaced by
            the rebuilt dictionary
        """
        nx_meta = mdict.get("nx_meta", {})

        # Core fields first, in their canonical order.
        new_nx_meta = {
            field: nx_meta[field] for field in self._CORE_FIELDS if field in nx_meta
        }

        # Everything that is not a core field is an extension. The selection is
        # made before add_to_extensions touches new_nx_meta so that whatever
        # that helper adds cannot influence which keys are moved.
        extensions = {
            key: value
            for key, value in nx_meta.items()
            if key not in self._CORE_FIELDS
        }
        for key, value in extensions.items():
            add_to_extensions(new_nx_meta, key, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict
356# Backward compatibility functions for tests
def get_spc_metadata(filename):
    """
    Run :class:`SpcExtractor` on a single .spc file.

    .. deprecated:: 1.4.0
        This function is deprecated. Use :class:`SpcExtractor` class instead.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list
        The value returned by :meth:`SpcExtractor.extract` for this file
        (annotated there as a list of metadata dicts).
    """
    # The instrument is looked up from the path so the caller only has to
    # supply the file itself.
    instrument = get_instr_from_filepath(filename)
    context = ExtractionContext(file_path=filename, instrument=instrument)
    return SpcExtractor().extract(context)
def get_msa_metadata(filename):
    """
    Run :class:`MsaExtractor` on a single .msa file.

    .. deprecated:: 1.4.0
        This function is deprecated. Use :class:`MsaExtractor` class instead.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list
        The value returned by :meth:`MsaExtractor.extract` for this file
        (annotated there as a list of metadata dicts).
    """
    # The instrument is looked up from the path so the caller only has to
    # supply the file itself.
    instrument = get_instr_from_filepath(filename)
    context = ExtractionContext(file_path=filename, instrument=instrument)
    return MsaExtractor().extract(context)