Coverage for nexusLIMS/extractors/plugins/edax.py: 100%

106 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1"""EDAX EDS spectrum (.spc/.msa) extractor plugin.""" 

2 

3import contextlib 

4import logging 

5from typing import Any, ClassVar 

6 

7from hyperspy.io import load 

8 

9from nexusLIMS.extractors.base import ExtractionContext 

10from nexusLIMS.extractors.utils import _set_instr_name_and_time, add_to_extensions 

11from nexusLIMS.instruments import get_instr_from_filepath 

12from nexusLIMS.schemas.units import ureg 

13from nexusLIMS.utils.dicts import try_getting_dict_value 

14 

15_logger = logging.getLogger(__name__) 

16 

17 

class SpcExtractor:
    """
    Extractor for EDAX .spc files.

    This extractor handles metadata extraction from .spc files saved by
    EDAX EDS software (Genesis, TEAM, etc.).
    """

    name = "spc_extractor"
    priority = 100
    supported_extensions: ClassVar = {"spc"}

    # Maps keys of the .spc header to (output_name, unit) tuples.
    # If unit is None the value is stored as-is; otherwise a Pint Quantity is
    # attempted. Kept at class level so the table is not rebuilt on every call.
    _TERM_MAPPING: ClassVar = {
        "azimuth": ("Azimuthal Angle", "degree"),
        "liveTime": ("Live Time", "second"),
        "detReso": ("Detector Energy Resolution", "electron_volt"),
        "elevation": ("Elevation Angle", "degree"),
        "evPerChan": ("Channel Size", "electron_volt"),
        "kV": ("Accelerating Voltage", "kilovolt"),
        "numPts": ("Number of Spectrum Channels", None),
        "startEnergy": ("Starting Energy", "kiloelectron_volt"),
        "endEnergy": ("Ending Energy", "kiloelectron_volt"),
        "tilt": ("Stage Tilt", "degree"),
    }

    # nx_meta keys that stay at the top level of nx_meta; every other key is
    # moved to the extensions section during migration.
    _CORE_FIELDS: ClassVar = (
        "DatasetType",
        "Data Type",
        "Creation Time",
        "Instrument ID",
        "warnings",
    )

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the lower-cased file extension (without the leading dot)
            is one of ``supported_extensions`` (i.e. ``.spc``)
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        return extension in self.supported_extensions

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from a .spc file.

        Returns the metadata (as a list of dicts) from a .spc file.
        This type of file is produced by EDAX EDS software. It is read by HyperSpy's
        file reader and relevant metadata extracted and returned.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with 'nx_meta' and
            'original_metadata' keys. Errors raised while loading the file are
            not caught here, so this method never returns None.
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from SPC file: %s", filename)

        # assume all .spc datasets are EDS single spectra
        mdict = {
            "nx_meta": {
                "DatasetType": "Spectrum",
                "Data Type": "EDS_Spectrum",
            },
        }

        # NOTE(review): presumably fills the instrument / creation-time keys of
        # nx_meta in place; confirm against nexusLIMS.extractors.utils.
        _set_instr_name_and_time(mdict, filename)

        s = load(filename, lazy=True)

        # original_metadata puts the entire header under the root node
        # "spc_header", so this bumps that all up to the root level for ease
        # of use.
        mdict["original_metadata"] = s.original_metadata["spc_header"].as_dictionary()

        for in_term, (out_name, unit) in self._TERM_MAPPING.items():
            val = try_getting_dict_value(mdict["original_metadata"], in_term)
            if val is None:
                continue
            if unit is not None:
                # Best effort: if the value cannot be converted to a Quantity,
                # keep the raw value rather than dropping the field.
                with contextlib.suppress(ValueError, TypeError):
                    val = ureg.Quantity(val, unit)
            mdict["nx_meta"][out_name] = val

        # add any elements present:
        if "Sample" in s.metadata and "elements" in s.metadata.Sample:
            mdict["nx_meta"]["Elements"] = s.metadata.Sample.elements

        # Move vendor-specific fields to extensions
        return [self._migrate_to_schema_compliant_metadata(mdict)]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Keeps the core fields at the top level of ``nx_meta`` and hands every
        other (EDAX-specific or unknown) field to ``add_to_extensions``.

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            The same ``mdict`` object, with its 'nx_meta' entry replaced by
            the schema-compliant structure
        """
        nx_meta = mdict.get("nx_meta", {})

        # Core fields first, in their canonical order
        new_nx_meta = {
            field: nx_meta[field] for field in self._CORE_FIELDS if field in nx_meta
        }

        # Everything else goes to extensions, in the order it was extracted.
        # Membership is tested against the constant tuple (not new_nx_meta)
        # because add_to_extensions mutates new_nx_meta while we iterate.
        for field_name, value in nx_meta.items():
            if field_name not in self._CORE_FIELDS:
                add_to_extensions(new_nx_meta, field_name, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict

175 

176 

class MsaExtractor:
    """
    Extractor for EMSA/MAS .msa spectrum files.

    This extractor handles metadata extraction from .msa files, which may be
    saved by various EDS acquisition software packages, most commonly as exports
    from EDAX or Oxford software.
    """

    name = "msa_extractor"
    priority = 100
    supported_extensions: ClassVar = {"msa"}

    # Maps keys of the .msa header to (output_name, unit) tuples.
    # If unit is None the value is stored as-is; otherwise a Pint Quantity is
    # attempted. Kept at class level so the table is not rebuilt on every call.
    _TERM_MAPPING: ClassVar = {
        "AZIMANGLE-dg": ("Azimuthal Angle", "degree"),
        "AmpTime (usec)": ("Amplifier Time", "microsecond"),
        "Analyzer Type": ("Analyzer Type", None),
        "BEAMKV -kV": ("Beam Energy", "kiloelectron_volt"),
        "CHOFFSET": ("Channel Offset", None),
        "COMMENT": ("EDAX Comment", None),
        "DATATYPE": ("Data Format", None),
        "DATE": ("EDAX Date", None),
        "ELEVANGLE-dg": ("Elevation Angle", "degree"),
        "Elements": ("User-Selected Elements", None),
        "FILENAME": ("Originating File of MSA Export", None),
        "FORMAT": ("File Format", None),
        "FPGA Version": ("FPGA Version", None),
        "LIVETIME -s": ("Live Time", "second"),
        "NCOLUMNS": ("Number of Data Columns", None),
        "NPOINTS": ("Number of Data Points", None),
        "OFFSET": ("Offset", None),
        "OWNER": ("EDAX Owner", None),
        "REALTIME -s": ("Real Time", "second"),
        "RESO (MnKa)": ("Energy Resolution", "electron_volt"),
        "SIGNALTYPE": ("Signal Type", None),
        "TACTYLR -cm": ("Active Layer Thickness", "centimeter"),
        "TBEWIND -cm": ("Be Window Thickness", "centimeter"),
        "TDEADLYR -cm": ("Dead Layer Thickness", "centimeter"),
        "TIME": ("EDAX Time", None),
        "TITLE": ("EDAX Title", None),
        "TakeOff Angle": ("TakeOff Angle", "degree"),
        "Tilt Angle": ("Stage Tilt", "degree"),
        "VERSION": ("MSA Format Version", None),
        "XLABEL": ("X Column Label", None),
        "XPERCHAN": ("X Units Per Channel", None),
        "XUNITS": ("X Column Units", None),
        "YLABEL": ("Y Column Label", None),
        "YUNITS": ("Y Column Units", None),
    }

    # nx_meta keys that stay at the top level of nx_meta; every other key is
    # moved to the extensions section during migration.
    _CORE_FIELDS: ClassVar = (
        "DatasetType",
        "Data Type",
        "Creation Time",
        "Instrument ID",
        "warnings",
    )

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this extractor supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the lower-cased file extension (without the leading dot)
            is one of ``supported_extensions`` (i.e. ``.msa``)
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        return extension in self.supported_extensions

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from an .msa file.

        Returns the metadata (as a list of dicts) from an .msa spectrum file.
        This file may be saved by a number of different EDS acquisition software, but
        most often is produced as an export from EDAX or Oxford software. This format is
        a standard, but vendors (such as EDAX) often add other values into the metadata
        header. See https://www.microscopy.org/resources/scientific_data/ for the formal
        specification.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing a single metadata dict with 'nx_meta' and
            'original_metadata' keys. Errors raised while loading the file are
            not caught here, so this method never returns None.
        """
        filename = context.file_path
        _logger.debug("Extracting metadata from MSA file: %s", filename)

        s = load(filename, lazy=False)

        # assume all .msa datasets are EDS single spectra
        mdict = {
            "nx_meta": {
                "DatasetType": "Spectrum",
                "Data Type": "EDS_Spectrum",
            },
            "original_metadata": s.original_metadata.as_dictionary(),
        }

        # NOTE(review): presumably fills the instrument / creation-time keys of
        # nx_meta in place; confirm against nexusLIMS.extractors.utils.
        _set_instr_name_and_time(mdict, filename)

        for in_term, (out_name, unit) in self._TERM_MAPPING.items():
            val = try_getting_dict_value(mdict["original_metadata"], in_term)
            if val is None:
                continue
            if unit is not None:
                # Best effort: if the value cannot be converted to a Quantity,
                # keep the raw value rather than dropping the field.
                with contextlib.suppress(ValueError, TypeError):
                    val = ureg.Quantity(val, unit)
            mdict["nx_meta"][out_name] = val

        # Move vendor-specific fields to extensions
        return [self._migrate_to_schema_compliant_metadata(mdict)]

    def _migrate_to_schema_compliant_metadata(self, mdict: dict) -> dict:
        """
        Migrate metadata to schema-compliant format.

        Keeps the core fields at the top level of ``nx_meta`` and hands every
        other (EDAX/EMSA-specific or unknown) field to ``add_to_extensions``.

        Parameters
        ----------
        mdict
            Metadata dictionary with nx_meta containing extracted fields

        Returns
        -------
        dict
            The same ``mdict`` object, with its 'nx_meta' entry replaced by
            the schema-compliant structure
        """
        nx_meta = mdict.get("nx_meta", {})

        # Core fields first, in their canonical order
        new_nx_meta = {
            field: nx_meta[field] for field in self._CORE_FIELDS if field in nx_meta
        }

        # Everything else goes to extensions, in the order it was extracted.
        # Membership is tested against the constant tuple (not new_nx_meta)
        # because add_to_extensions mutates new_nx_meta while we iterate.
        for field_name, value in nx_meta.items():
            if field_name not in self._CORE_FIELDS:
                add_to_extensions(new_nx_meta, field_name, value)

        mdict["nx_meta"] = new_nx_meta
        return mdict

354 

355 

# Backward compatibility functions for tests

def get_spc_metadata(filename) -> list[dict[str, Any]]:
    """
    Get metadata from a .spc file.

    .. deprecated:: 1.4.0
        This function is deprecated. Use :class:`SpcExtractor` class instead.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list[dict]
        The result of :meth:`SpcExtractor.extract` for this file: a list
        containing a single dict describing the file's metadata.
    """
    # Build the context the extractor expects; the instrument is looked up
    # from the file path.
    context = ExtractionContext(
        file_path=filename,
        instrument=get_instr_from_filepath(filename),
    )
    return SpcExtractor().extract(context)

379 

380 

def get_msa_metadata(filename) -> list[dict[str, Any]]:
    """
    Get metadata from an .msa file.

    .. deprecated:: 1.4.0
        This function is deprecated. Use :class:`MsaExtractor` class instead.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list[dict]
        The result of :meth:`MsaExtractor.extract` for this file: a list
        containing a single dict describing the file's metadata.
    """
    # Build the context the extractor expects; the instrument is looked up
    # from the file path.
    context = ExtractionContext(
        file_path=filename,
        instrument=get_instr_from_filepath(filename),
    )
    return MsaExtractor().extract(context)