Coverage for nexusLIMS/extractors/plugins/tofwerk_pfib.py: 100%

123 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1"""Tofwerk fibTOF pFIB-ToF-SIMS HDF5 extractor plugin.""" 

2 

3from __future__ import annotations 

4 

5import contextlib 

6import logging 

7from datetime import UTC, datetime 

8from typing import TYPE_CHECKING, Any, ClassVar 

9 

10import h5py 

11import numpy as np 

12 

13from nexusLIMS.extractors.utils import _get_mtime_iso, add_to_extensions 

14from nexusLIMS.instruments import get_instr_from_filepath 

15from nexusLIMS.schemas.units import ureg 

16 

17if TYPE_CHECKING: 

18 from nexusLIMS.extractors.base import ExtractionContext 

19 

20_logger = logging.getLogger(__name__) 

21 

22 

class TofwerkPfibExtractor:
    """
    Metadata extractor plugin for Tofwerk fibTOF pFIB-ToF-SIMS HDF5 files.

    Two file flavours are handled: raw acquisitions (no ``PeakData/PeakData``
    dataset) and opened/processed files (``PeakData/PeakData`` present, holding
    integrated peak intensities). Before extraction is attempted, the file
    content is sniffed to confirm it looks like a Tofwerk fibTOF FIB-SIMS
    acquisition.
    """

    name = "tofwerk_pfib_extractor"
    priority = 150
    supported_extensions: ClassVar = {"h5"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Decide whether this extractor can handle the given file.

        The file must have an ``.h5`` suffix (case-insensitive) and, when opened
        as HDF5, contain ``FullSpectra/SumSpectrum``, ``FIBParams`` and
        ``FIBImages`` as well as a ``TofDAQ Version`` root attribute.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the file appears to be a Tofwerk fibTOF FIB-SIMS HDF5 file;
            False otherwise, including when the file cannot be opened or read
        """
        path = context.file_path
        if path.suffix.lower() != ".h5":
            return False

        required_paths = ("FullSpectra/SumSpectrum", "FIBParams", "FIBImages")
        try:
            with h5py.File(path, "r") as h5_file:
                has_layout = all(item in h5_file for item in required_paths)
                return has_layout and "TofDAQ Version" in h5_file.attrs
        except Exception:
            # Sniffing is best-effort: any failure means "not supported"
            return False

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Build the metadata record for a Tofwerk fibTOF pFIB-ToF-SIMS HDF5 file.

        Any exception raised while reading the file is logged rather than
        propagated; whatever metadata was gathered up to that point is still
        returned, and ``Creation Time`` falls back to the file modification
        time if it had not been set yet.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing one metadata dict with ``nx_meta`` key
        """
        metadata: dict[str, Any] = {
            "DatasetType": "SpectrumImage",
            "Data Type": "PFIB_TOFSIMS",
        }
        try:
            with h5py.File(context.file_path, "r") as h5_file:
                metadata["Creation Time"] = _parse_creation_time(
                    h5_file, context.file_path
                )
                instrument = get_instr_from_filepath(context.file_path)
                metadata["Instrument ID"] = (
                    None if instrument is None else instrument.name
                )

                # Each helper fills in its own slice of the metadata, in order
                section_extractors = (
                    _extract_fib_params,
                    _extract_spatial_dims,
                    _extract_spectral_params,
                    _extract_acquisition_params,
                )
                for populate in section_extractors:
                    populate(h5_file, metadata)

                # Presence of integrated peak data marks a processed file
                is_processed = "PeakData/PeakData" in h5_file
                add_to_extensions(
                    metadata,
                    "File Variant",
                    "pre-processed" if is_processed else "raw",
                )

        except Exception:
            _logger.exception("Failed to extract metadata from %s", context.file_path)
            if "Creation Time" not in metadata:
                metadata["Creation Time"] = _get_mtime_iso(context.file_path)

        return [{"nx_meta": metadata}]

106 

107 

108# --------------------------------------------------------------------------- 

109# Internal parsing helpers 

110# --------------------------------------------------------------------------- 

111 

112 

def _read_attr_scalar(obj, key: str, default=None):
    """
    Return the first element of an attribute as a scalar, decoding bytes.

    Parameters
    ----------
    obj
        Any object exposing a mapping-like ``attrs`` member
    key
        Name of the attribute to read
    default
        Value returned when the attribute is missing or holds no elements

    Returns
    -------
    The first element of the attribute value (as a ``str`` if it was stored
    as bytes), or ``default`` if the attribute is absent or empty
    """
    if key not in obj.attrs:
        return default

    values = np.asarray(obj.attrs[key])
    if values.size == 0:
        # An empty attribute carries no value; treat it like a missing one
        # instead of letting ``flat[0]`` raise IndexError in the caller
        return default

    val = values.flat[0]
    if isinstance(val, (bytes, np.bytes_)):
        # Malformed bytes should not abort the whole extraction
        val = val.decode(errors="replace")
    return val

121 

122 

123def _parse_creation_time(f: h5py.File, filepath) -> str: 

124 """ 

125 Return ISO-8601 creation time string with timezone. 

126 

127 Tries ``AcquisitionLog/Log[0]['timestring']`` first (preferred, includes 

128 timezone), then falls back to the ``HDF5 File Creation Time`` root attribute 

129 (no timezone, treated as UTC), then falls back to file mtime. 

130 """ 

131 with contextlib.suppress(Exception): 

132 timestring = f["AcquisitionLog/Log"][0]["timestring"] 

133 if isinstance(timestring, (bytes, np.bytes_)): 

134 timestring = timestring.decode() 

135 dt = datetime.fromisoformat(timestring) 

136 return dt.isoformat() 

137 

138 with contextlib.suppress(Exception): 

139 raw = f.attrs.get("HDF5 File Creation Time", b"") 

140 if isinstance(raw, (bytes, np.bytes_)): 

141 raw = raw.decode() 

142 dt = datetime.strptime(raw, "%d.%m.%Y %H:%M:%S").replace(tzinfo=UTC) 

143 return dt.isoformat() 

144 

145 return _get_mtime_iso(filepath) 

146 

147 

def _extract_fib_params(f: h5py.File, nx_meta: dict) -> None:
    """
    Copy FIB column parameters from the ``FIBParams`` group into ``nx_meta``.

    Does nothing when the file has no ``FIBParams`` group. Each attribute is
    optional; a numeric attribute that cannot be converted is skipped
    without affecting the others.

    Parameters
    ----------
    f
        Open HDF5 file to read from
    nx_meta
        Metadata dict that is updated in place
    """
    try:
        fib_group = f["FIBParams"]
    except KeyError:
        return

    hardware = _read_attr_scalar(fib_group, "FibHardware")
    if hardware is not None:
        add_to_extensions(nx_meta, "FIB Hardware", hardware)

    # (attribute name, extension key, unit, divisor applied to the raw value)
    quantity_specs = (
        # Divided by 1000 to report kilovolts (presumably stored in volts)
        ("Voltage", "accelerating_voltage", "kilovolt", 1000.0),
        ("Current", "beam_current", "ampere", None),
        # ViewField is stored in mm
        ("ViewField", "field_of_view", "millimeter", None),
    )
    for attr_name, field, unit, divisor in quantity_specs:
        raw = _read_attr_scalar(fib_group, attr_name)
        if raw is None:
            continue
        with contextlib.suppress(Exception):
            magnitude = float(raw)
            if divisor is not None:
                magnitude = magnitude / divisor
            add_to_extensions(nx_meta, field, ureg.Quantity(magnitude, unit))

186 

187 

def _extract_spatial_dims(f: h5py.File, nx_meta: dict) -> None:
    """
    Record the data dimensions, peak count and derived pixel size.

    The third dimension is taken from the shape of ``FullSpectra/EventList``
    or, failing that, ``PeakData/PeakData``; if neither yields a value it is
    assumed equal to ``NbrSegments`` (square scan).

    Parameters
    ----------
    f
        Open HDF5 file to read from
    nx_meta
        Metadata dict that is updated in place
    """
    n_writes = _read_attr_scalar(f, "NbrWrites")
    n_segments = _read_attr_scalar(f, "NbrSegments")
    n_peaks = _read_attr_scalar(f, "NbrPeaks")

    # Determine NX from the first dataset that provides a third axis
    nx = None
    for dataset_path in ("FullSpectra/EventList", "PeakData/PeakData"):
        if nx is None and dataset_path in f:
            with contextlib.suppress(Exception):
                nx = f[dataset_path].shape[2]
    if nx is None:
        nx = n_segments  # fallback: assume square scan

    if not (n_writes is None or n_segments is None or nx is None):
        dims_text = f"({int(n_writes)}, {int(n_segments)}, {int(nx)})"
        # Stored both as an extension field and as a top-level key
        add_to_extensions(nx_meta, "data_dimensions", dims_text)
        nx_meta["Data Dimensions"] = dims_text

    if n_peaks is not None:
        add_to_extensions(nx_meta, "Number of Peaks", int(n_peaks))

    # Pixel size from FIBParams.ViewField (mm) / nx, expressed in micrometers
    with contextlib.suppress(Exception):
        view_field_attr = f["FIBParams"].attrs["ViewField"]
        view_field_mm = float(np.asarray(view_field_attr).flat[0])
        if nx is not None and nx > 0:
            pixel_size_um = (view_field_mm * 1e3) / int(nx)
            pixel_size = ureg.Quantity(pixel_size_um, "micrometer")
            add_to_extensions(nx_meta, "Pixel Size", pixel_size)

226 

227 

def _extract_spectral_params(f: h5py.File, nx_meta: dict) -> None:
    """
    Record the minimum and maximum of the mass axis, in daltons.

    Reads ``FullSpectra/MassAxis``; any failure (for example a missing
    dataset) is suppressed and stops further processing in this helper.

    Parameters
    ----------
    f
        Open HDF5 file to read from
    nx_meta
        Metadata dict that is updated in place
    """
    with contextlib.suppress(Exception):
        masses = f["FullSpectra/MassAxis"][:]
        bounds = (
            ("Mass Range Minimum", masses.min),
            ("Mass Range Maximum", masses.max),
        )
        for label, reduce_axis in bounds:
            bound = ureg.Quantity(float(reduce_axis()), "dalton")
            add_to_extensions(nx_meta, label, bound)

242 

243 

def _extract_acquisition_params(f: h5py.File, nx_meta: dict) -> None:
    """
    Record acquisition-wide settings and the mean chamber pressure.

    ``IonMode``, ``FiblysGUIVersion`` and ``TofDAQ Version`` are read from the
    root attributes (the last one is stored as a string). The chamber
    pressure is the mean of ``FibParams/FibPressure/TwData``; it is skipped
    if that dataset cannot be read.

    Parameters
    ----------
    f
        Open HDF5 file to read from
    nx_meta
        Metadata dict that is updated in place
    """
    # (root attribute, extension label, whether to coerce the value to str)
    root_attrs = (
        ("IonMode", "Ion Mode", False),
        ("FiblysGUIVersion", "FibLys GUI Version", False),
        ("TofDAQ Version", "TofDAQ Version", True),
    )
    for attr_name, label, as_text in root_attrs:
        value = _read_attr_scalar(f, attr_name)
        if value is None:
            continue
        add_to_extensions(nx_meta, label, str(value) if as_text else value)

    # Chamber pressure — mean over all writes
    # NOTE(review): this path is spelled ``FibParams`` while the group read
    # elsewhere in this module is ``FIBParams`` — confirm both exist in files
    with contextlib.suppress(Exception):
        pressures = f["FibParams/FibPressure/TwData"][:]
        mean_pressure = float(pressures.mean())
        add_to_extensions(
            nx_meta,
            "Chamber Pressure",
            ureg.Quantity(mean_pressure, "pascal"),
        )