Coverage for nexusLIMS/extractors/plugins/tofwerk_pfib.py: 100%
123 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
1"""Tofwerk fibTOF pFIB-ToF-SIMS HDF5 extractor plugin."""
3from __future__ import annotations
5import contextlib
6import logging
7from datetime import UTC, datetime
8from typing import TYPE_CHECKING, Any, ClassVar
10import h5py
11import numpy as np
13from nexusLIMS.extractors.utils import _get_mtime_iso, add_to_extensions
14from nexusLIMS.instruments import get_instr_from_filepath
15from nexusLIMS.schemas.units import ureg
17if TYPE_CHECKING:
18 from nexusLIMS.extractors.base import ExtractionContext
20_logger = logging.getLogger(__name__)
class TofwerkPfibExtractor:
    """
    Extractor for Tofwerk fibTOF pFIB-ToF-SIMS HDF5 files.

    Supports both raw acquisitions (no ``PeakData/PeakData`` dataset) and
    opened/processed files (which carry integrated peak intensities under
    ``PeakData/PeakData``). Content sniffing confirms the file is a Tofwerk
    fibTOF FIB-SIMS acquisition before any extraction is attempted.
    """

    name = "tofwerk_pfib_extractor"
    priority = 150
    supported_extensions: ClassVar = {"h5"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Determine whether this extractor can handle the given file.

        Sniffs file content to verify it is a Tofwerk fibTOF FIB-SIMS HDF5
        file: ``FullSpectra/SumSpectrum``, ``FIBParams``, and ``FIBImages``
        must all be present, along with a ``TofDAQ Version`` root attribute.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if this appears to be a Tofwerk fibTOF FIB-SIMS HDF5 file
        """
        if context.file_path.suffix.lower() != ".h5":
            return False
        required_paths = ("FullSpectra/SumSpectrum", "FIBParams", "FIBImages")
        try:
            with h5py.File(context.file_path, "r") as h5_file:
                has_groups = all(path in h5_file for path in required_paths)
                return has_groups and "TofDAQ Version" in h5_file.attrs
        except Exception:
            # Unreadable or non-HDF5 content means "not ours", not an error.
            return False

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Extract metadata from a Tofwerk fibTOF pFIB-ToF-SIMS HDF5 file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        list[dict]
            List containing one metadata dict with ``nx_meta`` key
        """
        nx_meta: dict[str, Any] = {
            "DatasetType": "SpectrumImage",
            "Data Type": "PFIB_TOFSIMS",
        }
        try:
            with h5py.File(context.file_path, "r") as h5_file:
                nx_meta["Creation Time"] = _parse_creation_time(
                    h5_file, context.file_path
                )
                instrument = get_instr_from_filepath(context.file_path)
                nx_meta["Instrument ID"] = (
                    None if instrument is None else instrument.name
                )

                for helper in (
                    _extract_fib_params,
                    _extract_spatial_dims,
                    _extract_spectral_params,
                    _extract_acquisition_params,
                ):
                    helper(h5_file, nx_meta)

                # Opened/processed files carry integrated peak intensities.
                if "PeakData/PeakData" in h5_file:
                    variant = "pre-processed"
                else:
                    variant = "raw"
                add_to_extensions(nx_meta, "File Variant", variant)

        except Exception:
            _logger.exception("Failed to extract metadata from %s", context.file_path)
            # Guarantee a creation time even when the file was unreadable.
            if "Creation Time" not in nx_meta:
                nx_meta["Creation Time"] = _get_mtime_iso(context.file_path)

        return [{"nx_meta": nx_meta}]
108# ---------------------------------------------------------------------------
109# Internal parsing helpers
110# ---------------------------------------------------------------------------
113def _read_attr_scalar(obj, key: str, default=None):
114 """Return a scalar attribute value, decoding bytes if needed."""
115 if key not in obj.attrs:
116 return default
117 val = np.asarray(obj.attrs[key]).flat[0]
118 if isinstance(val, (bytes, np.bytes_)):
119 val = val.decode()
120 return val
123def _parse_creation_time(f: h5py.File, filepath) -> str:
124 """
125 Return ISO-8601 creation time string with timezone.
127 Tries ``AcquisitionLog/Log[0]['timestring']`` first (preferred, includes
128 timezone), then falls back to the ``HDF5 File Creation Time`` root attribute
129 (no timezone, treated as UTC), then falls back to file mtime.
130 """
131 with contextlib.suppress(Exception):
132 timestring = f["AcquisitionLog/Log"][0]["timestring"]
133 if isinstance(timestring, (bytes, np.bytes_)):
134 timestring = timestring.decode()
135 dt = datetime.fromisoformat(timestring)
136 return dt.isoformat()
138 with contextlib.suppress(Exception):
139 raw = f.attrs.get("HDF5 File Creation Time", b"")
140 if isinstance(raw, (bytes, np.bytes_)):
141 raw = raw.decode()
142 dt = datetime.strptime(raw, "%d.%m.%Y %H:%M:%S").replace(tzinfo=UTC)
143 return dt.isoformat()
145 return _get_mtime_iso(filepath)
def _extract_fib_params(f: h5py.File, nx_meta: dict) -> None:
    """
    Extract FIB column parameters from ``FIBParams`` group attributes.

    Records the hardware name, accelerating voltage (converted V -> kV),
    beam current (A), and field of view (mm) when the corresponding
    attributes exist.  A missing group or unparseable value is skipped
    silently (best-effort extraction).
    """
    try:
        fib_group = f["FIBParams"]
    except KeyError:
        return

    hardware = _read_attr_scalar(fib_group, "FibHardware")
    if hardware is not None:
        add_to_extensions(nx_meta, "FIB Hardware", hardware)

    volts = _read_attr_scalar(fib_group, "Voltage")
    if volts is not None:
        with contextlib.suppress(Exception):
            # Stored value divided by 1000 and tagged kV -> attribute is volts.
            add_to_extensions(
                nx_meta,
                "accelerating_voltage",
                ureg.Quantity(float(volts) / 1000.0, "kilovolt"),
            )

    amps = _read_attr_scalar(fib_group, "Current")
    if amps is not None:
        with contextlib.suppress(Exception):
            add_to_extensions(
                nx_meta,
                "beam_current",
                ureg.Quantity(float(amps), "ampere"),
            )

    fov = _read_attr_scalar(fib_group, "ViewField")
    if fov is not None:
        with contextlib.suppress(Exception):
            # ViewField is stored in mm
            add_to_extensions(
                nx_meta,
                "field_of_view",
                ureg.Quantity(float(fov), "millimeter"),
            )
def _extract_spatial_dims(f: h5py.File, nx_meta: dict) -> None:
    """
    Extract spatial dimensions and derive the pixel size.

    The scan width is taken from the third axis of
    ``FullSpectra/EventList`` when available, then ``PeakData/PeakData``,
    and finally falls back to the ``NbrSegments`` attribute (assuming a
    square scan).  Pixel size is ``FIBParams.ViewField`` (mm) divided by
    the scan width, reported in micrometers.
    """
    nbr_writes = _read_attr_scalar(f, "NbrWrites")
    nbr_segments = _read_attr_scalar(f, "NbrSegments")
    nbr_peaks = _read_attr_scalar(f, "NbrPeaks")

    # Prefer actual dataset shapes over attributes for the scan width.
    scan_width = None
    if "FullSpectra/EventList" in f:
        with contextlib.suppress(Exception):
            scan_width = f["FullSpectra/EventList"].shape[2]
    if scan_width is None and "PeakData/PeakData" in f:
        with contextlib.suppress(Exception):
            scan_width = f["PeakData/PeakData"].shape[2]
    if scan_width is None:
        scan_width = nbr_segments  # fallback: assume square scan

    if nbr_writes is not None and nbr_segments is not None and scan_width is not None:
        dims = f"({int(nbr_writes)}, {int(nbr_segments)}, {int(scan_width)})"
        add_to_extensions(nx_meta, "data_dimensions", dims)
        nx_meta["Data Dimensions"] = dims

    if nbr_peaks is not None:
        add_to_extensions(nx_meta, "Number of Peaks", int(nbr_peaks))

    # Pixel size from FIBParams.ViewField (mm) / scan width
    with contextlib.suppress(Exception):
        fov_mm = float(np.asarray(f["FIBParams"].attrs["ViewField"]).flat[0])
        if scan_width is not None and scan_width > 0:
            # Convert mm -> µm before dividing by the pixel count.
            add_to_extensions(
                nx_meta,
                "Pixel Size",
                ureg.Quantity((fov_mm * 1e3) / int(scan_width), "micrometer"),
            )
def _extract_spectral_params(f: h5py.File, nx_meta: dict) -> None:
    """
    Record the mass-axis extent of the summed spectrum.

    Reads ``FullSpectra/MassAxis`` and stores its minimum and maximum
    values (in Da) as extension metadata; silently skips when the dataset
    is absent or unreadable.
    """
    with contextlib.suppress(Exception):
        axis_values = f["FullSpectra/MassAxis"][:]
        add_to_extensions(
            nx_meta,
            "Mass Range Minimum",
            ureg.Quantity(float(axis_values.min()), "dalton"),
        )
        add_to_extensions(
            nx_meta,
            "Mass Range Maximum",
            ureg.Quantity(float(axis_values.max()), "dalton"),
        )
def _extract_acquisition_params(f: h5py.File, nx_meta: dict) -> None:
    """
    Extract acquisition-wide parameters from root attributes.

    Records ion mode, GUI and DAQ software versions, and the mean chamber
    pressure over all writes, each only when the source data is present.
    """
    mode = _read_attr_scalar(f, "IonMode")
    if mode is not None:
        add_to_extensions(nx_meta, "Ion Mode", mode)

    gui_ver = _read_attr_scalar(f, "FiblysGUIVersion")
    if gui_ver is not None:
        add_to_extensions(nx_meta, "FibLys GUI Version", gui_ver)

    daq_ver = _read_attr_scalar(f, "TofDAQ Version")
    if daq_ver is not None:
        add_to_extensions(nx_meta, "TofDAQ Version", str(daq_ver))

    # Chamber pressure — mean over all writes
    # NOTE(review): group is spelled "FibParams" here but "FIBParams"
    # elsewhere in this module; HDF5 paths are case-sensitive, so confirm
    # which casing the data files actually use.
    with contextlib.suppress(Exception):
        readings = f["FibParams/FibPressure/TwData"][:]
        add_to_extensions(
            nx_meta,
            "Chamber Pressure",
            ureg.Quantity(float(readings.mean()), "pascal"),
        )