Source code for nexusLIMS.extractors.plugins.basic_metadata

"""Basic metadata extractor plugin (fallback for unknown file types)."""

import logging
from datetime import datetime as dt
from typing import Any, ClassVar

from nexusLIMS.extractors.base import ExtractionContext
from nexusLIMS.instruments import get_instr_from_filepath
from nexusLIMS.utils.time import current_system_tz

_logger = logging.getLogger(__name__)


class BasicFileInfoExtractor:
    """
    Catch-all extractor for files that have no format-specific handler.

    Only minimal metadata is produced: two placeholder type fields and a
    timestamp derived from the file's modification time. The extractor
    declares the lowest priority and accepts every file, so it is meant to
    be selected only when no other extractor supports the file.
    """

    name = "basic_file_info_extractor"
    priority = 0  # lowest priority, so this is only picked as a fallback
    supported_extensions: ClassVar = None  # None marks a wildcard extractor

    def supports(self, context: ExtractionContext) -> bool:  # noqa: ARG002
        """
        Report whether this extractor can handle the given file.

        Parameters
        ----------
        context
            The extraction context containing file information (unused).

        Returns
        -------
        bool
            Always ``True``, because this is the fallback for all files.
        """
        return True

    def extract(self, context: ExtractionContext) -> list[dict[str, Any]]:
        """
        Build a minimal metadata record for an arbitrary file.

        The dataset type and data type are both reported as ``"Unknown"``,
        and the file's modification time is reported under the
        ``"Creation Time"`` key as an ISO 8601 string.

        Parameters
        ----------
        context
            The extraction context; ``context.file_path`` is stat'ed and
            ``context.instrument`` (if truthy) supplies the timezone.

        Returns
        -------
        list[dict]
            A one-element list whose only item is a dict with a single
            ``'nx_meta'`` key holding the metadata described above.
        """
        _logger.debug(
            "Extracting basic metadata from file (no specialized extractor): %s",
            context.file_path,
        )

        # Read the modification timestamp first, before resolving a timezone.
        modified_ts = context.file_path.stat().st_mtime

        # Prefer the instrument's timezone; without an instrument, fall back
        # to the timezone of the system running the extraction.
        if context.instrument:
            zone = context.instrument.timezone
        else:
            zone = current_system_tz()

        modified_at = dt.fromtimestamp(modified_ts, tz=zone)

        nx_meta = {
            "DatasetType": "Unknown",
            "Data Type": "Unknown",
            "Creation Time": modified_at.isoformat(),
        }
        return [{"nx_meta": nx_meta}]
# Backward compatibility function for tests
def get_basic_metadata(filename):
    """
    Return basic metadata for a file NexusLIMS cannot otherwise interpret.

    Backward-compatibility wrapper: it looks up the instrument for the
    given path, wraps both in an ``ExtractionContext``, and delegates to
    ``BasicFileInfoExtractor.extract``.

    .. deprecated::
        This function is deprecated. Use the ``BasicFileInfoExtractor``
        class instead.

    Parameters
    ----------
    filename : pathlib.Path
        path to a file saved in the harvested directory of the instrument

    Returns
    -------
    list[dict]
        The value returned by ``BasicFileInfoExtractor.extract``: a list
        containing a single dict with an ``'nx_meta'`` key that describes
        the file in lieu of any metadata extracted from it.
    """
    instrument = get_instr_from_filepath(filename)
    context = ExtractionContext(file_path=filename, instrument=instrument)
    return BasicFileInfoExtractor().extract(context)