Coverage for nexusLIMS/extractors/registry.py: 100%

242 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1"""Extractor registry for plugin discovery and selection. 

2 

3This module provides the central registry that discovers, manages, and selects 

4extractors based on file type and context. It implements auto-discovery by 

5walking the plugins directory and uses priority-based selection. 

6""" 

7 

8from __future__ import annotations 

9 

10import importlib 

11import inspect 

12import logging 

13import pkgutil 

14from collections import defaultdict 

15from pathlib import Path 

16from typing import TYPE_CHECKING, Any 

17 

18from nexusLIMS.extractors.plugins.basic_metadata import BasicFileInfoExtractor 

19from nexusLIMS.extractors.plugins.profiles import register_all_profiles 

20 

21if TYPE_CHECKING: 

22 from nexusLIMS.extractors.base import ( 

23 BaseExtractor, 

24 ExtractionContext, 

25 PreviewGenerator, 

26 ) 

27 

# Module-level logger shared by the registry class and helpers in this file.
_logger = logging.getLogger(__name__)

# Public names of this module.
__all__ = [
    "ExtractorRegistry",
    "get_registry",
]

34 

35 

class ExtractorRegistry:
    """
    Central registry for extractor plugins.

    Manages auto-discovery, registration, and selection of metadata extractors
    and preview generators. Uses priority-based selection with content
    sniffing support (each candidate's ``supports()`` is consulted).

    This is a singleton - use :func:`get_registry` to access.

    Features
    --------
    - Auto-discovers plugins by walking nexusLIMS/extractors/plugins/
    - Maintains priority-sorted lists per extension (and for wildcards)
    - Normalizes declared extensions (no leading dot, lower case) so they
      match the form used when looking up a file's suffix
    - Lazy instantiation for performance
    - Caches extractor instances
    - Never returns None from :meth:`get_extractor` (always has a fallback)

    Examples
    --------
    Get an extractor for a file:

    >>> from nexusLIMS.extractors.registry import get_registry
    >>> from nexusLIMS.extractors.base import ExtractionContext
    >>> from pathlib import Path
    >>>
    >>> registry = get_registry()
    >>> context = ExtractionContext(Path("data.dm3"), instrument=None)
    >>> extractor = registry.get_extractor(context)
    >>> metadata = extractor.extract(context)

    Manual registration (for testing):

    >>> class MyExtractor:
    ...     name = "my_extractor"
    ...     priority = 100
    ...     def supports(self, context): return True
    ...     def extract(self, context): return {"nx_meta": {}}
    >>>
    >>> registry = get_registry()
    >>> registry.register_extractor(MyExtractor)
    """

    def __init__(self):
        """Initialize the extractor registry."""
        # Maps extension -> list of extractor classes (sorted by priority)
        self._extractors: dict[str, list[type[BaseExtractor]]] = defaultdict(list)

        # Cache of instantiated extractors (name -> instance)
        self._instances: dict[str, BaseExtractor] = {}

        # Wildcard extractors that support any extension (sorted by priority)
        self._wildcard_extractors: list[type[BaseExtractor]] = []

        # Preview generators (maps extension -> list of generator classes)
        self._preview_generators: dict[str, list[type[PreviewGenerator]]] = defaultdict(
            list
        )

        # Cache of instantiated preview generators (name -> instance)
        self._preview_instances: dict[str, PreviewGenerator] = {}

        # Discovery state
        self._discovered = False

        _logger.debug("Initialized ExtractorRegistry")

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    def _ensure_discovered(self) -> None:
        """Run plugin discovery once, on first use."""
        if not self._discovered:
            self.discover_plugins()

    def _iter_unique_classes(self):
        """Yield every registered extractor class exactly once.

        Extension-specific classes come first (in per-extension order),
        followed by wildcard classes.
        """
        seen: set[type] = set()
        for extractor_classes in self._extractors.values():
            for cls in extractor_classes:
                if cls not in seen:
                    seen.add(cls)
                    yield cls
        for cls in self._wildcard_extractors:
            if cls not in seen:
                seen.add(cls)
                yield cls

    @staticmethod
    def _has_plugin_shape(obj: Any, action: str) -> bool:
        """Return True if ``obj`` is a class with the common plugin members.

        A plugin class needs a ``str`` ``name``, an ``int`` ``priority``, a
        callable ``supports`` and a callable named by ``action``.
        """
        return (
            inspect.isclass(obj)
            and isinstance(getattr(obj, "name", None), str)
            and isinstance(getattr(obj, "priority", None), int)
            and callable(getattr(obj, "supports", None))
            and callable(getattr(obj, action, None))
        )

    @staticmethod
    def _normalize_extensions(extensions: Any) -> set[str]:
        """Return declared extensions as a fresh, normalized set.

        Lookups use ``suffix.lstrip(".").lower()``, so declarations are
        brought to the same form; otherwise ``".DM3"`` would never match.
        A bare string is treated as one extension rather than being split
        into characters, and a new set is always returned so the plugin
        class's own attribute is never aliased.
        """
        if extensions is None:
            return set()
        if isinstance(extensions, str):
            extensions = (extensions,)
        return {str(ext).lstrip(".").lower() for ext in extensions}

    def _first_supporting(self, candidates, context, selected_msg, error_msg):
        """Return the first candidate whose ``supports(context)`` is truthy.

        ``candidates`` is an iterable of plugin instances; passing a generator
        keeps instantiation lazy. An exception raised by ``supports()`` is
        logged with ``error_msg`` and the candidate is skipped. Returns None
        when no candidate matches.
        """
        for instance in candidates:
            try:
                if instance.supports(context):
                    _logger.debug(
                        selected_msg,
                        instance.name,
                        context.file_path.name,
                    )
                    return instance
            except Exception as e:
                _logger.warning(
                    error_msg,
                    instance.name,
                    e,
                    exc_info=True,
                )
        return None

    # ------------------------------------------------------------------
    # Introspection properties
    # ------------------------------------------------------------------

    @property
    def extractors(self) -> dict[str, list[type[BaseExtractor]]]:
        """
        Get the extractor list.

        Returns a dictionary mapping file extensions to lists of extractor classes,
        sorted by priority (descending). The dictionary and its lists are
        copies, so modifying them does not affect the registry.

        Auto-discovers plugins if not already discovered.

        Returns
        -------
        dict[str, list[type[BaseExtractor]]]
            Maps extension (without dot) to list of extractor classes

        Examples
        --------
        >>> registry = get_registry()
        >>> extractors_by_ext = registry.extractors
        >>> print(extractors_by_ext.get("dm3", []))
        """
        self._ensure_discovered()
        return {ext: list(classes) for ext, classes in self._extractors.items()}

    @property
    def extractor_names(self) -> list[str]:
        """
        Get a deduplicated list of extractor names.

        Returns extractor class names sorted alphabetically, with duplicates
        removed. Both extension-specific and wildcard extractors are included.

        Auto-discovers plugins if not already discovered.

        Returns
        -------
        list[str]
            Sorted list of unique extractor names

        Examples
        --------
        >>> registry = get_registry()
        >>> names = registry.extractor_names
        >>> print(names)
        ['BasicFileInfoExtractor', 'DM3Extractor', 'QuantaTiffExtractor', ...]
        """
        self._ensure_discovered()
        return sorted({cls.__name__ for cls in self._iter_unique_classes()})

    @property
    def all_extractors(self) -> list[BaseExtractor]:
        """
        Get a deduplicated flat list of all registered extractor instances.

        Returns one instance per unique extractor class (both extension-specific
        and wildcard extractors), sorted by priority descending.

        Auto-discovers plugins if not already discovered.

        Returns
        -------
        list[BaseExtractor]
            Unique extractor instances sorted by priority (descending)

        Examples
        --------
        >>> registry = get_registry()
        >>> for ext in registry.all_extractors:
        ...     print(f"{ext.name}: priority {ext.priority}")
        """
        self._ensure_discovered()
        instances = [self._get_instance(cls) for cls in self._iter_unique_classes()]
        return sorted(instances, key=lambda e: e.priority, reverse=True)

    # ------------------------------------------------------------------
    # Discovery
    # ------------------------------------------------------------------

    def discover_plugins(self) -> None:
        """
        Auto-discover extractor plugins by walking the plugins directory.

        Walks nexusLIMS/extractors/plugins/, imports all Python modules,
        and registers any classes that implement the BaseExtractor or
        PreviewGenerator protocol. Instrument profiles are registered
        afterwards.

        This is called automatically on first use. Once discovery has run,
        further calls are no-ops; call :meth:`clear` first to force
        re-discovery.

        Examples
        --------
        >>> registry = get_registry()
        >>> registry.discover_plugins()
        >>> extractors = registry.get_extractors_for_extension("dm3")
        >>> print(f"Found {len(extractors)} extractors for .dm3 files")
        """
        if self._discovered:
            _logger.debug("Plugins already discovered, skipping")
            return

        _logger.info("Discovering extractor plugins...")

        # Find the plugins directory
        plugins_package = "nexusLIMS.extractors.plugins"

        try:
            # Import the plugins package to get its path
            plugins_module = importlib.import_module(plugins_package)
            plugins_path = Path(plugins_module.__file__).parent
        except (ImportError, AttributeError, TypeError) as e:
            # TypeError covers a package whose ``__file__`` is None
            # (``Path(None)`` raises it), e.g. a namespace package.
            _logger.warning(
                "Could not import plugins package '%s': %s. Plugin discovery skipped.",
                plugins_package,
                e,
            )
            # Mark as discovered so the failing import is not retried on
            # every registry access.
            self._discovered = True
            return

        # Walk the plugins directory
        discovered_count = 0
        for _finder, name, _ispkg in pkgutil.walk_packages(
            [str(plugins_path)],
            prefix=f"{plugins_package}.",
        ):
            # Skip __pycache__ and other special directories
            if "__pycache__" in name:
                continue  # pragma: no cover

            # A broken plugin module must not prevent the others from loading,
            # so any failure here is logged and the module is skipped.
            try:
                module = importlib.import_module(name)
                _logger.debug("Imported plugin module: %s", name)

                # Look for classes implementing BaseExtractor/PreviewGenerator protocol
                for _item_name, obj in inspect.getmembers(module, inspect.isclass):
                    # Skip imported classes (only use classes defined in this module)
                    if obj.__module__ != module.__name__:
                        continue

                    # Check if it looks like a BaseExtractor
                    if self._is_extractor(obj):
                        self.register_extractor(obj)
                        discovered_count += 1
                        _logger.debug(
                            "Discovered extractor: %s (priority: %d)",
                            obj.name,
                            obj.priority,
                        )
                    # Check if it looks like a PreviewGenerator
                    elif self._is_preview_generator(obj):
                        self.register_preview_generator(obj)
                        discovered_count += 1
                        _logger.debug(
                            "Discovered preview generator: %s (priority: %d)",
                            obj.name,
                            obj.priority,
                        )

            except Exception as e:
                _logger.warning(
                    "Failed to import plugin module '%s': %s",
                    name,
                    e,
                    exc_info=True,
                )

        _logger.info("Discovered %d extractor plugins", discovered_count)

        # Register instrument profiles
        self._register_instrument_profiles()

        self._discovered = True

    def _register_instrument_profiles(self) -> None:
        """
        Register all instrument profiles.

        This calls the profile package's auto-discovery function to load
        and register all instrument-specific profiles. Failures are logged
        as warnings and never propagate to the caller.
        """
        try:
            register_all_profiles()
        except ImportError as e:
            _logger.warning(
                "Could not import profiles package: %s. No profiles will be loaded.",
                e,
            )
        except Exception as e:
            _logger.warning(
                "Error registering instrument profiles: %s",
                e,
                exc_info=True,
            )

    def _is_extractor(self, obj: Any) -> bool:
        """
        Check if an object implements the BaseExtractor protocol.

        Parameters
        ----------
        obj
            Object to check

        Returns
        -------
        bool
            True if obj is a class with a ``str`` ``name``, an ``int``
            ``priority`` and callable ``supports`` and ``extract`` members
        """
        return self._has_plugin_shape(obj, "extract")

    def _is_preview_generator(self, obj: Any) -> bool:
        """
        Check if an object implements the PreviewGenerator protocol.

        Parameters
        ----------
        obj
            Object to check

        Returns
        -------
        bool
            True if obj is a class with a ``str`` ``name``, an ``int``
            ``priority`` and callable ``supports`` and ``generate`` members
        """
        return self._has_plugin_shape(obj, "generate")

    # ------------------------------------------------------------------
    # Extractor registration and lookup
    # ------------------------------------------------------------------

    def register_extractor(self, extractor_class: type[BaseExtractor]) -> None:
        """
        Manually register an extractor class.

        This method is called automatically during plugin discovery, but can
        also be used to manually register extractors (useful for testing).
        Registering the same class twice is a no-op. Classes that declare no
        extensions are registered as wildcard extractors.

        Parameters
        ----------
        extractor_class
            The extractor class to register (not an instance)

        Examples
        --------
        >>> class MyExtractor:
        ...     name = "my_extractor"
        ...     priority = 100
        ...     def supports(self, context): return True
        ...     def extract(self, context): return {"nx_meta": {}}
        >>>
        >>> registry = get_registry()
        >>> registry.register_extractor(MyExtractor)
        """
        # Determine which extensions this extractor declares support for
        extensions = self._get_supported_extensions(extractor_class)

        if not extensions:
            # This is a wildcard extractor (supports any extension)
            if extractor_class in self._wildcard_extractors:
                _logger.debug(
                    "Extractor %s already registered (skipping duplicate)",
                    extractor_class.name,
                )
                return

            self._wildcard_extractors.append(extractor_class)
            # Keep wildcards priority-ordered like the per-extension lists.
            # The sort is stable, so equal priorities keep registration order.
            self._wildcard_extractors.sort(key=lambda e: e.priority, reverse=True)
            _logger.debug(
                "Registered wildcard extractor: %s",
                extractor_class.name,
            )
            return

        # Register for specific extensions
        for ext in extensions:
            registered = self._extractors[ext]
            if extractor_class in registered:
                _logger.debug(
                    "Extractor %s already registered for .%s (skipping duplicate)",
                    extractor_class.name,
                    ext,
                )
                continue

            registered.append(extractor_class)
            # Sort by priority (descending) for this extension
            registered.sort(key=lambda e: e.priority, reverse=True)
            _logger.debug(
                "Registered %s for extension: .%s",
                extractor_class.name,
                ext,
            )

    def _get_supported_extensions(
        self,
        extractor_class: type[BaseExtractor],
    ) -> set[str]:
        """
        Get supported file extensions from an extractor class.

        Uses the extractor's declared supported_extensions attribute and
        normalizes each entry (leading dots removed, lower-cased).

        Parameters
        ----------
        extractor_class
            The extractor class to check

        Returns
        -------
        set[str]
            New set of supported extensions (without dots, lower case), or
            empty set if this is a wildcard extractor
        """
        if not hasattr(extractor_class, "supported_extensions"):
            _logger.warning(
                "Extractor %s does not have supported_extensions attribute",
                extractor_class.name if hasattr(extractor_class, "name") else "unknown",
            )
            return set()

        # ``None`` marks a wildcard extractor and yields an empty set
        return self._normalize_extensions(extractor_class.supported_extensions)

    def _get_instance(self, extractor_class: type[BaseExtractor]) -> BaseExtractor:
        """
        Get or create an instance of an extractor class.

        Instances are cached for performance.

        Parameters
        ----------
        extractor_class
            The extractor class

        Returns
        -------
        BaseExtractor
            Instance of the extractor
        """
        # NOTE: the cache is keyed by the plugin's ``name`` attribute, so two
        # different classes declaring the same name share one cached instance.
        name = extractor_class.name
        if name not in self._instances:
            self._instances[name] = extractor_class()
            _logger.debug("Instantiated extractor: %s", name)

        return self._instances[name]

    def get_extractor(self, context: ExtractionContext) -> BaseExtractor:
        """
        Get the best extractor for a given file context.

        Selection algorithm:
        1. Auto-discover plugins if not already done
        2. Get extractors registered for this file's extension
        3. Try each in priority order (high to low) until one's supports() returns True
        4. If none match, try wildcard extractors in priority order
        5. If still none, return the BasicFileInfoExtractor fallback

        An extractor whose supports() raises is logged and skipped.

        This method NEVER returns None - there is always a fallback.

        Parameters
        ----------
        context
            Extraction context containing file path, instrument, etc.

        Returns
        -------
        BaseExtractor
            The best extractor for this file (never None)

        Examples
        --------
        >>> from nexusLIMS.extractors.base import ExtractionContext
        >>> from pathlib import Path
        >>>
        >>> context = ExtractionContext(Path("data.dm3"), None)
        >>> registry = get_registry()
        >>> extractor = registry.get_extractor(context)
        >>> print(f"Selected: {extractor.name}")
        """
        # Auto-discover if needed
        self._ensure_discovered()

        # Get file extension
        ext = context.file_path.suffix.lstrip(".").lower()

        # Try extension-specific extractors (``.get`` avoids creating an
        # empty defaultdict entry for unknown extensions)
        specific = (
            self._get_instance(extractor_class)
            for extractor_class in self._extractors.get(ext, ())
        )
        chosen = self._first_supporting(
            specific,
            context,
            "Selected extractor %s for %s",
            "Error in %s.supports(): %s",
        )
        if chosen is not None:
            return chosen

        # Try wildcard extractors
        wildcards = (
            self._get_instance(extractor_class)
            for extractor_class in self._wildcard_extractors
        )
        chosen = self._first_supporting(
            wildcards,
            context,
            "Selected wildcard extractor %s for %s",
            "Error in wildcard %s.supports(): %s",
        )
        if chosen is not None:
            return chosen

        # Fallback: use basic metadata extractor
        _logger.debug(
            "No extractor found for %s, using fallback",
            context.file_path.name,
        )
        return self._get_fallback_extractor()

    def _get_fallback_extractor(self) -> BaseExtractor:
        """
        Get the fallback extractor for unknown file types.

        Returns
        -------
        BaseExtractor
            BasicFileInfoExtractor instance
        """
        return self._get_instance(BasicFileInfoExtractor)

    def get_extractors_for_extension(self, extension: str) -> list[BaseExtractor]:
        """
        Get all extractors registered for a specific extension.

        Parameters
        ----------
        extension
            File extension (with or without leading dot, any case)

        Returns
        -------
        list[BaseExtractor]
            List of extractors, sorted by priority (descending); empty if
            nothing is registered for the extension

        Examples
        --------
        >>> registry = get_registry()
        >>> extractors = registry.get_extractors_for_extension("dm3")
        >>> for e in extractors:
        ...     print(f"{e.name}: priority {e.priority}")
        """
        # Auto-discover if needed
        self._ensure_discovered()

        ext = extension.lstrip(".").lower()
        return [
            self._get_instance(extractor_class)
            for extractor_class in self._extractors.get(ext, ())
        ]

    def get_supported_extensions(self, exclude_fallback: bool = False) -> set[str]:  # noqa: FBT001, FBT002
        """
        Get all file extensions that have registered extractors.

        Parameters
        ----------
        exclude_fallback
            If True, only include extensions that have at least one
            extractor with a priority greater than 0

        Returns
        -------
        set[str]
            Set of extensions (without dots)

        Examples
        --------
        >>> registry = get_registry()
        >>> extensions = registry.get_supported_extensions()
        >>> print(f"Supported: {', '.join(sorted(extensions))}")
        >>> specialized = registry.get_supported_extensions(exclude_fallback=True)
        >>> print(f"Specialized: {', '.join(sorted(specialized))}")
        """
        # Auto-discover if needed
        self._ensure_discovered()

        if not exclude_fallback:
            return set(self._extractors.keys())

        # Priority 0 is treated as the fallback level. ``priority`` is read
        # from the class (as registration already does when sorting), so no
        # extractor has to be instantiated just to answer this query.
        return {
            ext
            for ext, extractor_classes in self._extractors.items()
            if any(cls.priority > 0 for cls in extractor_classes)
        }

    def clear(self) -> None:
        """
        Clear all registered extractors and reset discovery state.

        Registered extractors, preview generators and their cached instances
        are dropped. Primarily used for testing.

        Examples
        --------
        >>> registry = get_registry()
        >>> registry.clear()
        >>> # Will re-discover on next use
        """
        self._extractors.clear()
        self._instances.clear()
        self._wildcard_extractors.clear()
        self._preview_generators.clear()
        self._preview_instances.clear()
        self._discovered = False
        _logger.debug("Cleared extractor registry")

    # ------------------------------------------------------------------
    # Preview generator registration and lookup
    # ------------------------------------------------------------------

    def register_preview_generator(
        self,
        generator_class: type[PreviewGenerator],
    ) -> None:
        """
        Manually register a preview generator class.

        This method is called automatically during plugin discovery, but can
        also be used to manually register generators (useful for testing).
        Registering the same class twice is a no-op. A generator that
        declares no extensions is not registered, because generators are
        only looked up by extension.

        Parameters
        ----------
        generator_class
            The preview generator class to register (not an instance)

        Examples
        --------
        >>> class MyGenerator:
        ...     name = "my_generator"
        ...     priority = 100
        ...     def supports(self, context): return True
        ...     def generate(self, context, output_path): return True
        >>>
        >>> registry = get_registry()
        >>> registry.register_preview_generator(MyGenerator)
        """
        # Determine which extensions this generator supports
        extensions = self._get_supported_extensions_for_generator(generator_class)

        # Register for specific extensions
        for ext in extensions:
            registered = self._preview_generators[ext]
            if generator_class in registered:
                # Same duplicate handling as register_extractor
                _logger.debug(
                    "Preview generator %s already registered for .%s "
                    "(skipping duplicate)",
                    generator_class.name,
                    ext,
                )
                continue

            registered.append(generator_class)
            # Sort by priority (descending) for this extension
            registered.sort(key=lambda g: g.priority, reverse=True)
            _logger.debug(
                "Registered preview generator %s for extension: .%s",
                generator_class.name,
                ext,
            )

    def _get_supported_extensions_for_generator(
        self,
        generator_class: type[PreviewGenerator],
    ) -> set[str]:
        """
        Get supported file extensions from a preview generator class.

        Uses the generator's declared supported_extensions attribute and
        normalizes each entry (leading dots removed, lower-cased).

        Parameters
        ----------
        generator_class
            The preview generator class to check

        Returns
        -------
        set[str]
            New set of supported extensions (without dots, lower case);
            empty if none are declared
        """
        if not hasattr(generator_class, "supported_extensions"):
            _logger.warning(
                "Preview generator %s does not have supported_extensions attribute",
                generator_class.name if hasattr(generator_class, "name") else "unknown",
            )
            return set()

        # ``None`` (no declared extensions) yields an empty set
        return self._normalize_extensions(generator_class.supported_extensions)

    def _get_preview_instance(
        self,
        generator_class: type[PreviewGenerator],
    ) -> PreviewGenerator:
        """
        Get or create an instance of a preview generator class.

        Instances are cached for performance.

        Parameters
        ----------
        generator_class
            The preview generator class

        Returns
        -------
        PreviewGenerator
            Instance of the preview generator
        """
        # NOTE: keyed by ``name``, like the extractor instance cache.
        name = generator_class.name
        if name not in self._preview_instances:
            self._preview_instances[name] = generator_class()
            _logger.debug("Instantiated preview generator: %s", name)

        return self._preview_instances[name]

    def get_preview_generator(
        self,
        context: ExtractionContext,
    ) -> PreviewGenerator | None:
        """
        Get the best preview generator for a given file context.

        Selection algorithm:
        1. Auto-discover plugins if not already done
        2. Get generators registered for this file's extension
        3. Try each in priority order (high to low) until one's supports() returns True
        4. If none match, return None

        A generator whose supports() raises is logged and skipped.

        Parameters
        ----------
        context
            Extraction context containing file path, instrument, etc.

        Returns
        -------
        PreviewGenerator | None
            The best preview generator for this file, or None if no generator found

        Examples
        --------
        >>> from nexusLIMS.extractors.base import ExtractionContext
        >>> from pathlib import Path
        >>>
        >>> context = ExtractionContext(Path("data.dm3"), None)
        >>> registry = get_registry()
        >>> generator = registry.get_preview_generator(context)
        >>> if generator:
        ...     generator.generate(context, Path("preview.png"))
        """
        # Auto-discover if needed
        self._ensure_discovered()

        # Get file extension
        ext = context.file_path.suffix.lstrip(".").lower()

        # Try extension-specific generators
        candidates = (
            self._get_preview_instance(generator_class)
            for generator_class in self._preview_generators.get(ext, ())
        )
        chosen = self._first_supporting(
            candidates,
            context,
            "Selected preview generator %s for %s",
            "Error in %s.supports(): %s",
        )
        if chosen is not None:
            return chosen

        # No generator found
        _logger.debug(
            "No preview generator found for %s",
            context.file_path.name,
        )
        return None

869 

870 

# Process-wide registry, created lazily by get_registry()
_registry: ExtractorRegistry | None = None


def get_registry() -> ExtractorRegistry:
    """
    Return the shared :class:`ExtractorRegistry`, creating it on first call.

    The instance is stored in the module-level ``_registry`` variable, so
    every call returns the same object.

    Returns
    -------
    ExtractorRegistry
        The global registry instance

    Examples
    --------
    >>> from nexusLIMS.extractors.registry import get_registry
    >>> registry = get_registry()
    >>> # Always returns the same instance
    >>> assert get_registry() is registry
    """
    global _registry  # noqa: PLW0603
    if _registry is not None:
        return _registry

    _registry = ExtractorRegistry()
    return _registry