Coverage for nexusLIMS/cli/process_records.py: 100%

188 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1# ruff: noqa: FBT001 

2""" 

3CLI command to process new NexusLIMS records. 

4 

5This module provides a command-line interface for running the NexusLIMS record 

6builder with file locking, timestamped logging, and email notifications. 

7It replaces the functionality previously provided by process_new_records.sh. 

8 

9Usage 

10----- 

11 

12.. code-block:: bash 

13 

14 nexuslims build-records [OPTIONS] 

15 

16Options 

17------- 

18 

19.. code-block:: bash 

20 

21 -n, --dry-run : Dry run mode (find files without building records) 

22 -v, --verbose : Increase verbosity (-v for INFO, -vv for DEBUG) 

23 --from <date> : Start date for filtering (ISO format). Defaults to 1 week ago. 

24 Use "none" to disable lower bound. 

25 --to <date> : End date for filtering (ISO format). Omit to disable upper bound. 

26 --version : Show version and exit 

27 --help : Show help message and exit 

28""" 

29 

30import json 

31import logging 

32import re 

33import smtplib 

34import sys 

35from datetime import datetime, timedelta 

36from email.mime.text import MIMEText 

37from pathlib import Path 

38 

39import click 

40from filelock import FileLock, Timeout 

41from rich.console import Console 

42from rich.logging import RichHandler 

43 

44from nexusLIMS.builder.preflight import PreflightError 

45from nexusLIMS.cli import _format_version 

46 

47# Heavy NexusLIMS imports are lazy-loaded inside functions to speed up --help/--version 

48# See: setup_file_logging(), send_error_notification(), and main() 

49 

50logger = logging.getLogger(__name__) 

51console = Console() 

52 

53 

54# Error patterns to search for in log files 

55ERROR_PATTERNS = [ 

56 re.compile(r"\bcritical\b", re.IGNORECASE), 

57 re.compile(r"\berror\b", re.IGNORECASE), 

58 re.compile(r"\bfatal\b", re.IGNORECASE), 

59] 

60 

61# Patterns to exclude from error detection (known non-critical errors) 

62EXCLUDE_PATTERNS = [ 

63 "Temporary failure in name resolution", 

64 "NoDataConsentError", 

65 "NoMatchingReservationError", 

66] 

67 

68 

69def setup_file_logging(dry_run: bool = False) -> tuple[Path, logging.FileHandler]: # noqa: FBT002 

70 """ 

71 Set up file logging with timestamped log file. 

72 

73 Creates a log directory structure based on the current date and adds a 

74 FileHandler to the root logger. Log files are named with timestamps 

75 in the format YYYYMMDD-HHMM.log (or YYYYMMDD-HHMM_dryrun.log for dry runs). 

76 

77 Note: This function removes any existing FileHandlers from the root logger 

78 before adding the new handler to prevent handler accumulation across multiple 

79 invocations (important for testing scenarios where the same process runs 

80 multiple CLI commands). 

81 

82 Parameters 

83 ---------- 

84 dry_run : bool 

85 If True, append '_dryrun' to the log filename 

86 

87 Returns 

88 ------- 

89 tuple[pathlib.Path, logging.FileHandler] 

90 A tuple containing: 

91 - Path to the created log file 

92 - The FileHandler instance that was added to the root logger 

93 

94 Raises 

95 ------ 

96 OSError 

97 If log directory creation fails 

98 """ 

99 from nexusLIMS.config import settings # noqa: PLC0415 

100 

101 # Remove any existing FileHandlers from root logger to prevent accumulation 

102 # This is critical when the function is called multiple times (e.g., in tests) 

103 # to ensure log messages go only to the current log file 

104 for handler in logging.root.handlers[ 

105 : 

106 ]: # Use slice to avoid modifying list during iteration 

107 if isinstance(handler, logging.FileHandler): 

108 logging.root.removeHandler(handler) 

109 handler.close() 

110 

111 now = datetime.now().astimezone() 

112 year = now.strftime("%Y") 

113 month = now.strftime("%m") 

114 day = now.strftime("%d") 

115 # Include seconds in timestamp to prevent collisions when multiple runs 

116 # happen in same minute 

117 timestamp = now.strftime("%Y%m%d-%H%M%S") 

118 

119 # Create log directory structure: logs/YYYY/MM/DD/ 

120 log_dir = settings.log_dir_path / year / month / day 

121 log_dir.mkdir(parents=True, exist_ok=True) 

122 

123 # Create log filename 

124 suffix = "_dryrun" if dry_run else "" 

125 log_file = log_dir / f"{timestamp}{suffix}.log" 

126 

127 # Add file handler to root logger 

128 file_handler = logging.FileHandler(log_file) 

129 file_handler.setFormatter( 

130 logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 

131 ) 

132 logging.root.addHandler(file_handler) 

133 

134 logger.info("Logging to file: %s", log_file) 

135 

136 return log_file, file_handler 

137 

138 

139def check_log_for_errors(log_path: Path) -> tuple[bool, list[str]]: 

140 """ 

141 Check log file for error patterns. 

142 

143 Reads the log file and searches for error patterns (critical, error, fatal) 

144 while excluding known non-critical error patterns. 

145 

146 Parameters 

147 ---------- 

148 log_path : pathlib.Path 

149 Path to the log file to check 

150 

151 Returns 

152 ------- 

153 tuple[bool, list[str]] 

154 A tuple containing: 

155 - bool: True if errors were found, False otherwise 

156 - list[str]: List of error pattern names that were found 

157 

158 Raises 

159 ------ 

160 FileNotFoundError 

161 If the log file doesn't exist 

162 """ 

163 if not log_path.exists(): 

164 logger.error("Log file not found: %s", log_path) 

165 return False, [] 

166 

167 try: 

168 log_contents = log_path.read_text() 

169 except OSError: 

170 logger.exception("Failed to read log file: %s", log_path) 

171 return False, [] 

172 

173 # Check if any exclude patterns are present 

174 for exclude_pattern in EXCLUDE_PATTERNS: 

175 if exclude_pattern in log_contents: 

176 logger.debug("Found excluded pattern: %s", exclude_pattern) 

177 # If we find an excluded pattern, don't send email 

178 return False, [] 

179 

180 # Check for error patterns 

181 found_patterns = [] 

182 for pattern in ERROR_PATTERNS: 

183 if pattern.search(log_contents): 

184 pattern_name = pattern.pattern.strip("\\b").lower() 

185 found_patterns.append(pattern_name) 

186 logger.debug("Found error pattern: %s", pattern_name) 

187 

188 has_errors = len(found_patterns) > 0 

189 return has_errors, found_patterns 

190 

191 

192def send_error_notification(log_path: Path, found_patterns: list[str]) -> None: 

193 """ 

194 Send error notification email. 

195 

196 Sends an email notification with the log file contents when errors are 

197 detected. Email sending is skipped if email configuration is not available. 

198 

199 Parameters 

200 ---------- 

201 log_path : pathlib.Path 

202 Path to the log file to include in the email 

203 found_patterns : list[str] 

204 List of error pattern names that were found in the log 

205 

206 Returns 

207 ------- 

208 None 

209 This function doesn't return anything. Errors are logged but not raised. 

210 

211 Notes 

212 ----- 

213 - Email sending is gracefully skipped if configuration is missing 

214 - Any email sending errors are logged but don't cause the function to fail 

215 - Uses SMTP with TLS encryption if configured 

216 """ 

217 from nexusLIMS.config import settings # noqa: PLC0415 

218 

219 # Check if email is configured 

220 email_config = settings.email_config() 

221 if email_config is None: 

222 logger.info("Email not configured, skipping notification") 

223 return 

224 

225 logger.info("Sending error notification email") 

226 

227 try: 

228 # Read log file contents 

229 log_contents = log_path.read_text() 

230 

231 # Build email message 

232 subject = "ERROR in NexusLIMS record builder" 

233 body = f"""There was an error (or unusual output) in the record builder. 

234Here is the output of {log_path}. 

235To help you debug, the following "bad" strings were found in the output: 

236 

237{", ".join(found_patterns)} 

238 

239{log_contents}""" 

240 

241 msg = MIMEText(body) 

242 msg["Subject"] = subject 

243 msg["From"] = email_config.sender 

244 msg["To"] = ", ".join(email_config.recipients) 

245 

246 # Send email via SMTP 

247 smtp_class = smtplib.SMTP 

248 with smtp_class( 

249 email_config.smtp_host, email_config.smtp_port, timeout=30 

250 ) as server: 

251 if email_config.use_tls: 

252 server.starttls() 

253 

254 # Authenticate if credentials provided 

255 if email_config.smtp_username and email_config.smtp_password: 

256 server.login(email_config.smtp_username, email_config.smtp_password) 

257 

258 # Send message 

259 server.send_message(msg) 

260 

261 logger.info("Error notification email sent successfully") 

262 

263 except smtplib.SMTPException: 

264 logger.exception("Failed to send error notification email") 

265 except OSError: 

266 logger.exception("Failed to read log file for email: %s", log_path) 

267 except Exception: 

268 logger.exception("Unexpected error while sending email") 

269 

270 

271def _get_log_level(verbose: int) -> int: 

272 """ 

273 Convert verbose count to logging level. 

274 

275 Parameters 

276 ---------- 

277 verbose : int 

278 Verbosity level (0 = WARNING, 1 = INFO, 2+ = DEBUG) 

279 

280 Returns 

281 ------- 

282 int 

283 Logging level constant from the logging module 

284 """ 

285 if verbose == 0: 

286 return logging.WARNING 

287 if verbose == 1: 

288 return logging.INFO 

289 return logging.DEBUG 

290 

291 

292def _setup_logging(log_level: int, dry_run: bool) -> tuple[Path, logging.FileHandler]: 

293 """ 

294 Configure console and file logging. 

295 

296 Parameters 

297 ---------- 

298 log_level : int 

299 Logging level constant from the logging module 

300 dry_run : bool 

301 If True, append '_dryrun' to the log filename 

302 

303 Returns 

304 ------- 

305 tuple[Path, logging.FileHandler] 

306 Tuple of (log_file_path, file_handler) 

307 

308 Raises 

309 ------ 

310 OSError 

311 If file logging setup fails 

312 SystemExit 

313 If file logging setup fails (exits with code 1) 

314 """ 

315 from nexusLIMS.utils.logging import setup_loggers # noqa: PLC0415 

316 

317 # Setup console logging with rich 

318 logging.basicConfig( 

319 level=log_level, 

320 format="%(message)s", 

321 handlers=[RichHandler(console=console, rich_tracebacks=True)], 

322 ) 

323 

324 # Setup all nexusLIMS loggers 

325 setup_loggers(log_level) 

326 

327 # Setup file logging 

328 try: 

329 return setup_file_logging(dry_run) 

330 except OSError: 

331 logger.exception("Failed to setup file logging") 

332 console.print("[bold red]Failed to setup file logging[/bold red]") 

333 sys.exit(1) 

334 

335 

336def _run_with_lock( 

337 dry_run: bool, dt_from: datetime | None, dt_to: datetime | None 

338) -> None: 

339 """ 

340 Run the record builder with file locking. 

341 

342 Parameters 

343 ---------- 

344 dry_run : bool 

345 If True, run in dry-run mode (find files without building records) 

346 dt_from : datetime | None 

347 The point in time after which sessions will be fetched 

348 dt_to : datetime | None 

349 The point in time before which sessions will be fetched 

350 

351 Returns 

352 ------- 

353 None 

354 

355 Raises 

356 ------ 

357 SystemExit 

358 If lock cannot be acquired (another instance is running) 

359 """ 

360 from nexusLIMS.builder import record_builder # noqa: PLC0415 

361 from nexusLIMS.config import settings # noqa: PLC0415 

362 

363 lock_file = settings.lock_file_path 

364 lock = FileLock(str(lock_file), timeout=0) 

365 

366 try: 

367 logger.info("Attempting to acquire lock at %s", lock_file) 

368 with lock: 

369 logger.info("Lock acquired successfully") 

370 try: 

371 record_builder.process_new_records( 

372 dry_run=dry_run, dt_from=dt_from, dt_to=dt_to 

373 ) 

374 logger.info("Record processing completed") 

375 except PreflightError as e: 

376 logger.error( # noqa: TRY400 

377 "Preflight checks failed — record builder aborted" 

378 ) 

379 for check in e.failed_checks: 

380 logger.error(" [%s] %s", check.name, check.message) # noqa: TRY400 

381 except Exception: 

382 logger.exception("Error during record processing") 

383 

384 except Timeout: 

385 logger.warning( 

386 "Lock file already exists at %s - another instance is running", 

387 lock_file, 

388 ) 

389 console.print(f"[yellow]Lock file already exists at {lock_file}[/yellow]") 

390 console.print("[yellow]Another instance is already running. Exiting.[/yellow]") 

391 sys.exit(0) 

392 

393 

394def _parse_date_argument( 

395 date_str: str | None, *, inclusive_end: bool = False 

396) -> datetime | None: 

397 """ 

398 Parse a date string argument into a datetime object. 

399 

400 Parameters 

401 ---------- 

402 date_str : str | None 

403 Date string in ISO format (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS), 

404 or special values "none"/"all" to disable filtering, 

405 or None to return None 

406 inclusive_end : bool 

407 If True and date_str has no time component, set time to 23:59:59 

408 to include the entire day. Used for --to parameter to make date 

409 ranges inclusive. Default is False (use midnight). 

410 

411 Returns 

412 ------- 

413 datetime | None 

414 Parsed datetime with system timezone, or None if date_str is None 

415 or a special value 

416 

417 Raises 

418 ------ 

419 click.BadParameter 

420 If date string cannot be parsed 

421 """ 

422 if date_str is None: 

423 return None 

424 

425 # Check for special values that disable filtering 

426 if date_str.lower() in ("none", "all"): 

427 return None 

428 

429 # Parse ISO format date string 

430 try: 

431 # Try parsing with time component first 

432 if "T" in date_str: 

433 dt_obj = datetime.fromisoformat(date_str) 

434 # Parse date-only string 

435 elif inclusive_end: 

436 # For inclusive end dates, set time to end of day 

437 dt_obj = datetime.fromisoformat(date_str + "T23:59:59") 

438 else: 

439 # For start dates, set time to midnight 

440 dt_obj = datetime.fromisoformat(date_str + "T00:00:00") 

441 

442 # Ensure timezone-aware datetime using system timezone 

443 if dt_obj.tzinfo is None: 

444 from nexusLIMS.utils.time import current_system_tz # noqa: PLC0415 

445 

446 dt_obj = dt_obj.replace(tzinfo=current_system_tz()) 

447 except ValueError as e: 

448 msg = ( 

449 f"Invalid date format: {date_str}. " 

450 f"Use ISO format (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS) " 

451 f'or special value "none" to disable filtering.' 

452 ) 

453 raise click.BadParameter(msg) from e 

454 else: 

455 return dt_obj 

456 

457 

458def _handle_error_notification( 

459 log_file: Path, file_handler: logging.FileHandler 

460) -> None: 

461 """ 

462 Check log for errors and send notification if needed. 

463 

464 Parameters 

465 ---------- 

466 log_file : Path 

467 Path to the log file to check 

468 file_handler : logging.FileHandler 

469 File handler to flush before checking log 

470 

471 Returns 

472 ------- 

473 None 

474 This function doesn't return anything. All errors are caught and logged. 

475 """ 

476 # Ensure file handler is flushed before checking log 

477 # This is important for error detection to work correctly 

478 file_handler.flush() 

479 

480 logger.info("NexusLIMS record processor finished") 

481 

482 try: 

483 has_errors, found_patterns = check_log_for_errors(log_file) 

484 if has_errors: 

485 logger.info("Errors detected in log, sending notification") 

486 send_error_notification(log_file, found_patterns) 

487 else: 

488 logger.info("No errors detected in log") 

489 except Exception: 

490 logger.exception("Error while checking log or sending notification") 

491 finally: 

492 # Clean up file handler after all logging is complete 

493 logging.root.removeHandler(file_handler) 

494 file_handler.close() 

495 

496 

497@click.command( 

498 epilog=""" 

499Examples: 

500 

501 \b 

502 # Normal run (process records from last week) 

503 $ nexuslims build-records 

504 

505 \b 

506 # Process all sessions (no date filtering) 

507 $ nexuslims build-records --from=none 

508 

509 \b 

510 # Process sessions since a specific date 

511 $ nexuslims build-records --from=2025-01-01 

512 

513 \b 

514 # Process a specific date range 

515 $ nexuslims build-records --from=2025-01-01 --to=2025-01-31 

516 

517 \b 

518 # Dry run (find files only) 

519 $ nexuslims build-records -n 

520 

521 \b 

522 # Verbose output 

523 $ nexuslims build-records -vv 

524""" 

525) 

526@click.option( 

527 "-n", 

528 "--dry-run", 

529 is_flag=True, 

530 help="Dry run: find files without building records", 

531) 

532@click.option( 

533 "-v", 

534 "--verbose", 

535 count=True, 

536 help="Increase verbosity (-v for INFO, -vv for DEBUG)", 

537) 

538@click.option( 

539 "--from", 

540 "from_arg", 

541 type=str, 

542 default=None, 

543 help="Start date for session filtering (ISO format: YYYY-MM-DD). " 

544 'Defaults to 1 week ago. Use "none" to disable lower bound.', 

545) 

546@click.option( 

547 "--to", 

548 "to_arg", 

549 type=str, 

550 default=None, 

551 help="End date for session filtering (ISO format: YYYY-MM-DD). " 

552 "Omit to disable upper bound.", 

553) 

554@click.version_option(version=None, message=_format_version("nexuslims build-records")) 

555def main( 

556 *, dry_run: bool, verbose: int, from_arg: str | None, to_arg: str | None 

557) -> None: 

558 """ 

559 Process new NexusLIMS records with logging and email notifications. 

560 

561 This command runs the NexusLIMS record builder to process new experimental 

562 sessions and generate XML records. It provides file locking to prevent 

563 concurrent runs, timestamped logging, and email notifications on errors. 

564 

565 By default, only sessions from the last week are processed. Use --from=none 

566 to process all sessions, or specify custom date ranges with --from and --to. 

567 """ 

568 from nexusLIMS.cli import handle_config_error # noqa: PLC0415 

569 

570 with handle_config_error(): 

571 # Setup logging (accesses settings for log directory path) 

572 log_level = _get_log_level(verbose) 

573 log_file, file_handler = _setup_logging(log_level, dry_run) 

574 

575 # Parse date arguments from raw string parameters 

576 dt_from = _parse_date_argument(from_arg) 

577 dt_to = _parse_date_argument(to_arg, inclusive_end=True) 

578 

579 # Apply default: fetch last week if no --from was provided 

580 # (Don't apply if user explicitly passed --from=none) 

581 if from_arg is None: 

582 from nexusLIMS.utils.time import current_system_tz # noqa: PLC0415 

583 

584 dt_from = datetime.now(tz=current_system_tz()) - timedelta(weeks=1) 

585 

586 # Log startup information 

587 logger.info("Starting NexusLIMS record processor") 

588 logger.info("Dry run: %s", dry_run) 

589 if dt_from is not None: 

590 logger.info("Fetching sessions from: %s", dt_from.isoformat()) 

591 else: 

592 logger.info("Fetching sessions from: (no lower bound)") 

593 if dt_to is not None: 

594 logger.info("Fetching sessions to: %s", dt_to.isoformat()) 

595 else: 

596 logger.info("Fetching sessions to: (no upper bound)") 

597 

598 # Dump sanitized effective configuration when verbose 

599 if verbose >= 1: 

600 from nexusLIMS.cli.config import ( # noqa: PLC0415 

601 _build_config_dict, 

602 _sanitize_config, 

603 ) 

604 from nexusLIMS.config import settings # noqa: PLC0415 

605 

606 logger.info( 

607 "Effective configuration:\n%s", 

608 json.dumps( 

609 _sanitize_config(_build_config_dict(settings)), 

610 indent=2, 

611 default=str, 

612 ), 

613 ) 

614 

615 # Run record builder with file locking 

616 _run_with_lock(dry_run, dt_from, dt_to) 

617 

618 # Handle error notifications and cleanup 

619 _handle_error_notification(log_file, file_handler) 

620 

621 

622if __name__ == "__main__": # pragma: no cover 

623 main()