Coverage for nexusLIMS/cli/process

1# ruff: noqa: FBT001

2"""

3CLI command to process new NexusLIMS records.

5This module provides a command-line interface for running the NexusLIMS record

6builder with file locking, timestamped logging, and email notifications.

7It replaces the functionality previously provided by process_new_records.sh.

9Usage

10-----

12.. code-block:: bash

14 nexuslims build-records [OPTIONS]

16Options

17-------

19.. code-block:: bash

21 -n, --dry-run : Dry run mode (find files without building records)

22 -v, --verbose : Increase verbosity (-v for INFO, -vv for DEBUG)

23 --from <date> : Start date for filtering (ISO format). Defaults to 1 week ago.

24 Use "none" to disable lower bound.

25 --to <date> : End date for filtering (ISO format). Omit to disable upper bound.

26 --version : Show version and exit

27 --help : Show help message and exit

28"""

30import json

31import logging

32import re

33import smtplib

34import sys

35from datetime import datetime, timedelta

36from email.mime.text import MIMEText

37from pathlib import Path

39import click

40from filelock import FileLock, Timeout

41from rich.console import Console

42from rich.logging import RichHandler

44from nexusLIMS.builder.preflight import PreflightError

45from nexusLIMS.cli import _format_version

47# Heavy NexusLIMS imports are lazy-loaded inside functions to speed up --help/--version

48# See: setup_file_logging(), send_error_notification(), and main()

50logger = logging.getLogger(__name__)

51console = Console()

54# Error patterns to search for in log files

55ERROR_PATTERNS = [

56 re.compile(r"\bcritical\b", re.IGNORECASE),

57 re.compile(r"\berror\b", re.IGNORECASE),

58 re.compile(r"\bfatal\b", re.IGNORECASE),

59]

61# Patterns to exclude from error detection (known non-critical errors)

62EXCLUDE_PATTERNS = [

63 "Temporary failure in name resolution",

64 "NoDataConsentError",

65 "NoMatchingReservationError",

66]

69def setup_file_logging(dry_run: bool = False) -> tuple[Path, logging.FileHandler]: # noqa: FBT002

70 """

71 Set up file logging with timestamped log file.

73 Creates a log directory structure based on the current date and adds a

74 FileHandler to the root logger. Log files are named with timestamps

75 in the format YYYYMMDD-HHMM.log (or YYYYMMDD-HHMM_dryrun.log for dry runs).

77 Note: This function removes any existing FileHandlers from the root logger

78 before adding the new handler to prevent handler accumulation across multiple

79 invocations (important for testing scenarios where the same process runs

80 multiple CLI commands).

82 Parameters

83 ----------

84 dry_run : bool

85 If True, append '_dryrun' to the log filename

87 Returns

88 -------

89 tuple[pathlib.Path, logging.FileHandler]

90 A tuple containing:

91 - Path to the created log file

92 - The FileHandler instance that was added to the root logger

94 Raises

95 ------

96 OSError

97 If log directory creation fails

98 """

99 from nexusLIMS.config import settings # noqa: PLC0415

100

101 # Remove any existing FileHandlers from root logger to prevent accumulation

102 # This is critical when the function is called multiple times (e.g., in tests)

103 # to ensure log messages go only to the current log file

104 for handler in logging.root.handlers[

105 :

106 ]: # Use slice to avoid modifying list during iteration

107 if isinstance(handler, logging.FileHandler):

108 logging.root.removeHandler(handler)

109 handler.close()

110

111 now = datetime.now().astimezone()

112 year = now.strftime("%Y")

113 month = now.strftime("%m")

114 day = now.strftime("%d")

115 # Include seconds in timestamp to prevent collisions when multiple runs

116 # happen in same minute

117 timestamp = now.strftime("%Y%m%d-%H%M%S")

118

119 # Create log directory structure: logs/YYYY/MM/DD/

120 log_dir = settings.log_dir_path / year / month / day

121 log_dir.mkdir(parents=True, exist_ok=True)

122

123 # Create log filename

124 suffix = "_dryrun" if dry_run else ""

125 log_file = log_dir / f"{timestamp}{suffix}.log"

126

127 # Add file handler to root logger

128 file_handler = logging.FileHandler(log_file)

129 file_handler.setFormatter(

130 logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")

131 )

132 logging.root.addHandler(file_handler)

133

134 logger.info("Logging to file: %s", log_file)

135

136 return log_file, file_handler

137

138

139def check_log_for_errors(log_path: Path) -> tuple[bool, list[str]]:

140 """

141 Check log file for error patterns.

142

143 Reads the log file and searches for error patterns (critical, error, fatal)

144 while excluding known non-critical error patterns.

145

146 Parameters

147 ----------

148 log_path : pathlib.Path

149 Path to the log file to check

150

151 Returns

152 -------

153 tuple[bool, list[str]]

154 A tuple containing:

155 - bool: True if errors were found, False otherwise

156 - list[str]: List of error pattern names that were found

157

158 Raises

159 ------

160 FileNotFoundError

161 If the log file doesn't exist

162 """

163 if not log_path.exists():

164 logger.error("Log file not found: %s", log_path)

165 return False, []

166

167 try:

168 log_contents = log_path.read_text()

169 except OSError:

170 logger.exception("Failed to read log file: %s", log_path)

171 return False, []

172

173 # Check if any exclude patterns are present

174 for exclude_pattern in EXCLUDE_PATTERNS:

175 if exclude_pattern in log_contents:

176 logger.debug("Found excluded pattern: %s", exclude_pattern)

177 # If we find an excluded pattern, don't send email

178 return False, []

179

180 # Check for error patterns

181 found_patterns = []

182 for pattern in ERROR_PATTERNS:

183 if pattern.search(log_contents):

184 pattern_name = pattern.pattern.strip("\\b").lower()

185 found_patterns.append(pattern_name)

186 logger.debug("Found error pattern: %s", pattern_name)

187

188 has_errors = len(found_patterns) > 0

189 return has_errors, found_patterns

190

191

192def send_error_notification(log_path: Path, found_patterns: list[str]) -> None:

193 """

194 Send error notification email.

195

196 Sends an email notification with the log file contents when errors are

197 detected. Email sending is skipped if email configuration is not available.

198

199 Parameters

200 ----------

201 log_path : pathlib.Path

202 Path to the log file to include in the email

203 found_patterns : list[str]

204 List of error pattern names that were found in the log

205

206 Returns

207 -------

208 None

209 This function doesn't return anything. Errors are logged but not raised.

210

211 Notes

212 -----

213 - Email sending is gracefully skipped if configuration is missing

214 - Any email sending errors are logged but don't cause the function to fail

215 - Uses SMTP with TLS encryption if configured

216 """

217 from nexusLIMS.config import settings # noqa: PLC0415

218

219 # Check if email is configured

220 email_config = settings.email_config()

221 if email_config is None:

222 logger.info("Email not configured, skipping notification")

223 return

224

225 logger.info("Sending error notification email")

226

227 try:

228 # Read log file contents

229 log_contents = log_path.read_text()

230

231 # Build email message

232 subject = "ERROR in NexusLIMS record builder"

233 body = f"""There was an error (or unusual output) in the record builder.

234Here is the output of {log_path}.

235To help you debug, the following "bad" strings were found in the output:

236

237{", ".join(found_patterns)}

238

239{log_contents}"""

240

241 msg = MIMEText(body)

242 msg["Subject"] = subject

243 msg["From"] = email_config.sender

244 msg["To"] = ", ".join(email_config.recipients)

245

246 # Send email via SMTP

247 smtp_class = smtplib.SMTP

248 with smtp_class(

249 email_config.smtp_host, email_config.smtp_port, timeout=30

250 ) as server:

251 if email_config.use_tls:

252 server.starttls()

253

254 # Authenticate if credentials provided

255 if email_config.smtp_username and email_config.smtp_password:

256 server.login(email_config.smtp_username, email_config.smtp_password)

257

258 # Send message

259 server.send_message(msg)

260

261 logger.info("Error notification email sent successfully")

262

263 except smtplib.SMTPException:

264 logger.exception("Failed to send error notification email")

265 except OSError:

266 logger.exception("Failed to read log file for email: %s", log_path)

267 except Exception:

268 logger.exception("Unexpected error while sending email")

269

270

271def _get_log_level(verbose: int) -> int:

272 """

273 Convert verbose count to logging level.

274

275 Parameters

276 ----------

277 verbose : int

278 Verbosity level (0 = WARNING, 1 = INFO, 2+ = DEBUG)

279

280 Returns

281 -------

282 int

283 Logging level constant from the logging module

284 """

285 if verbose == 0:

286 return logging.WARNING

287 if verbose == 1:

288 return logging.INFO

289 return logging.DEBUG

290

291

292def _setup_logging(log_level: int, dry_run: bool) -> tuple[Path, logging.FileHandler]:

293 """

294 Configure console and file logging.

295

296 Parameters

297 ----------

298 log_level : int

299 Logging level constant from the logging module

300 dry_run : bool

301 If True, append '_dryrun' to the log filename

302

303 Returns

304 -------

305 tuple[Path, logging.FileHandler]

306 Tuple of (log_file_path, file_handler)

307

308 Raises

309 ------

310 OSError

311 If file logging setup fails

312 SystemExit

313 If file logging setup fails (exits with code 1)

314 """

315 from nexusLIMS.utils.logging import setup_loggers # noqa: PLC0415

316

317 # Setup console logging with rich

318 logging.basicConfig(

319 level=log_level,

320 format="%(message)s",

321 handlers=[RichHandler(console=console, rich_tracebacks=True)],

322 )

323

324 # Setup all nexusLIMS loggers

325 setup_loggers(log_level)

326

327 # Setup file logging

328 try:

329 return setup_file_logging(dry_run)

330 except OSError:

331 logger.exception("Failed to setup file logging")

332 console.print("[bold red]Failed to setup file logging[/bold red]")

333 sys.exit(1)

334

335

336def _run_with_lock(

337 dry_run: bool, dt_from: datetime | None, dt_to: datetime | None

338) -> None:

339 """

340 Run the record builder with file locking.

341

342 Parameters

343 ----------

344 dry_run : bool

345 If True, run in dry-run mode (find files without building records)

346 dt_from : datetime | None

347 The point in time after which sessions will be fetched

348 dt_to : datetime | None

349 The point in time before which sessions will be fetched

350

351 Returns

352 -------

353 None

354

355 Raises

356 ------

357 SystemExit

358 If lock cannot be acquired (another instance is running)

359 """

360 from nexusLIMS.builder import record_builder # noqa: PLC0415

361 from nexusLIMS.config import settings # noqa: PLC0415

362

363 lock_file = settings.lock_file_path

364 lock = FileLock(str(lock_file), timeout=0)

365

366 try:

367 logger.info("Attempting to acquire lock at %s", lock_file)

368 with lock:

369 logger.info("Lock acquired successfully")

370 try:

371 record_builder.process_new_records(

372 dry_run=dry_run, dt_from=dt_from, dt_to=dt_to

373 )

374 logger.info("Record processing completed")

375 except PreflightError as e:

376 logger.error( # noqa: TRY400

377 "Preflight checks failed — record builder aborted"

378 )

379 for check in e.failed_checks:

380 logger.error(" [%s] %s", check.name, check.message) # noqa: TRY400

381 except Exception:

382 logger.exception("Error during record processing")

383

384 except Timeout:

385 logger.warning(

386 "Lock file already exists at %s - another instance is running",

387 lock_file,

388 )

389 console.print(f"[yellow]Lock file already exists at {lock_file}[/yellow]")

390 console.print("[yellow]Another instance is already running. Exiting.[/yellow]")

391 sys.exit(0)

392

393

394def _parse_date_argument(

395 date_str: str | None, *, inclusive_end: bool = False

396) -> datetime | None:

397 """

398 Parse a date string argument into a datetime object.

399

400 Parameters

401 ----------

402 date_str : str | None

403 Date string in ISO format (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS),

404 or special values "none"/"all" to disable filtering,

405 or None to return None

406 inclusive_end : bool

407 If True and date_str has no time component, set time to 23:59:59

408 to include the entire day. Used for --to parameter to make date

409 ranges inclusive. Default is False (use midnight).

410

411 Returns

412 -------

413 datetime | None

414 Parsed datetime with system timezone, or None if date_str is None

415 or a special value

416

417 Raises

418 ------

419 click.BadParameter

420 If date string cannot be parsed

421 """

422 if date_str is None:

423 return None

424

425 # Check for special values that disable filtering

426 if date_str.lower() in ("none", "all"):

427 return None

428

429 # Parse ISO format date string

430 try:

431 # Try parsing with time component first

432 if "T" in date_str:

433 dt_obj = datetime.fromisoformat(date_str)

434 # Parse date-only string

435 elif inclusive_end:

436 # For inclusive end dates, set time to end of day

437 dt_obj = datetime.fromisoformat(date_str + "T23:59:59")

438 else:

439 # For start dates, set time to midnight

440 dt_obj = datetime.fromisoformat(date_str + "T00:00:00")

441

442 # Ensure timezone-aware datetime using system timezone

443 if dt_obj.tzinfo is None:

444 from nexusLIMS.utils.time import current_system_tz # noqa: PLC0415

445

446 dt_obj = dt_obj.replace(tzinfo=current_system_tz())

447 except ValueError as e:

448 msg = (

449 f"Invalid date format: {date_str}. "

450 f"Use ISO format (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS) "

451 f'or special value "none" to disable filtering.'

452 )

453 raise click.BadParameter(msg) from e

454 else:

455 return dt_obj

456

457

458def _handle_error_notification(

459 log_file: Path, file_handler: logging.FileHandler

460) -> None:

461 """

462 Check log for errors and send notification if needed.

463

464 Parameters

465 ----------

466 log_file : Path

467 Path to the log file to check

468 file_handler : logging.FileHandler

469 File handler to flush before checking log

470

471 Returns

472 -------

473 None

474 This function doesn't return anything. All errors are caught and logged.

475 """

476 # Ensure file handler is flushed before checking log

477 # This is important for error detection to work correctly

478 file_handler.flush()

479

480 logger.info("NexusLIMS record processor finished")

481

482 try:

483 has_errors, found_patterns = check_log_for_errors(log_file)

484 if has_errors:

485 logger.info("Errors detected in log, sending notification")

486 send_error_notification(log_file, found_patterns)

487 else:

488 logger.info("No errors detected in log")

489 except Exception:

490 logger.exception("Error while checking log or sending notification")

491 finally:

492 # Clean up file handler after all logging is complete

493 logging.root.removeHandler(file_handler)

494 file_handler.close()

495

496

497@click.command(

498 epilog="""

499Examples:

500

501 \b

502 # Normal run (process records from last week)

503 $ nexuslims build-records

504

505 \b

506 # Process all sessions (no date filtering)

507 $ nexuslims build-records --from=none

508

509 \b

510 # Process sessions since a specific date

511 $ nexuslims build-records --from=2025-01-01

512

513 \b

514 # Process a specific date range

515 $ nexuslims build-records --from=2025-01-01 --to=2025-01-31

516

517 \b

518 # Dry run (find files only)

519 $ nexuslims build-records -n

520

521 \b

522 # Verbose output

523 $ nexuslims build-records -vv

524"""

525)

526@click.option(

527 "-n",

528 "--dry-run",

529 is_flag=True,

530 help="Dry run: find files without building records",

531)

532@click.option(

533 "-v",

534 "--verbose",

535 count=True,

536 help="Increase verbosity (-v for INFO, -vv for DEBUG)",

537)

538@click.option(

539 "--from",

540 "from_arg",

541 type=str,

542 default=None,

543 help="Start date for session filtering (ISO format: YYYY-MM-DD). "

544 'Defaults to 1 week ago. Use "none" to disable lower bound.',

545)

546@click.option(

547 "--to",

548 "to_arg",

549 type=str,

550 default=None,

551 help="End date for session filtering (ISO format: YYYY-MM-DD). "

552 "Omit to disable upper bound.",

553)

554@click.version_option(version=None, message=_format_version("nexuslims build-records"))

555def main(

556 *, dry_run: bool, verbose: int, from_arg: str | None, to_arg: str | None

557) -> None:

558 """

559 Process new NexusLIMS records with logging and email notifications.

560

561 This command runs the NexusLIMS record builder to process new experimental

562 sessions and generate XML records. It provides file locking to prevent

563 concurrent runs, timestamped logging, and email notifications on errors.

564

565 By default, only sessions from the last week are processed. Use --from=none

566 to process all sessions, or specify custom date ranges with --from and --to.

567 """

568 from nexusLIMS.cli import handle_config_error # noqa: PLC0415

569

570 with handle_config_error():

571 # Setup logging (accesses settings for log directory path)

572 log_level = _get_log_level(verbose)

573 log_file, file_handler = _setup_logging(log_level, dry_run)

574

575 # Parse date arguments from raw string parameters

576 dt_from = _parse_date_argument(from_arg)

577 dt_to = _parse_date_argument(to_arg, inclusive_end=True)

578

579 # Apply default: fetch last week if no --from was provided

580 # (Don't apply if user explicitly passed --from=none)

581 if from_arg is None:

582 from nexusLIMS.utils.time import current_system_tz # noqa: PLC0415

583

584 dt_from = datetime.now(tz=current_system_tz()) - timedelta(weeks=1)

585

586 # Log startup information

587 logger.info("Starting NexusLIMS record processor")

588 logger.info("Dry run: %s", dry_run)

589 if dt_from is not None:

590 logger.info("Fetching sessions from: %s", dt_from.isoformat())

591 else:

592 logger.info("Fetching sessions from: (no lower bound)")

593 if dt_to is not None:

594 logger.info("Fetching sessions to: %s", dt_to.isoformat())

595 else:

596 logger.info("Fetching sessions to: (no upper bound)")

597

598 # Dump sanitized effective configuration when verbose

599 if verbose >= 1:

600 from nexusLIMS.cli.config import ( # noqa: PLC0415

601 _build_config_dict,

602 _sanitize_config,

603 )

604 from nexusLIMS.config import settings # noqa: PLC0415

605

606 logger.info(

607 "Effective configuration:\n%s",

608 json.dumps(

609 _sanitize_config(_build_config_dict(settings)),

610 indent=2,

611 default=str,

612 ),

613 )

614

615 # Run record builder with file locking

616 _run_with_lock(dry_run, dt_from, dt_to)

617

618 # Handle error notifications and cleanup

619 _handle_error_notification(log_file, file_handler)

620

621

622if __name__ == "__main__": # pragma: no cover

623 main()

Coverage for nexusLIMS/cli/process_records.py: 100%

188 statements