Coverage for nexusLIMS/harvesters/nemo/__init__.py: 100%
66 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
1"""
2NEMO harvester module.
4This module contains the functionality to harvest instruments, reservations,
5etc. from an instance of NEMO (https://github.com/usnistgov/NEMO/), a
6calendaring and laboratory logistics application.
7"""
9import json
10import logging
11from datetime import timedelta
13from nexusLIMS.db.session_handler import Session
14from nexusLIMS.harvesters.reservation_event import ReservationEvent
15from nexusLIMS.utils.time import get_timespan_overlap
17from .connector import NemoConnector
18from .exceptions import NoDataConsentError, NoMatchingReservationError
19from .utils import (
20 _get_res_question_value,
21 get_connector_for_session,
22 has_valid_question_data,
23 id_from_url,
24 process_res_question_samples,
25)
27_logger = logging.getLogger(__name__)
def create_res_event_from_usage_event(
    usage_event: dict,
    session: Session,
    nemo_connector: NemoConnector,
    field: str = "run_data",
) -> ReservationEvent:
    """
    Create ReservationEvent from usage event with question data.

    Assumes usage_event has been expanded via _parse_event() and
    has valid question data in the specified field (run_data or pre_run_data).

    Both run_data and pre_run_data fields are JSON-encoded strings that use
    the same structure as reservation question_data, so we can parse them and
    reuse existing helper functions by creating a wrapper dict.

    Parameters
    ----------
    usage_event
        The usage event dictionary from NEMO API
    session
        The Session object
    nemo_connector
        The NemoConnector instance
    field
        Which field to extract question data from ("run_data" or "pre_run_data")

    Returns
    -------
    ReservationEvent
        The created reservation event

    Raises
    ------
    ValueError
        If the field is missing from the usage event or cannot be parsed as JSON
    NoDataConsentError
        If data_consent is missing or the user declined consent
    """
    # Parse JSON-encoded question data string. A missing key is reported the
    # same way as a ``None`` or malformed value, so callers only have to
    # handle ValueError for "this field is unusable".
    try:
        question_data_parsed = json.loads(usage_event[field])
    except (KeyError, json.JSONDecodeError, TypeError) as e:
        msg = f"Failed to parse {field} for usage event {usage_event['id']}: {e}"
        raise ValueError(msg) from e

    # Wrap parsed data as question_data for compatibility with helper functions
    wrapped_event = {"question_data": question_data_parsed}

    # Validate consent first, before any other question data is touched
    consent = _get_res_question_value("data_consent", wrapped_event)
    if consent is None:
        msg = (
            f"Usage event {usage_event['id']} did not have data_consent defined, "
            "so we should not harvest its data"
        )
        raise NoDataConsentError(msg)

    if consent.lower() in ["disagree", "no", "false", "negative"]:
        msg = (
            f"Usage event {usage_event['id']} requested not to have "
            "their data harvested"
        )
        raise NoDataConsentError(msg)

    # Process sample information
    (
        sample_details,
        sample_pid,
        sample_name,
        sample_elements,
    ) = process_res_question_samples(wrapped_event)

    user = usage_event["user"]
    # Use operator as creator (who started the session)
    # Fallback to user if operator is None
    creator = usage_event.get("operator") or user

    usage_event_id = usage_event["id"]
    # Parsed once and used for both last_updated and start_time: no
    # creation_time is read from the usage event, so the start time stands in
    start_time = nemo_connector.strptime(usage_event["start"])

    # Create ReservationEvent (using wrapped_event for question data)
    return ReservationEvent(
        experiment_title=_get_res_question_value("experiment_title", wrapped_event),
        instrument=session.instrument,
        last_updated=start_time,
        username=user["username"],
        user_full_name=(
            f"{user['first_name']} {user['last_name']} ({user['username']})"
        ),
        created_by=creator["username"],
        created_by_full_name=(
            f"{creator['first_name']} {creator['last_name']} ({creator['username']})"
        ),
        start_time=start_time,
        end_time=nemo_connector.strptime(usage_event["end"]),
        reservation_type=None,
        experiment_purpose=_get_res_question_value("experiment_purpose", wrapped_event),
        sample_details=sample_details,
        sample_pid=sample_pid,
        sample_name=sample_name,
        sample_elements=sample_elements,
        project_name=[None],
        project_id=[_get_res_question_value("project_id", wrapped_event)],
        project_ref=[None],
        internal_id=str(usage_event_id),  # Usage event ID
        division=None,
        group=None,
        # Derive the human-facing usage event URL from the API base URL
        url=nemo_connector.config["base_url"].replace(
            "api/",
            f"event_details/usage/{usage_event_id}/",
        ),
    )
def res_event_from_session(
    session: Session, connector: NemoConnector | None = None
) -> ReservationEvent:
    """
    Create reservation event from session.

    Create an internal
    :py:class:`~nexusLIMS.harvesters.reservation_event.ReservationEvent` representation
    of a session. Metadata is looked up in three tiers, in order of preference:

    1. the ``run_data`` questions of the session's usage event (filled in at
       the END of the experiment),
    2. the ``pre_run_data`` questions of the usage event (filled in at the
       START of the experiment),
    3. the NEMO reservation on the same tool (searched within +/- two days of
       the session) that has the greatest time overlap with the session.

    This method assumes a certain format for the "reservation questions"
    associated with each reservation and parses that information into the resulting
    ``ReservationEvent``. The most critical of these is the ``data_consent`` field.
    If an affirmative response in this field is not found (because the user declined
    consent or the reservation questions are missing), a record will not be built.

    The following JSON array represents a minimal schema for a set of NEMO
    "Reservation Questions" that will satisfy the expectations of this method.
    Please see the NEMO documentation on this feature for more details.

    ```json
    [
      {
        "type": "textbox",
        "name": "project_id",
        "title": "Project ID"
      },
      {
        "type": "textbox",
        "name": "experiment_title",
        "title": "Title of Experiment"
      },
      {
        "type": "textarea",
        "name": "experiment_purpose",
        "title": "Experiment Purpose"
      },
      {
        "type": "radio",
        "title": "Agree to NexusLIMS curation",
        "choices": ["Agree", "Disagree"],
        "name": "data_consent",
        "default_choice": "Agree"
      },
      {
        "type": "group",
        "title": "Sample information",
        "name": "sample_group",
        "questions": [
          {
            "type": "textbox",
            "name": "sample_name",
            "title": "Sample Name / PID"
          },
          {
            "type": "radio",
            "title": "Sample or PID?",
            "choices": ["Sample Name", "PID"],
            "name": "sample_or_pid"
          },
          {
            "type": "textarea",
            "name": "sample_details",
            "title": "Sample Details"
          }
        ]
      }
    ]
    ```

    Parameters
    ----------
    session
        The session for which to get a reservation event
    connector : Optional[NemoConnector], optional
        Optional NemoConnector to use instead of looking one up. Useful for testing.

    Returns
    -------
    res_event : ~nexusLIMS.harvesters.reservation_event.ReservationEvent
        The matching reservation event

    Raises
    ------
    NoMatchingReservationError
        If the usage event has no usable question data and no reservation
        overlaps the session
    NoDataConsentError
        If ``data_consent`` is missing from the selected question data or the
        user declined consent
    ValueError
        Propagated from ``create_res_event_from_usage_event`` if the usage
        event question data cannot be parsed
    """
    # a session has instrument, dt_from, dt_to, and user

    # we don't filter reservations by user, since sometimes users
    # will enable/reserve on behalf of others, etc.

    # a caller-supplied connector (e.g. in tests) takes precedence over lookup
    nemo_connector = (
        get_connector_for_session(session) if connector is None else connector
    )

    # Three-tier fallback - try to get usage event question data first.
    # This eliminates the need for reservation matching when usage events
    # contain all necessary metadata.
    usage_event_id = id_from_url(session.session_identifier)
    if usage_event_id is not None:
        usage_events = nemo_connector.get_usage_events(event_id=usage_event_id)
        if usage_events:
            usage_event = usage_events[0]

            # Priority 1 is run_data (most recent - filled at END),
            # priority 2 is pre_run_data (backup - filled at START)
            for field in ("run_data", "pre_run_data"):
                if has_valid_question_data(usage_event, field=field):
                    _logger.info(
                        "Usage event %s has %s with questions, "
                        "using it instead of reservation",
                        usage_event_id,
                        field,
                    )
                    return create_res_event_from_usage_event(
                        usage_event, session, nemo_connector, field=field
                    )

    # Priority 3: Fall back to reservation matching (existing behavior)
    _logger.info(
        "Usage event does not have valid question data in run_data or pre_run_data, "
        "falling back to reservation matching"
    )

    # fetch all reservations +/- two days around the session, and then find
    # the one with the maximal overlap with the session time range
    search_margin = timedelta(days=2)
    search_from = session.dt_from - search_margin
    search_to = session.dt_to + search_margin
    reservations = nemo_connector.get_reservations(
        # tool id can be extracted from instrument api_url query parameter
        tool_id=id_from_url(session.instrument.api_url),
        dt_from=search_from,
        dt_to=search_to,
    )

    _logger.info(
        "Found %i reservations between %s and %s with ids: %s",
        len(reservations),
        search_from,
        search_to,
        [i["id"] for i in reservations],
    )
    for i, res in enumerate(reservations, start=1):
        _logger.debug(
            "Reservation %i: %sreservations/?id=%s from %s to %s",
            i,
            nemo_connector.config["base_url"],
            res["id"],
            res["start"],
            res["end"],
        )

    session_span = (session.dt_from, session.dt_to)
    overlaps = [
        get_timespan_overlap(
            session_span,
            (nemo_connector.strptime(r["start"]), nemo_connector.strptime(r["end"])),
        )
        for r in reservations
    ]

    # ``default`` covers the case of no reservations at all; a best overlap of
    # zero means that none of the reservations found overlaps the session
    best_overlap = max(overlaps, default=timedelta(0))
    if best_overlap == timedelta(0):
        _logger.warning(
            "No reservations found with overlap for this usage "
            "event, so raising NoMatchingReservationError",
        )
        msg = (
            "No reservation found matching this session, so assuming NexusLIMS "
            "does not have user consent for data harvesting."
        )
        raise NoMatchingReservationError(msg)

    # select the reservation with the most overlap (first one wins on a tie)
    res = reservations[overlaps.index(best_overlap)]
    _logger.info(
        "Using reservation %sreservations/?id=%s as match for "
        "usage event %s with overlap of %s",
        nemo_connector.config["base_url"],
        res["id"],
        session.session_identifier,
        best_overlap,
    )

    # process the sample group metadata into lists for the ReservationEvent
    # constructor
    (
        sample_details,
        sample_pid,
        sample_name,
        sample_elements,
    ) = process_res_question_samples(res)

    # respect user choice not to harvest data (data_consent)
    consent = _get_res_question_value("data_consent", res)
    # consent will be None here if it wasn't given (i.e. there was no
    # data_consent field in the reservation questions)
    if consent is None:
        msg = (
            f"Reservation {res['id']} did not have data_consent defined, "
            "so we should not harvest its data"
        )
        raise NoDataConsentError(msg)

    if consent.lower() in ["disagree", "no", "false", "negative"]:
        msg = f"Reservation {res['id']} requested not to have their data harvested"
        raise NoDataConsentError(msg)

    user = res["user"]
    creator = res["creator"]
    res_id = res["id"]

    # Create ReservationEvent from NEMO reservation dict
    return ReservationEvent(
        experiment_title=_get_res_question_value("experiment_title", res),
        instrument=session.instrument,
        last_updated=nemo_connector.strptime(res["creation_time"]),
        username=user["username"],
        user_full_name=(
            f"{user['first_name']} {user['last_name']} ({user['username']})"
        ),
        created_by=creator["username"],
        created_by_full_name=(
            f"{creator['first_name']} {creator['last_name']} ({creator['username']})"
        ),
        start_time=nemo_connector.strptime(res["start"]),
        end_time=nemo_connector.strptime(res["end"]),
        reservation_type=None,  # reservation type is not collected in NEMO
        experiment_purpose=_get_res_question_value("experiment_purpose", res),
        sample_details=sample_details,
        sample_pid=sample_pid,
        sample_name=sample_name,
        sample_elements=sample_elements,
        project_name=[None],
        project_id=[_get_res_question_value("project_id", res)],
        project_ref=[None],
        internal_id=str(res_id),
        division=None,
        group=None,
        # Derive the human-facing reservation URL from the API base URL
        url=nemo_connector.config["base_url"].replace(
            "api/",
            f"event_details/reservation/{res_id}/",
        ),
    )