Coverage for nexusLIMS/harvesters/nemo/__init__.py: 100%

66 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1""" 

2NEMO harvester module. 

3 

4This module contains the functionality to harvest instruments, reservations, 

5etc. from an instance of NEMO (https://github.com/usnistgov/NEMO/), a 

6calendaring and laboratory logistics application. 

7""" 

8 

9import json 

10import logging 

11from datetime import timedelta 

12 

13from nexusLIMS.db.session_handler import Session 

14from nexusLIMS.harvesters.reservation_event import ReservationEvent 

15from nexusLIMS.utils.time import get_timespan_overlap 

16 

17from .connector import NemoConnector 

18from .exceptions import NoDataConsentError, NoMatchingReservationError 

19from .utils import ( 

20 _get_res_question_value, 

21 get_connector_for_session, 

22 has_valid_question_data, 

23 id_from_url, 

24 process_res_question_samples, 

25) 

26 

# Module-level logger, named after this module via ``__name__``.
_logger = logging.getLogger(__name__)

28 

29 

def create_res_event_from_usage_event(
    usage_event: dict,
    session: Session,
    nemo_connector: NemoConnector,
    field: str = "run_data",
) -> ReservationEvent:
    """
    Create ReservationEvent from usage event with question data.

    Assumes usage_event has been expanded via _parse_event() and
    has valid question data in the specified field (run_data or pre_run_data).

    Both run_data and pre_run_data fields are JSON-encoded strings that use
    the same structure as reservation question_data, so we can parse them and
    reuse existing helper functions by creating a wrapper dict.

    Parameters
    ----------
    usage_event
        The usage event dictionary from NEMO API
    session
        The Session object (only its ``instrument`` attribute is read here)
    nemo_connector
        The NemoConnector instance; its ``strptime`` method is applied to the
        event's ``start``/``end`` values and its ``config["base_url"]`` is used
        to build the event URL
    field
        Which field to extract question data from ("run_data" or "pre_run_data")

    Returns
    -------
    ReservationEvent
        The created reservation event

    Raises
    ------
    ValueError
        If the field is missing from the usage event or cannot be parsed as JSON
    NoDataConsentError
        If data_consent is missing or the user declined consent
    """
    event_id = usage_event["id"]

    # Parse JSON-encoded question data string. ``.get`` maps a missing field to
    # ``None``, which ``json.loads`` rejects with TypeError, so an absent field
    # is reported through the same ValueError as an unparseable one.
    try:
        question_data_parsed = json.loads(usage_event.get(field))
    except (json.JSONDecodeError, TypeError) as e:
        msg = f"Failed to parse {field} for usage event {event_id}: {e}"
        raise ValueError(msg) from e

    # Wrap parsed data as question_data for compatibility with helper functions
    wrapped_event = {"question_data": question_data_parsed}

    # Validate consent first, before any other question data is touched
    consent = _get_res_question_value("data_consent", wrapped_event)
    if consent is None:
        msg = (
            f"Usage event {event_id} did not have data_consent defined, "
            "so we should not harvest its data"
        )
        raise NoDataConsentError(msg)

    # ``str(...)`` keeps a non-string answer (e.g. a JSON boolean) from raising
    # AttributeError; ``strip`` makes a padded "Disagree " still count as a refusal.
    if str(consent).strip().lower() in {"disagree", "no", "false", "negative"}:
        msg = f"Usage event {event_id} requested not to have their data harvested"
        raise NoDataConsentError(msg)

    # Process sample information
    (
        sample_details,
        sample_pid,
        sample_name,
        sample_elements,
    ) = process_res_question_samples(wrapped_event)

    user = usage_event["user"]
    # Use operator as creator (who started the session);
    # fall back to user if operator is missing or None
    creator = usage_event.get("operator") or user

    # Swap only the LAST "api/" of the base URL for the event-details path, so a
    # host or earlier path segment that also ends in "api" is left intact. A base
    # URL without "api/" is passed through unchanged, as str.replace would do.
    base_url = nemo_connector.config["base_url"]
    url_head, api_marker, url_tail = base_url.rpartition("api/")
    if api_marker:
        event_url = f"{url_head}event_details/usage/{event_id}/{url_tail}"
    else:
        event_url = base_url

    # Create ReservationEvent (using wrapped_event for question data)
    return ReservationEvent(
        experiment_title=_get_res_question_value("experiment_title", wrapped_event),
        instrument=session.instrument,
        # Usage events have no creation_time, so the start time stands in for it
        last_updated=nemo_connector.strptime(usage_event["start"]),
        username=user["username"],
        user_full_name=(
            f"{user['first_name']} {user['last_name']} ({user['username']})"
        ),
        created_by=creator["username"],
        created_by_full_name=(
            f"{creator['first_name']} {creator['last_name']} ({creator['username']})"
        ),
        start_time=nemo_connector.strptime(usage_event["start"]),
        end_time=nemo_connector.strptime(usage_event["end"]),
        reservation_type=None,
        experiment_purpose=_get_res_question_value("experiment_purpose", wrapped_event),
        sample_details=sample_details,
        sample_pid=sample_pid,
        sample_name=sample_name,
        sample_elements=sample_elements,
        project_name=[None],
        project_id=[_get_res_question_value("project_id", wrapped_event)],
        project_ref=[None],
        internal_id=str(event_id),  # Usage event ID
        division=None,
        group=None,
        url=event_url,  # Usage event URL
    )

141 

142 

def res_event_from_session(
    session: Session, connector: NemoConnector | None = None
) -> ReservationEvent:
    """
    Create reservation event from session.

    Create an internal
    :py:class:`~nexusLIMS.harvesters.reservation_event.ReservationEvent` representation
    of a session. Question data attached to the session's usage event is preferred
    (``run_data`` first, then ``pre_run_data``); if neither is usable, a matching
    reservation is looked up in the NEMO system and the data contained within is
    parsed into a ``ReservationEvent``.

    This method assumes a certain format for the "reservation questions"
    associated with each reservation and parses that information into the resulting
    ``ReservationEvent``. The most critical of these is the ``data_consent`` field.
    If an affirmative response in this field is not found (because the user declined
    consent or the reservation questions are missing), a record will not be built.

    The following JSON object represents a minimal schema for a set of NEMO "Reservation
    Questions" that will satisfy the expectations of this method. Please see the
    NEMO documentation on this feature for more details.

    ```json
    [
      {
        "type": "textbox",
        "name": "project_id",
        "title": "Project ID"
      },
      {
        "type": "textbox",
        "name": "experiment_title",
        "title": "Title of Experiment"
      },
      {
        "type": "textarea",
        "name": "experiment_purpose",
        "title": "Experiment Purpose"
      },
      {
        "type": "radio",
        "title": "Agree to NexusLIMS curation",
        "choices": ["Agree", "Disagree"],
        "name": "data_consent",
        "default_choice": "Agree"
      },
      {
        "type": "group",
        "title": "Sample information",
        "name": "sample_group",
        "questions": [
          {
            "type": "textbox",
            "name": "sample_name",
            "title": "Sample Name / PID"
          },
          {
            "type": "radio",
            "title": "Sample or PID?",
            "choices": ["Sample Name", "PID"],
            "name": "sample_or_pid"
          },
          {
            "type": "textarea",
            "name": "sample_details",
            "title": "Sample Details"
          }
        ]
      }
    ]
    ```

    Parameters
    ----------
    session
        The session for which to get a reservation event
    connector : Optional[NemoConnector], optional
        Optional NemoConnector to use instead of looking one up. Useful for testing.

    Returns
    -------
    res_event : ~nexusLIMS.harvesters.reservation_event.ReservationEvent
        The matching reservation event

    Raises
    ------
    NoMatchingReservationError
        If no reservation within two days of the session overlaps it at all
    NoDataConsentError
        If the selected reservation has no ``data_consent`` answer or the user
        declined consent
    """
    # a session has instrument, dt_from, dt_to, and user

    # in order to query NEMO, we need a NemoConnector
    nemo_connector = (
        get_connector_for_session(session) if connector is None else connector
    )

    # Three-tier fallback - try to get usage event question data first.
    # This eliminates the need for reservation matching when usage events
    # contain all necessary metadata.
    usage_event_id = id_from_url(session.session_identifier)
    if usage_event_id is not None:
        usage_events = nemo_connector.get_usage_events(event_id=usage_event_id)
        if usage_events:
            usage_event = usage_events[0]
            # Priority 1: run_data (most recent - filled at END of experiment)
            # Priority 2: pre_run_data (backup - filled at START of experiment)
            for field in ("run_data", "pre_run_data"):
                if has_valid_question_data(usage_event, field=field):
                    _logger.info(
                        "Usage event %s has %s with questions, "
                        "using it instead of reservation",
                        usage_event_id,
                        field,
                    )
                    return create_res_event_from_usage_event(
                        usage_event, session, nemo_connector, field=field
                    )

    # Priority 3: Fall back to reservation matching (existing behavior)
    _logger.info(
        "Usage event does not have valid question data in run_data or pre_run_data, "
        "falling back to reservation matching"
    )

    # we fetch all reservations +/- two days, and then find the one with the
    # maximal overlap with the session time range. We deliberately don't filter
    # by user, since sometimes users will enable/reserve on behalf of others.
    window_start = session.dt_from - timedelta(days=2)
    window_end = session.dt_to + timedelta(days=2)
    reservations = nemo_connector.get_reservations(
        # tool id can be extracted from instrument api_url query parameter
        tool_id=id_from_url(session.instrument.api_url),
        dt_from=window_start,
        dt_to=window_end,
    )

    _logger.info(
        "Found %i reservations between %s and %s with ids: %s",
        len(reservations),
        window_start,
        window_end,
        [r["id"] for r in reservations],
    )
    for position, candidate in enumerate(reservations, start=1):
        _logger.debug(
            "Reservation %i: %sreservations/?id=%s from %s to %s",
            position,
            nemo_connector.config["base_url"],
            candidate["id"],
            candidate["start"],
            candidate["end"],
        )

    starts = [nemo_connector.strptime(r["start"]) for r in reservations]
    ends = [nemo_connector.strptime(r["end"]) for r in reservations]
    overlaps = [
        get_timespan_overlap((session.dt_from, session.dt_to), (s, e))
        for s, e in zip(starts, ends)
    ]

    # ``default`` covers the "no reservations at all" case, so an empty list and
    # "nothing overlaps" both end up as a zero-length best overlap
    best_overlap = max(overlaps, default=timedelta(0))
    if best_overlap == timedelta(0):
        # there were no reservations that matched this usage event time range,
        # or none of the reservations overlapped with the usage event
        _logger.warning(
            "No reservations found with overlap for this usage "
            "event, so raising NoMatchingReservationError",
        )
        msg = (
            "No reservation found matching this session, so assuming NexusLIMS "
            "does not have user consent for data harvesting."
        )
        raise NoMatchingReservationError(msg)

    # select the reservation with the most overlap (first one wins on ties)
    res = reservations[overlaps.index(best_overlap)]
    _logger.info(
        "Using reservation %sreservations/?id=%s as match for "
        "usage event %s with overlap of %s",
        nemo_connector.config["base_url"],
        res["id"],
        session.session_identifier,
        best_overlap,
    )

    # respect user choice not to harvest data (data_consent); this is checked
    # before any other reservation question data is processed
    consent = _get_res_question_value("data_consent", res)
    # consent will be None here if it wasn't given (i.e. there was no
    # data_consent field in the reservation questions)
    if consent is None:
        msg = (
            f"Reservation {res['id']} did not have data_consent defined, "
            "so we should not harvest its data"
        )
        raise NoDataConsentError(msg)

    # ``str(...)`` keeps a non-string answer (e.g. a JSON boolean) from raising
    # AttributeError; ``strip`` makes a padded "Disagree " still count as a refusal.
    if str(consent).strip().lower() in {"disagree", "no", "false", "negative"}:
        msg = f"Reservation {res['id']} requested not to have their data harvested"
        raise NoDataConsentError(msg)

    # process the sample group metadata into the lists expected by the
    # ReservationEvent constructor
    (
        sample_details,
        sample_pid,
        sample_name,
        sample_elements,
    ) = process_res_question_samples(res)

    user = res["user"]
    creator = res["creator"]

    # Swap only the LAST "api/" of the base URL for the event-details path, so a
    # host or earlier path segment that also ends in "api" is left intact. A base
    # URL without "api/" is passed through unchanged, as str.replace would do.
    base_url = nemo_connector.config["base_url"]
    url_head, api_marker, url_tail = base_url.rpartition("api/")
    if api_marker:
        event_url = f"{url_head}event_details/reservation/{res['id']}/{url_tail}"
    else:
        event_url = base_url

    # Create ReservationEvent from NEMO reservation dict
    return ReservationEvent(
        experiment_title=_get_res_question_value("experiment_title", res),
        instrument=session.instrument,
        last_updated=nemo_connector.strptime(res["creation_time"]),
        username=user["username"],
        user_full_name=(
            f"{user['first_name']} {user['last_name']} ({user['username']})"
        ),
        created_by=creator["username"],
        created_by_full_name=(
            f"{creator['first_name']} {creator['last_name']} ({creator['username']})"
        ),
        start_time=nemo_connector.strptime(res["start"]),
        end_time=nemo_connector.strptime(res["end"]),
        reservation_type=None,  # reservation type is not collected in NEMO
        experiment_purpose=_get_res_question_value("experiment_purpose", res),
        sample_details=sample_details,
        sample_pid=sample_pid,
        sample_name=sample_name,
        sample_elements=sample_elements,
        project_name=[None],
        project_id=[_get_res_question_value("project_id", res)],
        project_ref=[None],
        internal_id=str(res["id"]),
        division=None,
        group=None,
        url=event_url,
    )

398 ), 

399 )