diff --git a/scripts/backfill_thor_events.py b/scripts/backfill_thor_events.py index 529218d..41e7935 100644 --- a/scripts/backfill_thor_events.py +++ b/scripts/backfill_thor_events.py @@ -35,8 +35,15 @@ and, for each one: 4. Writes the refreshed sidecar with the new ``bw_report``, bumped ``source.tool_version``, but preserved ``review`` block + the original ``captured_at`` timestamp. - 5. For IDFW events with decoded samples, regenerates the .h5 - waveform file via the existing ``event_hdf5`` writer. + 5. Regenerates the .h5 waveform file via the existing + ``event_hdf5`` writer. For IDFW that's the decoded per-sample + stream; for IDFH it's a 1-sample-per-interval synthesised array + (peak ADC count per channel) so the renderer's bar-chart code + has data to group on. Mic peak psi from the binary is merged + onto the IdfEvent before the bridge so the h5 writer's per-count + mic scale factor lands on a sensible value (without this the + mic chart on Thor events plots dB(L)-as-pseudo-psi and shows + bomb-level numbers). Idempotent. Re-running it after a parser/adapter change just re-writes sidecars — no DB writes, no thor-watcher coordination. @@ -231,10 +238,11 @@ def main(argv=None) -> int: new_sidecar["extensions"] = ext if args.dry_run: + will_write_h5 = (idf_samples or idf_intervals) and not args.skip_hdf5 log.info("[DRY] %s/%s — would refresh sidecar (bw_report=%s, h5=%s)", serial, path.name, "wrote" if not has_bw_report else "refreshed", - "would write" if (idf_samples and not args.skip_hdf5) else "skipped") + "would write" if will_write_h5 else "skipped") else: event_file_io.write_sidecar(sidecar_path, new_sidecar) log.info("%s/%s — sidecar refreshed (bw_report=%s, intervals=%d)", @@ -243,10 +251,15 @@ def main(argv=None) -> int: len(idf_intervals) if idf_intervals else 0) refreshed += 1 - # Regenerate .h5 for IDFW events with decoded samples by - # replaying the same IdfEvent → Event bridge save_imported_idf - # uses. IDFH events have no per-sample data; skip. - if idf_samples and not args.skip_hdf5 and not is_histogram: + # Regenerate .h5 by replaying the same IdfEvent → Event bridge + # save_imported_idf uses. For IDFW we write the decoded per- + # sample arrays. For IDFH we synthesise a 1-sample-per-interval + # array (peak ADC count per channel per interval) so the + # renderer's bar-chart code has something to group on. + # Pre-condition: either real samples (IDFW) or decoded intervals + # (IDFH). Skip otherwise. + have_data = bool(idf_samples) or bool(idf_intervals) + if have_data and not args.skip_hdf5: from sfm import event_hdf5 hdf5_path = store.hdf5_path_for(serial, path.name) if args.dry_run: @@ -255,20 +268,44 @@ def main(argv=None) -> int: try: from micromate import IdfEvent from minimateplus.event_file_io import file_sha256 - # Bridge: parsed idf_report dict → IdfEvent → - # minimateplus Event → write_event_hdf5. Mirrors - # save_imported_idf steps 4-7. idf_event = IdfEvent.from_report(report_dict, path.name) + + # Merge the binary-derived mic peak psi (only the + # binary path knows the proper psi value; the .txt + # carries dB(L)). Without this, the h5 writer's + # per-count mic factor is computed against the + # dB(L) value-as-pseudo-psi and the mic chart + # scales wildly. + if (binary_md is not None and res is not None + and res.event.peaks.mic_pspl_psi is not None): + idf_event.peaks.mic_pspl_psi = res.event.peaks.mic_pspl_psi + sha256 = file_sha256(path) waveform_key = bytes.fromhex(sha256)[:16] ev = idf_event.to_minimateplus_event(waveform_key) - ev.raw_samples = idf_samples - n_samp = max( - (len(idf_samples.get(ch, [])) - for ch in ("Tran", "Vert", "Long", "MicL")), - default=0, - ) - ev.total_samples = ev.total_samples or n_samp + + if is_histogram and idf_intervals: + # 1 sample per interval per channel — same + # synthesis save_imported_idf uses. The h5 + # writer's count×geo_fs/32768 conversion turns + # each peak-ADC-count into the bar's physical + # value. + ev.raw_samples = { + "Tran": [iv.peak_count("Tran") for iv in idf_intervals], + "Vert": [iv.peak_count("Vert") for iv in idf_intervals], + "Long": [iv.peak_count("Long") for iv in idf_intervals], + "MicL": [iv.peak_count("MicL") for iv in idf_intervals], + } + ev.total_samples = ev.total_samples or len(idf_intervals) + elif idf_samples: + ev.raw_samples = idf_samples + n_samp = max( + (len(idf_samples.get(ch, [])) + for ch in ("Tran", "Vert", "Long", "MicL")), + default=0, + ) + ev.total_samples = ev.total_samples or n_samp + event_hdf5.write_event_hdf5( hdf5_path, ev, serial=serial, @@ -277,8 +314,10 @@ def main(argv=None) -> int: tool_version=event_file_io.TOOL_VERSION, ) h5_written += 1 - log.debug("%s/%s — .h5 written (%d samples)", - serial, path.name, n_samp) + log.debug("%s/%s — .h5 written (%s)", + serial, path.name, + f"{len(idf_intervals)} intervals" if is_histogram + else f"{sum(len(v) for v in (idf_samples or {}).values())} samples") except Exception as exc: log.warning("%s/%s — .h5 write failed: %s", serial, path.name, exc)