From 25386cab8bfd8b517328ff5530eaf70ccdae0df0 Mon Sep 17 00:00:00 2001 From: serversdown Date: Mon, 1 Jun 2026 20:02:54 +0000 Subject: [PATCH] =?UTF-8?q?fix(backfill):=20regenerate=20IDFH=20.h5=20+=20?= =?UTF-8?q?merge=20binary=20mic=5Fpspl=5Fpsi=20onto=20bridge=20Two=20gaps?= =?UTF-8?q?=20in=20backfill=5Fthor=5Fevents.py=20that=20left=20old=20Thor?= =?UTF-8?q?=20events=20showing=20stale=20charts=20after=20a=20v0.21.1=20ba?= =?UTF-8?q?ckfill=20pass:=201.=20IDFH=20events=20were=20skipped=20from=20.?= =?UTF-8?q?h5=20regeneration=20(the=20"have=20decoded=20=20=20=20samples"?= =?UTF-8?q?=20gate=20was=20IDFW-only).=20=20Histograms=20kept=20their=20pr?= =?UTF-8?q?e-v0.21.1=20=20=20=20.h5=20=E2=80=94=20written=20from=20raw=5Fs?= =?UTF-8?q?amples=20=3D=20None,=20which=20the=20renderer=20turned=20=20=20?= =?UTF-8?q?=20into=20a=20near-empty=20bar=20chart,=20or=20for=20older=20ev?= =?UTF-8?q?ents=20the=20dB(L)-as-pseudo-=20=20=20=20psi=20mic=20scale=20th?= =?UTF-8?q?at=20produced=20"107.7=20psi"=20peaks=20(atomic-bomb=20level=20?= =?UTF-8?q?=20=20=20instead=20of=20footstep=20level).=20=20Fix:=20synthesi?= =?UTF-8?q?se=20the=20same=201-sample-per-=20=20=20=20interval=20array=20s?= =?UTF-8?q?ave=5Fimported=5Fidf=20v0.21.1=20uses=20(peak=20ADC=20count=20p?= =?UTF-8?q?er=20=20=20=20channel=20per=20interval)=20so=20the=20renderer's?= =?UTF-8?q?=20bar-chart=20grouping=20has=20=20=20=20data=20to=20work=20wit?= =?UTF-8?q?h.=202.=20The=20IDFW=20h5=20path=20didn't=20merge=20binary=5Fpe?= =?UTF-8?q?aks.mic=5Fpspl=5Fpsi=20onto=20the=20=20=20=20IdfEvent=20before?= =?UTF-8?q?=20to=5Fminimateplus=5Fevent().=20=20The=20live=20save=5Fimport?= =?UTF-8?q?ed=5Fidf=20=20=20=20does=20this=20merge=20=E2=80=94=20without?= =?UTF-8?q?=20it,=20IdfEvent.from=5Freport()=20only=20sees=20the=20=20=20?= =?UTF-8?q?=20.txt's=20dB(L)=20value,=20the=20bridge=20falls=20back=20to?= =?UTF-8?q?=20the=20dBL=E2=86=92psi=20formula=20=20=20=20(instead=20of=20t?= =?UTF-8?q?he=20binary-accurate=202.14e-6=20psi/count=20value),=20and=20th?= =?UTF-8?q?e=20=20=20=20h5=20writer's=20per-count=20mic=20factor=20lands?= =?UTF-8?q?=20on=20a=20less-correct=20value.=20=20=20=20Fix:=20same=20merg?= =?UTF-8?q?e=20the=20live=20ingest=20does=20(lift=20res.event.peaks.mic=5F?= =?UTF-8?q?pspl=5Fpsi=20=20=20=20onto=20idf=5Fevent.peaks=20before=20the?= =?UTF-8?q?=20bridge=20call).=20Verified=20against=20UM6047=5F202508041900?= =?UTF-8?q?47.IDFH=20(250-interval=20prod=20histogram):=20250=20intervals?= =?UTF-8?q?=20decode,=20mic=5Fpspl=5Fpsi=20=3D=202.78e-5=20(was=20being=20?= =?UTF-8?q?treated=20as=20dB(L)=3D107.7=20in=20the=20old=20h5).=20Operator?= =?UTF-8?q?:=20re-run=20after=20deploy.=20=20`docker=20compose=20exec=20sf?= =?UTF-8?q?m=20python=20scripts/backfill=5Fthor=5Fevents.py`=20is=20idempo?= =?UTF-8?q?tent=20=E2=80=94=20the=20existing=20version=20check=20still=20s?= =?UTF-8?q?kips=20events=20already=20at=20the=20new=20TOOL=5FVERSION,=20an?= =?UTF-8?q?d=20review=20state=20+=20captured=5Fat=20are=20preserved=20on?= =?UTF-8?q?=20the=20second=20pass.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/backfill_thor_events.py | 77 +++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/scripts/backfill_thor_events.py b/scripts/backfill_thor_events.py index 529218d..41e7935 100644 --- a/scripts/backfill_thor_events.py +++ b/scripts/backfill_thor_events.py @@ -35,8 +35,15 @@ and, for each one: 4. Writes the refreshed sidecar with the new ``bw_report``, bumped ``source.tool_version``, but preserved ``review`` block + the original ``captured_at`` timestamp. - 5. For IDFW events with decoded samples, regenerates the .h5 - waveform file via the existing ``event_hdf5`` writer. + 5. Regenerates the .h5 waveform file via the existing + ``event_hdf5`` writer. For IDFW that's the decoded per-sample + stream; for IDFH it's a 1-sample-per-interval synthesised array + (peak ADC count per channel) so the renderer's bar-chart code + has data to group on. Mic peak psi from the binary is merged + onto the IdfEvent before the bridge so the h5 writer's per-count + mic scale factor lands on a sensible value (without this the + mic chart on Thor events plots dB(L)-as-pseudo-psi and shows + bomb-level numbers). Idempotent. Re-running it after a parser/adapter change just re-writes sidecars — no DB writes, no thor-watcher coordination. @@ -231,10 +238,11 @@ def main(argv=None) -> int: new_sidecar["extensions"] = ext if args.dry_run: + will_write_h5 = (idf_samples or idf_intervals) and not args.skip_hdf5 log.info("[DRY] %s/%s — would refresh sidecar (bw_report=%s, h5=%s)", serial, path.name, "wrote" if not has_bw_report else "refreshed", - "would write" if (idf_samples and not args.skip_hdf5) else "skipped") + "would write" if will_write_h5 else "skipped") else: event_file_io.write_sidecar(sidecar_path, new_sidecar) log.info("%s/%s — sidecar refreshed (bw_report=%s, intervals=%d)", @@ -243,10 +251,15 @@ def main(argv=None) -> int: len(idf_intervals) if idf_intervals else 0) refreshed += 1 - # Regenerate .h5 for IDFW events with decoded samples by - # replaying the same IdfEvent → Event bridge save_imported_idf - # uses. IDFH events have no per-sample data; skip. - if idf_samples and not args.skip_hdf5 and not is_histogram: + # Regenerate .h5 by replaying the same IdfEvent → Event bridge + # save_imported_idf uses. For IDFW we write the decoded per- + # sample arrays. For IDFH we synthesise a 1-sample-per-interval + # array (peak ADC count per channel per interval) so the + # renderer's bar-chart code has something to group on. + # Pre-condition: either real samples (IDFW) or decoded intervals + # (IDFH). Skip otherwise. + have_data = bool(idf_samples) or bool(idf_intervals) + if have_data and not args.skip_hdf5: from sfm import event_hdf5 hdf5_path = store.hdf5_path_for(serial, path.name) if args.dry_run: @@ -255,20 +268,44 @@ def main(argv=None) -> int: try: from micromate import IdfEvent from minimateplus.event_file_io import file_sha256 - # Bridge: parsed idf_report dict → IdfEvent → - # minimateplus Event → write_event_hdf5. Mirrors - # save_imported_idf steps 4-7. idf_event = IdfEvent.from_report(report_dict, path.name) + + # Merge the binary-derived mic peak psi (only the + # binary path knows the proper psi value; the .txt + # carries dB(L)). Without this, the h5 writer's + # per-count mic factor is computed against the + # dB(L) value-as-pseudo-psi and the mic chart + # scales wildly. + if (binary_md is not None and res is not None + and res.event.peaks.mic_pspl_psi is not None): + idf_event.peaks.mic_pspl_psi = res.event.peaks.mic_pspl_psi + sha256 = file_sha256(path) waveform_key = bytes.fromhex(sha256)[:16] ev = idf_event.to_minimateplus_event(waveform_key) - ev.raw_samples = idf_samples - n_samp = max( - (len(idf_samples.get(ch, [])) - for ch in ("Tran", "Vert", "Long", "MicL")), - default=0, - ) - ev.total_samples = ev.total_samples or n_samp + + if is_histogram and idf_intervals: + # 1 sample per interval per channel — same + # synthesis save_imported_idf uses. The h5 + # writer's count×geo_fs/32768 conversion turns + # each peak-ADC-count into the bar's physical + # value. + ev.raw_samples = { + "Tran": [iv.peak_count("Tran") for iv in idf_intervals], + "Vert": [iv.peak_count("Vert") for iv in idf_intervals], + "Long": [iv.peak_count("Long") for iv in idf_intervals], + "MicL": [iv.peak_count("MicL") for iv in idf_intervals], + } + ev.total_samples = ev.total_samples or len(idf_intervals) + elif idf_samples: + ev.raw_samples = idf_samples + n_samp = max( + (len(idf_samples.get(ch, [])) + for ch in ("Tran", "Vert", "Long", "MicL")), + default=0, + ) + ev.total_samples = ev.total_samples or n_samp + event_hdf5.write_event_hdf5( hdf5_path, ev, serial=serial, @@ -277,8 +314,10 @@ def main(argv=None) -> int: tool_version=event_file_io.TOOL_VERSION, ) h5_written += 1 - log.debug("%s/%s — .h5 written (%d samples)", - serial, path.name, n_samp) + log.debug("%s/%s — .h5 written (%s)", + serial, path.name, + f"{len(idf_intervals)} intervals" if is_histogram + else f"{sum(len(v) for v in (idf_samples or {}).values())} samples") except Exception as exc: log.warning("%s/%s — .h5 write failed: %s", serial, path.name, exc)