fix(backfill): regenerate IDFH .h5 + merge binary mic_pspl_psi onto bridge
Two gaps in backfill_thor_events.py that left old Thor events showing stale charts after a v0.21.1 backfill pass: 1. IDFH events were skipped from .h5 regeneration (the "have decoded samples" gate was IDFW-only). Histograms kept their pre-v0.21.1 .h5 — written from raw_samples = None, which the renderer turned into a near-empty bar chart, or for older events the dB(L)-as-pseudo-psi mic scale that produced "107.7 psi" peaks (atomic-bomb level instead of footstep level). Fix: synthesise the same 1-sample-per-interval array save_imported_idf v0.21.1 uses (peak ADC count per channel per interval) so the renderer's bar-chart grouping has data to work with. 2. The IDFW h5 path didn't merge binary_peaks.mic_pspl_psi onto the IdfEvent before to_minimateplus_event(). The live save_imported_idf does this merge — without it, IdfEvent.from_report() only sees the .txt's dB(L) value, the bridge falls back to the dBL→psi formula (instead of the binary-accurate 2.14e-6 psi/count value), and the h5 writer's per-count mic factor lands on a less-correct value. Fix: same merge the live ingest does (lift res.event.peaks.mic_pspl_psi onto idf_event.peaks before the bridge call). Verified against UM6047_20250804190047.IDFH (250-interval prod histogram): 250 intervals decode, mic_pspl_psi = 2.78e-5 (was being treated as dB(L)=107.7 in the old h5). Operator: re-run after deploy. `docker compose exec sfm python scripts/backfill_thor_events.py` is idempotent — the existing version check still skips events already at the new TOOL_VERSION, and review state + captured_at are preserved on the second pass.
This commit is contained in:
@@ -35,8 +35,15 @@ and, for each one:
|
||||
4. Writes the refreshed sidecar with the new ``bw_report``,
|
||||
bumped ``source.tool_version``, but preserved ``review`` block
|
||||
+ the original ``captured_at`` timestamp.
|
||||
5. For IDFW events with decoded samples, regenerates the .h5
|
||||
waveform file via the existing ``event_hdf5`` writer.
|
||||
5. Regenerates the .h5 waveform file via the existing
|
||||
``event_hdf5`` writer. For IDFW that's the decoded per-sample
|
||||
stream; for IDFH it's a 1-sample-per-interval synthesised array
|
||||
(peak ADC count per channel) so the renderer's bar-chart code
|
||||
has data to group on. Mic peak psi from the binary is merged
|
||||
onto the IdfEvent before the bridge so the h5 writer's per-count
|
||||
mic scale factor lands on a sensible value (without this the
|
||||
mic chart on Thor events plots dB(L)-as-pseudo-psi and shows
|
||||
bomb-level numbers).
|
||||
|
||||
Idempotent. Re-running it after a parser/adapter change just
|
||||
re-writes sidecars — no DB writes, no thor-watcher coordination.
|
||||
@@ -231,10 +238,11 @@ def main(argv=None) -> int:
|
||||
new_sidecar["extensions"] = ext
|
||||
|
||||
if args.dry_run:
|
||||
will_write_h5 = (idf_samples or idf_intervals) and not args.skip_hdf5
|
||||
log.info("[DRY] %s/%s — would refresh sidecar (bw_report=%s, h5=%s)",
|
||||
serial, path.name,
|
||||
"wrote" if not has_bw_report else "refreshed",
|
||||
"would write" if (idf_samples and not args.skip_hdf5) else "skipped")
|
||||
"would write" if will_write_h5 else "skipped")
|
||||
else:
|
||||
event_file_io.write_sidecar(sidecar_path, new_sidecar)
|
||||
log.info("%s/%s — sidecar refreshed (bw_report=%s, intervals=%d)",
|
||||
@@ -243,10 +251,15 @@ def main(argv=None) -> int:
|
||||
len(idf_intervals) if idf_intervals else 0)
|
||||
refreshed += 1
|
||||
|
||||
# Regenerate .h5 for IDFW events with decoded samples by
|
||||
# replaying the same IdfEvent → Event bridge save_imported_idf
|
||||
# uses. IDFH events have no per-sample data; skip.
|
||||
if idf_samples and not args.skip_hdf5 and not is_histogram:
|
||||
# Regenerate .h5 by replaying the same IdfEvent → Event bridge
|
||||
# save_imported_idf uses. For IDFW we write the decoded per-
|
||||
# sample arrays. For IDFH we synthesise a 1-sample-per-interval
|
||||
# array (peak ADC count per channel per interval) so the
|
||||
# renderer's bar-chart code has something to group on.
|
||||
# Pre-condition: either real samples (IDFW) or decoded intervals
|
||||
# (IDFH). Skip otherwise.
|
||||
have_data = bool(idf_samples) or bool(idf_intervals)
|
||||
if have_data and not args.skip_hdf5:
|
||||
from sfm import event_hdf5
|
||||
hdf5_path = store.hdf5_path_for(serial, path.name)
|
||||
if args.dry_run:
|
||||
@@ -255,20 +268,44 @@ def main(argv=None) -> int:
|
||||
try:
|
||||
from micromate import IdfEvent
|
||||
from minimateplus.event_file_io import file_sha256
|
||||
# Bridge: parsed idf_report dict → IdfEvent →
|
||||
# minimateplus Event → write_event_hdf5. Mirrors
|
||||
# save_imported_idf steps 4-7.
|
||||
idf_event = IdfEvent.from_report(report_dict, path.name)
|
||||
|
||||
# Merge the binary-derived mic peak psi (only the
|
||||
# binary path knows the proper psi value; the .txt
|
||||
# carries dB(L)). Without this, the h5 writer's
|
||||
# per-count mic factor is computed against the
|
||||
# dB(L) value-as-pseudo-psi and the mic chart
|
||||
# scales wildly.
|
||||
if (binary_md is not None and res is not None
|
||||
and res.event.peaks.mic_pspl_psi is not None):
|
||||
idf_event.peaks.mic_pspl_psi = res.event.peaks.mic_pspl_psi
|
||||
|
||||
sha256 = file_sha256(path)
|
||||
waveform_key = bytes.fromhex(sha256)[:16]
|
||||
ev = idf_event.to_minimateplus_event(waveform_key)
|
||||
ev.raw_samples = idf_samples
|
||||
n_samp = max(
|
||||
(len(idf_samples.get(ch, []))
|
||||
for ch in ("Tran", "Vert", "Long", "MicL")),
|
||||
default=0,
|
||||
)
|
||||
ev.total_samples = ev.total_samples or n_samp
|
||||
|
||||
if is_histogram and idf_intervals:
|
||||
# 1 sample per interval per channel — same
|
||||
# synthesis save_imported_idf uses. The h5
|
||||
# writer's count×geo_fs/32768 conversion turns
|
||||
# each peak-ADC-count into the bar's physical
|
||||
# value.
|
||||
ev.raw_samples = {
|
||||
"Tran": [iv.peak_count("Tran") for iv in idf_intervals],
|
||||
"Vert": [iv.peak_count("Vert") for iv in idf_intervals],
|
||||
"Long": [iv.peak_count("Long") for iv in idf_intervals],
|
||||
"MicL": [iv.peak_count("MicL") for iv in idf_intervals],
|
||||
}
|
||||
ev.total_samples = ev.total_samples or len(idf_intervals)
|
||||
elif idf_samples:
|
||||
ev.raw_samples = idf_samples
|
||||
n_samp = max(
|
||||
(len(idf_samples.get(ch, []))
|
||||
for ch in ("Tran", "Vert", "Long", "MicL")),
|
||||
default=0,
|
||||
)
|
||||
ev.total_samples = ev.total_samples or n_samp
|
||||
|
||||
event_hdf5.write_event_hdf5(
|
||||
hdf5_path, ev,
|
||||
serial=serial,
|
||||
@@ -277,8 +314,10 @@ def main(argv=None) -> int:
|
||||
tool_version=event_file_io.TOOL_VERSION,
|
||||
)
|
||||
h5_written += 1
|
||||
log.debug("%s/%s — .h5 written (%d samples)",
|
||||
serial, path.name, n_samp)
|
||||
log.debug("%s/%s — .h5 written (%s)",
|
||||
serial, path.name,
|
||||
f"{len(idf_intervals)} intervals" if is_histogram
|
||||
else f"{sum(len(v) for v in (idf_samples or {}).values())} samples")
|
||||
except Exception as exc:
|
||||
log.warning("%s/%s — .h5 write failed: %s",
|
||||
serial, path.name, exc)
|
||||
|
||||
Reference in New Issue
Block a user