diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py index 8037d1f..36d8747 100644 --- a/scripts/backfill_sidecars.py +++ b/scripts/backfill_sidecars.py @@ -311,12 +311,32 @@ def main(argv=None) -> int: # int16-LE codec era — bumping TOOL_VERSION to 0.20.0+ # marks every pre-codec sidecar stale, which now # correctly cascades to .h5 regeneration too. + # + # Skip the .h5 write when the decoder couldn't produce + # samples — this is the histogram-mode case today + # (waveform_codec.decode_waveform_v2 only handles the + # waveform-mode body format per §7.6.1; the histogram + # codec at §7.6.2 is documented but not yet implemented). + # Without this check we'd replace the existing (broken + # int16-LE) histogram .h5 with an empty one, which is + # arguably worse for any consumer expecting non-empty + # sample arrays. When the histogram codec lands, this + # check can come out. + has_samples = bool( + ev.raw_samples and any( + ev.raw_samples.get(ch) for ch in ("Tran", "Vert", "Long", "MicL") + ) + ) hdf5_path = store.hdf5_path_for(serial, path.name) hdf5_filename = hdf5_path.name if hdf5_path.exists() else None hdf5_action = "kept" - need_h5 = not args.skip_hdf5 and ( - args.force or not hdf5_path.exists() or sidecar_stale + need_h5 = ( + not args.skip_hdf5 + and (args.force or not hdf5_path.exists() or sidecar_stale) + and has_samples ) + if not has_samples and not args.skip_hdf5: + hdf5_action = "skipped-empty-samples" if need_h5: if args.dry_run: hdf5_action = "would (re)write"