diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py index 5618f72..a7980f1 100644 --- a/minimateplus/event_file_io.py +++ b/minimateplus/event_file_io.py @@ -48,7 +48,7 @@ SIDECAR_KIND = "sfm.event" # bumped without a `pip install` re-run — leading to confusing stale # version stamps in sidecars. Bump this constant and CHANGELOG.md # together at release time. -TOOL_VERSION = "0.16.1" +TOOL_VERSION = "0.20.0" try: # Best-effort: prefer the installed metadata when it's NEWER than the diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py index b937e8c..8037d1f 100644 --- a/scripts/backfill_sidecars.py +++ b/scripts/backfill_sidecars.py @@ -12,8 +12,20 @@ Walks `//` and for each BW event file: parsing the BW binary directly (peaks computed from samples). Clean waveform (.h5): - - Skip when .h5 already exists (idempotent). - - Else write from .a5.pkl (preferred) or BW binary parse (fallback). + - Regenerated whenever the sidecar is regenerated (sha mismatch + OR sidecar.source.tool_version < current TOOL_VERSION OR --force). + The .h5 and the sidecar both come from the same decoder output, + so if the sidecar is stale the .h5 is too. + - Written when missing, but only for events that reach the .h5 step: an event skipped because its sidecar is already current (sha and tool_version both OK, no --force) is not revisited, so use --force to backfill a missing .h5 for it. + - --skip-hdf5 turns off all .h5 writes. + +Typical use after a decoder upgrade: + 1. Pull the new seismo-relay code (which bumped TOOL_VERSION). + 2. Run this script — every sidecar with an older tool_version + stamp regenerates, and the associated .h5 cascade-regenerates. + 3. Operator review state (review.false_trigger, notes, reviewer) + and the sidecar's extensions block are preserved across the + regen. Usage: python scripts/backfill_sidecars.py [--store-root PATH] @@ -123,6 +135,12 @@ def main(argv=None) -> int: # the sidecar was written by a build that includes any # decoder fixes shipped since). # Either part failing → regenerate. --force bypasses both. 
+ # + # Tracks whether we're regenerating the sidecar this iteration + # so the .h5 logic below knows to refresh that too — staleness + # of the sidecar implies staleness of the derived .h5 (both + # come out of the same decoder). + sidecar_stale = True if sidecar_path.exists() and not args.force: try: existing = event_file_io.read_sidecar(sidecar_path) @@ -136,6 +154,7 @@ def main(argv=None) -> int: ver_ok = _vt(src_ver) >= _vt(event_file_io.TOOL_VERSION) if sha_ok and ver_ok: skipped += 1 + sidecar_stale = False continue if sha_ok and not ver_ok: log.info( @@ -281,12 +300,23 @@ def main(argv=None) -> int: extensions=preserved_ext, ) - # Also emit the .h5 clean-waveform file when missing OR when - # --force was passed (so a re-backfill picks up decoder fixes). + # Also emit the .h5 clean-waveform file when: + # - it's missing, OR + # - --force was passed, OR + # - the sidecar is being regenerated this iteration + # (sha mismatch / tool_version too old). The .h5 and + # the sidecar are both derived from the same decoder + # output, so if the sidecar is stale, so is the .h5. + # This is the path that recovers from the broken- + # int16-LE codec era — bumping TOOL_VERSION to 0.20.0+ + # marks every pre-codec sidecar stale, which now + # correctly cascades to .h5 regeneration too. hdf5_path = store.hdf5_path_for(serial, path.name) hdf5_filename = hdf5_path.name if hdf5_path.exists() else None hdf5_action = "kept" - need_h5 = not args.skip_hdf5 and (args.force or not hdf5_path.exists()) + need_h5 = not args.skip_hdf5 and ( + args.force or not hdf5_path.exists() or sidecar_stale + ) if need_h5: if args.dry_run: hdf5_action = "would (re)write"