diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py index 04583f7..04789a5 100644 --- a/scripts/backfill_sidecars.py +++ b/scripts/backfill_sidecars.py @@ -103,6 +103,17 @@ def main(argv=None) -> int: "STRT-rectime byte-offset fix in v0.15.x)." ), ) + p.add_argument( + "--reparse-txt", action="store_true", + help=( + "Re-parse the preserved /_ASCII.TXT with the " + "current bw_ascii_report parser and overwrite the sidecar's " + "bw_report block. Use this after upgrading the ASCII parser to " + "pull in new fields (e.g. zc_freq_above_range for BW '>100 Hz' " + "ZC peaks). No-op for events without a preserved .TXT; safely " + "idempotent when the parser hasn't changed." + ), + ) p.add_argument("-v", "--verbose", action="store_true") args = p.parse_args(argv) @@ -153,7 +164,7 @@ def main(argv=None) -> int: # of the sidecar implies staleness of the derived .h5 (both # come out of the same decoder). sidecar_stale = True - if sidecar_path.exists() and not args.force: + if sidecar_path.exists() and not args.force and not args.reparse_txt: try: existing = event_file_io.read_sidecar(sidecar_path) sha_ok = existing.get("blastware", {}).get("sha256") == bw_sha @@ -314,6 +325,24 @@ def main(argv=None) -> int: except Exception: pass + # --reparse-txt: if a .TXT is preserved on disk, run the + # current parser against it and overwrite the bw_report + # block. Picks up post-ingest parser fixes (e.g. the + # 2026-05-28 zc_freq_above_range / ">100 Hz" addition). + if args.reparse_txt and preserved_txt_fn: + try: + from minimateplus import bw_ascii_report + txt_path = store.txt_path_for(serial, path.name) + if txt_path.exists(): + refreshed = bw_ascii_report.parse_report_file(txt_path) + preserved_bw_report = event_file_io._bw_report_to_dict(refreshed) + log.debug("reparsed bw_report from %s", txt_path.name) + else: + log.debug("--reparse-txt: no .TXT at %s (sidecar says %r)", + txt_path, preserved_txt_fn) + except Exception as exc: + log.warning("--reparse-txt failed for %s: %s", path.name, exc) + # Overlay BW ASCII report fields onto the rebuilt Event # BEFORE the sidecar + DB write. Mirrors what the ingest # path does — BW's reported peaks (and sample_rate /