feat(backfill): --reparse-txt flag to refresh bw_report from preserved .TXT
The existing backfill_sidecars.py PRESERVES the bw_report block across regenerations — it's treated as the source of truth from the original ingest pass (the .TXT isn't reachable from the script's normal data path, so it can't be re-derived). That means parser-side fixes (like the 2026-05-28 ">100 Hz" ZC Freq addition) won't reach old events even with --force. The new --reparse-txt flag fixes that: when the sidecar's source.txt_filename points at a preserved <serial>/<filename>_ASCII.TXT, the script re-runs the current parser against it and overwrites the bw_report block. Implies sidecar regeneration on every event (bypasses the sha-up-to-date / version-up-to-date skip), so that the .h5 cascade-regenerates alongside. No-op for events without a preserved .TXT (legacy ingests pre-2026-05-27). Idempotent — re-running it produces the same sidecar bytes when the parser hasn't changed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -103,6 +103,17 @@ def main(argv=None) -> int:
|
|||||||
"STRT-rectime byte-offset fix in v0.15.x)."
|
"STRT-rectime byte-offset fix in v0.15.x)."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
p.add_argument(
|
||||||
|
"--reparse-txt", action="store_true",
|
||||||
|
help=(
|
||||||
|
"Re-parse the preserved <serial>/<filename>_ASCII.TXT with the "
|
||||||
|
"current bw_ascii_report parser and overwrite the sidecar's "
|
||||||
|
"bw_report block. Use this after upgrading the ASCII parser to "
|
||||||
|
"pull in new fields (e.g. zc_freq_above_range for BW '>100 Hz' "
|
||||||
|
"ZC peaks). No-op for events without a preserved .TXT; safely "
|
||||||
|
"idempotent when the parser hasn't changed."
|
||||||
|
),
|
||||||
|
)
|
||||||
p.add_argument("-v", "--verbose", action="store_true")
|
p.add_argument("-v", "--verbose", action="store_true")
|
||||||
args = p.parse_args(argv)
|
args = p.parse_args(argv)
|
||||||
|
|
||||||
@@ -153,7 +164,7 @@ def main(argv=None) -> int:
|
|||||||
# of the sidecar implies staleness of the derived .h5 (both
|
# of the sidecar implies staleness of the derived .h5 (both
|
||||||
# come out of the same decoder).
|
# come out of the same decoder).
|
||||||
sidecar_stale = True
|
sidecar_stale = True
|
||||||
if sidecar_path.exists() and not args.force:
|
if sidecar_path.exists() and not args.force and not args.reparse_txt:
|
||||||
try:
|
try:
|
||||||
existing = event_file_io.read_sidecar(sidecar_path)
|
existing = event_file_io.read_sidecar(sidecar_path)
|
||||||
sha_ok = existing.get("blastware", {}).get("sha256") == bw_sha
|
sha_ok = existing.get("blastware", {}).get("sha256") == bw_sha
|
||||||
@@ -314,6 +325,24 @@ def main(argv=None) -> int:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# --reparse-txt: if a .TXT is preserved on disk, run the
|
||||||
|
# current parser against it and overwrite the bw_report
|
||||||
|
# block. Picks up post-ingest parser fixes (e.g. the
|
||||||
|
# 2026-05-28 zc_freq_above_range / ">100 Hz" addition).
|
||||||
|
if args.reparse_txt and preserved_txt_fn:
|
||||||
|
try:
|
||||||
|
from minimateplus import bw_ascii_report
|
||||||
|
txt_path = store.txt_path_for(serial, path.name)
|
||||||
|
if txt_path.exists():
|
||||||
|
refreshed = bw_ascii_report.parse_report_file(txt_path)
|
||||||
|
preserved_bw_report = event_file_io._bw_report_to_dict(refreshed)
|
||||||
|
log.debug("reparsed bw_report from %s", txt_path.name)
|
||||||
|
else:
|
||||||
|
log.debug("--reparse-txt: no .TXT at %s (sidecar says %r)",
|
||||||
|
txt_path, preserved_txt_fn)
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("--reparse-txt failed for %s: %s", path.name, exc)
|
||||||
|
|
||||||
# Overlay BW ASCII report fields onto the rebuilt Event
|
# Overlay BW ASCII report fields onto the rebuilt Event
|
||||||
# BEFORE the sidecar + DB write. Mirrors what the ingest
|
# BEFORE the sidecar + DB write. Mirrors what the ingest
|
||||||
# path does — BW's reported peaks (and sample_rate /
|
# path does — BW's reported peaks (and sample_rate /
|
||||||
|
|||||||
Reference in New Issue
Block a user