From e949232875053781e97503d11f39799cce25dfff Mon Sep 17 00:00:00 2001 From: serversdown Date: Thu, 21 May 2026 02:50:10 +0000 Subject: [PATCH] histogram_codec + backfill: tighter peak ceiling, preserve bw_report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit histogram_codec: drop _MAX_PEAK_COUNT 4096 → 2200. The old ceiling let extension-byte blocks slip through at up to 20.48 in/s per channel, producing 35× inflated PVS sums when first deployed to prod. 2200 covers Normal-range full-scale (10 in/s = 2000 counts) plus 10% headroom for quantization edge cases. backfill_sidecars: also preserve the bw_report block alongside review + extensions when regenerating sidecars. event_to_sidecar_dict takes a BwAsciiReport dataclass not a dict, so for bw_report we overlay the existing block after regen rather than passing as a kwarg. Co-Authored-By: Claude Opus 4.7 (1M context) --- minimateplus/histogram_codec.py | 19 +++++++++++++------ scripts/backfill_sidecars.py | 25 ++++++++++++++++++------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/minimateplus/histogram_codec.py b/minimateplus/histogram_codec.py index beed36f..adc0714 100644 --- a/minimateplus/histogram_codec.py +++ b/minimateplus/histogram_codec.py @@ -101,11 +101,13 @@ _BLOCK_SIZE = 32 # additional validation that we're looking at a real block. _BLOCK_MARKER = 10 -# Maximum plausible peak-count value. Normal-range geophone tops out -# at 10 in/s = 2000 counts at the 0.005 in/s per count scale; even -# Sensitive range (1.25 in/s FS) wouldn't exceed ~250. Mic counts run -# 0..~400 in observed data. 4096 leaves comfortable headroom for any -# legitimate value across all modes. +# Maximum plausible peak-count value. The geophone tops out at 10 in/s +# at Normal range = 2000 counts at the 0.005 in/s per count scale. +# Sensitive range (1.25 in/s FS) tops at ~250. Mic peak counts have +# been observed up to ~400 (≈ 100 dB(L)) and per the protocol doc can +# reach ~813 (140 dB(L)). 2200 covers Normal full-scale plus ~10% +# headroom for quantization edge cases while keeping every physically +# implausible value out of the PVS computation. # # Some prod blocks have been observed with peak-count fields whose # HIGH byte is non-zero (block[7] != 0 etc.) — observed across BE9558 @@ -116,7 +118,12 @@ _BLOCK_MARKER = 10 # device writes in some sub-mode (possibly Histogram+Continuous). # Until reverse-engineered, blocks exceeding this bound are skipped # rather than propagating bogus values into PVS computations. -_MAX_PEAK_COUNT = 4096 +# +# Earlier we tried 4096 — that allowed peak counts up to 4096 × 0.005 +# = 20.48 in/s per channel, which produced 35× inflated PVS sums when +# the extension-byte blocks slipped through. See feat/wire-histogram-codec +# branch history for the rollback. +_MAX_PEAK_COUNT = 2200 # Geo peak scaling: stored as "count × 0.005 in/s" where 1 count = one # 0.005 in/s display quantum. Equivalent to the waveform codec's diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py index 2b9533e..bbe0d0f 100644 --- a/scripts/backfill_sidecars.py +++ b/scripts/backfill_sidecars.py @@ -287,16 +287,25 @@ def main(argv=None) -> int: or ev.total_samples < derived // 4): ev.total_samples = derived - # Preserve user-edited review state + extensions from the - # existing sidecar (false_trigger flag, notes, etc.) so a - # backfill never wipes them out. - preserved_review = None - preserved_ext = None + # Preserve user-edited review state + extensions + the + # bw_report block from the existing sidecar so a backfill + # never wipes them out. The bw_report block originates + # from the paired .TXT ASCII report parsed at ORIGINAL + # import time (ach forward / direct upload); the .TXT + # file is not in the waveform store, so we can't re-derive + # it from disk. event_to_sidecar_dict takes a + # BwAsciiReport dataclass (not a dict), so for bw_report + # we overlay the existing block after regen instead of + # passing it as a kwarg. + preserved_review = None + preserved_ext = None + preserved_bw_report = None if sidecar_path.exists(): try: _existing = event_file_io.read_sidecar(sidecar_path) - preserved_review = _existing.get("review") - preserved_ext = _existing.get("extensions") + preserved_review = _existing.get("review") + preserved_ext = _existing.get("extensions") + preserved_bw_report = _existing.get("bw_report") except Exception: pass @@ -311,6 +320,8 @@ def main(argv=None) -> int: review=preserved_review, extensions=preserved_ext, ) + if preserved_bw_report is not None: + sidecar["bw_report"] = preserved_bw_report # Also emit the .h5 clean-waveform file when: # - it's missing, OR