Histogram body codec — full RE + peak-count fix that resolves the prod inflation incident #26

Merged
serversdown merged 5 commits from feat/wire-histogram-codec into dev 2026-05-22 13:08:04 -04:00
2 changed files with 31 additions and 13 deletions
Showing only changes of commit e949232875 - Show all commits
+13 -6
View File
@@ -101,11 +101,13 @@ _BLOCK_SIZE = 32
 # additional validation that we're looking at a real block.
 _BLOCK_MARKER = 10
-# Maximum plausible peak-count value. Normal-range geophone tops out
-# at 10 in/s = 2000 counts at the 0.005 in/s per count scale; even
-# Sensitive range (1.25 in/s FS) wouldn't exceed ~250. Mic counts run
-# 0..~400 in observed data. 4096 leaves comfortable headroom for any
-# legitimate value across all modes.
+# Maximum plausible peak-count value. The geophone tops out at 10 in/s
+# at Normal range = 2000 counts at the 0.005 in/s per count scale.
+# Sensitive range (1.25 in/s FS) tops at ~250. Mic peak counts have
+# been observed up to ~400 (≈ 100 dB(L)) and per the protocol doc can
+# reach ~813 (140 dB(L)). 2200 covers Normal full-scale plus ~10%
+# headroom for quantization edge cases while keeping every physically
+# implausible value out of the PVS computation.
 #
 # Some prod blocks have been observed with peak-count fields whose
 # HIGH byte is non-zero (block[7] != 0 etc.) — observed across BE9558
@@ -116,7 +118,12 @@ _BLOCK_MARKER = 10
 # device writes in some sub-mode (possibly Histogram+Continuous).
 # Until reverse-engineered, blocks exceeding this bound are skipped
 # rather than propagating bogus values into PVS computations.
-_MAX_PEAK_COUNT = 4096
+#
+# Earlier we tried 4096 — that allowed peak counts up to 4096 × 0.005
+# = 20.48 in/s per channel, which produced 35× inflated PVS sums when
+# the extension-byte blocks slipped through. See feat/wire-histogram-codec
+# branch history for the rollback.
+_MAX_PEAK_COUNT = 2200
 # Geo peak scaling: stored as "count × 0.005 in/s" where 1 count = one
 # 0.005 in/s display quantum. Equivalent to the waveform codec's
+18 -7
View File
@@ -287,16 +287,25 @@ def main(argv=None) -> int:
 or ev.total_samples < derived // 4):
 ev.total_samples = derived
-# Preserve user-edited review state + extensions from the
-# existing sidecar (false_trigger flag, notes, etc.) so a
-# backfill never wipes them out.
-preserved_review = None
-preserved_ext = None
+# Preserve user-edited review state + extensions + the
+# bw_report block from the existing sidecar so a backfill
+# never wipes them out. The bw_report block originates
+# from the paired .TXT ASCII report parsed at ORIGINAL
+# import time (ach forward / direct upload); the .TXT
+# file is not in the waveform store, so we can't re-derive
+# it from disk. event_to_sidecar_dict takes a
+# BwAsciiReport dataclass (not a dict), so for bw_report
+# we overlay the existing block after regen instead of
+# passing it as a kwarg.
+preserved_review = None
+preserved_ext = None
+preserved_bw_report = None
 if sidecar_path.exists():
 try:
 _existing = event_file_io.read_sidecar(sidecar_path)
 preserved_review = _existing.get("review")
 preserved_ext = _existing.get("extensions")
+preserved_bw_report = _existing.get("bw_report")
 except Exception:
 pass
@@ -311,6 +320,8 @@ def main(argv=None) -> int:
 review=preserved_review,
 extensions=preserved_ext,
 )
+if preserved_bw_report is not None:
+sidecar["bw_report"] = preserved_bw_report
 # Also emit the .h5 clean-waveform file when:
 # - it's missing, OR