diff --git a/minimateplus/histogram_codec.py b/minimateplus/histogram_codec.py index c969f45..beed36f 100644 --- a/minimateplus/histogram_codec.py +++ b/minimateplus/histogram_codec.py @@ -101,6 +101,23 @@ _BLOCK_SIZE = 32 # additional validation that we're looking at a real block. _BLOCK_MARKER = 10 +# Maximum plausible peak-count value. Normal-range geophone tops out +# at 10 in/s = 2000 counts at the 0.005 in/s per count scale; even +# Sensitive range (1.25 in/s FS) wouldn't exceed ~250. Mic counts run +# 0..~400 in observed data. 4096 leaves comfortable headroom for any +# legitimate value across all modes. +# +# Some prod blocks have been observed with peak-count fields whose +# HIGH byte is non-zero (block[7] != 0 etc.) — observed across BE9558 +# and BE18003 units in Histogram-mode events. Reading these as +# uint16 LE produces values like 30981 / 41733 / 62469, which scale +# to physically impossible peaks (150+ in/s). Best guess: an +# undocumented "time-of-peak-within-interval" extension byte the +# device writes in some sub-mode (possibly Histogram+Continuous). +# Until reverse-engineered, blocks exceeding this bound are skipped +# rather than propagating bogus values into PVS computations. +_MAX_PEAK_COUNT = 4096 + # Geo peak scaling: stored as "count × 0.005 in/s" where 1 count = one # 0.005 in/s display quantum. Equivalent to the waveform codec's # 16-count-unit output (1 unit = 0.005 in/s = 16 ADC counts). @@ -128,14 +145,24 @@ def _is_data_block(block: bytes) -> bool: return True -def _decode_block(block: bytes) -> dict: +def _decode_block(block: bytes) -> Optional[dict]: """Decode one 32-byte histogram block. Caller must have validated - with ``_is_data_block`` first.""" + with ``_is_data_block`` first. + + Returns ``None`` if any peak field exceeds ``_MAX_PEAK_COUNT`` — + those blocks contain an undocumented extension byte format whose + naive uint16 LE interpretation gives physically impossible peaks. + Skipping the block is safer than propagating bogus values into + PVS computations downstream. + """ # All 16-bit fields are little-endian unsigned. Peak counts are # always non-negative; half-periods are always positive when valid. t_peak, t_halfp, v_peak, v_halfp, l_peak, l_halfp, m_peak, m_halfp = struct.unpack_from( " _MAX_PEAK_COUNT or v_peak > _MAX_PEAK_COUNT + or l_peak > _MAX_PEAK_COUNT or m_peak > _MAX_PEAK_COUNT): + return None segment_id = block[1] block_ctr = block[2] | (block[3] << 8) var_meta = bytes(block[24:28]) @@ -158,8 +185,10 @@ def walk_body(body: bytes) -> List[dict]: """Walk the body and return one dict per histogram interval. Iterates 32-byte strides from offset 0. Yields a decoded record - for every block that passes ``_is_data_block`` validation. Stops - when the remaining bytes are too short to form a complete block. + for every block that passes ``_is_data_block`` validation AND has + plausible peak values (``_decode_block`` returns None for blocks + with out-of-bound peaks). Stops when the remaining bytes are too + short to form a complete block. """ records: List[dict] = [] for off in range(0, len(body) - _BLOCK_SIZE + 1, _BLOCK_SIZE): @@ -169,7 +198,13 @@ def walk_body(body: bytes) -> List[dict]: # Continue walking — block alignment is fixed at 32-stride # from offset 0, so we don't lose alignment by skipping. continue - records.append(_decode_block(blk)) + decoded = _decode_block(blk) + if decoded is None: + # Block validated as a histogram block but had peak fields + # outside the plausible range — undocumented extension. + # Skip rather than propagating bogus PVS contributions. + continue + records.append(decoded) return records