v0.20.0 -- Full s3 event parse and PDF creation. #28
+1
-1
@@ -1,6 +1,6 @@
|
||||
/bridges/captures/
|
||||
/example-events/
|
||||
|
||||
/tests/fixtures/
|
||||
/manuals/
|
||||
|
||||
# Python build artifacts
|
||||
|
||||
@@ -12,7 +12,21 @@ implementation lives in `minimateplus/histogram_codec.py`.
|
||||
in-repo histogram fixture corpus decodes byte-exact against BW's
|
||||
ASCII export.
|
||||
|
||||
24 regression tests pass against ~3,500 blocks across 5 fixtures.
|
||||
26 regression tests pass against ~3,500 blocks across 5 in-repo
|
||||
fixtures, plus a synthetic regression block taken from a real
|
||||
BE9558 prod event to lock in the uint8-peak interpretation.
|
||||
|
||||
**Important correction (2026-05-21):** the per-channel peak count
|
||||
is `uint8` at byte[6]/[10]/[14]/[18], NOT `uint16 LE` at byte[6:8]
|
||||
etc. The N844 fixture corpus the original RE was done against has
|
||||
zero values in bytes [7]/[11]/[15]/[19] for every block, so the
|
||||
two interpretations happened to be equivalent. Cross-correlating
|
||||
non-N844 events (BE9558 Tran-drift, BE18003 Histogram+Continuous)
|
||||
against BW's per-interval ASCII export — 4 channels × ~1400 blocks
|
||||
per event × multiple events — is 100% byte-exact only when the peak
|
||||
is read as uint8. Reading as uint16 LE produced peaks up to 268
|
||||
in/s per channel and 35× inflated PVS sums when first deployed to
|
||||
prod (rolled back, root-caused, and fixed in commit 7183b95+1).
|
||||
|
||||
## Body format
|
||||
|
||||
@@ -27,15 +41,21 @@ Each block represents one histogram interval. Block layout:
|
||||
[1] segment_id (uint8) 0x00..0x03 — 256 blocks per segment
|
||||
[2:4] block_ctr (uint16 LE) resets each segment (0x0100, 0x0101, …)
|
||||
[4:6] 0x000a (uint16 LE) constant marker (= 10)
|
||||
[6:8] T_peak_count uint16 LE Tran peak (count × 0.005 → in/s at Normal)
|
||||
[6] T_peak_count uint8 Tran peak (count × 0.005 → in/s at Normal,
|
||||
max 1.275 in/s — fits in uint8)
|
||||
[7] T_annotation uint8 empirically non-zero on intervals with sub-Hz
|
||||
or unmeasurable freq; meaning not fully RE'd
|
||||
[8:10] T_halfperiod uint16 LE Tran half-period in samples
|
||||
(freq_Hz = 512 / halfp; ≤ 5 means ">100 Hz")
|
||||
[10:12] V_peak_count uint16 LE Vert peak
|
||||
[10] V_peak_count uint8 Vert peak
|
||||
[11] V_annotation uint8
|
||||
[12:14] V_halfperiod uint16 LE Vert freq half-period
|
||||
[14:16] L_peak_count uint16 LE Long peak
|
||||
[14] L_peak_count uint8 Long peak
|
||||
[15] L_annotation uint8
|
||||
[16:18] L_halfperiod uint16 LE Long freq half-period
|
||||
[18:20] M_peak_count uint16 LE MicL peak count
|
||||
[18] M_peak_count uint8 MicL peak count
|
||||
(dB via waveform_codec.mic_count_to_db)
|
||||
[19] M_annotation uint8
|
||||
[20:22] M_halfperiod uint16 LE MicL freq half-period
|
||||
[22:24] 0x00 0x00 constant
|
||||
[24:28] 4-byte variable purpose unknown — possibly CRC,
|
||||
@@ -99,6 +119,16 @@ slot[8] = 9 → 512/9 = 56.9 → 57 Hz ✓ M_freq
|
||||
|
||||
## What's NOT yet decoded
|
||||
|
||||
- **Annotation bytes (`block[7]/[11]/[15]/[19]`)**. Empirically
|
||||
non-zero on intervals where the per-channel ZC frequency comes
|
||||
out as `N/A` or sub-Hz (`<1.0`, `1.X`). Hypothesis tested in the
|
||||
RE session: byte != 0 ↔ sub-Hz freq. Only ~50% correlation
|
||||
across the K558 corpus, so the relationship is more complex.
|
||||
Possibilities: time-of-peak-within-interval, halfp extension for
|
||||
very-long-period signals, or a debug/diagnostic field the firmware
|
||||
writes opportunistically. Doesn't affect peak amplitudes or
|
||||
waveform reconstruction. Captured as `record["annotations"]` for
|
||||
future RE.
|
||||
- **4-byte variable metadata field (bytes 24:28)**. Not needed for
|
||||
waveform reconstruction. Speculation: per-block CRC, sub-second
|
||||
timestamp offset, or a Mic psi(L) count not in the 9 samples.
|
||||
|
||||
@@ -28,18 +28,32 @@ iterate 32-stride and stop before the tail.
|
||||
[1] segment_id (uint8) 0x00..0x03 — 256 blocks per segment
|
||||
[2:4] block_ctr (uint16 LE) resets each segment (0x0100, 0x0101, …)
|
||||
[4:6] 0x000a (uint16 LE) constant marker (= 10)
|
||||
[6:8] T_peak_count uint16 LE Tran peak (count × 0.005 → in/s)
|
||||
[6] T_peak_count uint8 Tran peak (count × 0.005 → in/s, max 1.275 in/s)
|
||||
[7] T_annotation uint8 empirically non-zero on intervals with sub-Hz
|
||||
or unmeasurable Tran freq; meaning not fully RE'd
|
||||
[8:10] T_halfperiod uint16 LE Tran half-period in samples (freq = 512 / halfp Hz)
|
||||
[10:12] V_peak_count uint16 LE
|
||||
[10] V_peak_count uint8
|
||||
[11] V_annotation uint8
|
||||
[12:14] V_halfperiod uint16 LE
|
||||
[14:16] L_peak_count uint16 LE
|
||||
[14] L_peak_count uint8
|
||||
[15] L_annotation uint8
|
||||
[16:18] L_halfperiod uint16 LE
|
||||
[18:20] M_peak_count uint16 LE MicL peak (count → dB via mic_count_to_db)
|
||||
[18] M_peak_count uint8 MicL peak (count → dB via mic_count_to_db)
|
||||
[19] M_annotation uint8
|
||||
[20:22] M_halfperiod uint16 LE MicL half-period in samples (freq = 512 / halfp Hz)
|
||||
[22:24] 0x00 0x00 constant
|
||||
[24:28] 4-byte variable purpose unknown (possibly CRC or timestamp delta)
|
||||
[28:32] 0x1e 0x0a 0x00 0x00 constant block-end signature
|
||||
|
||||
NOTE on peak-count width: an earlier interpretation treated the peak
|
||||
fields as uint16 LE spanning [6:8] / [10:12] / [14:16] / [18:20].
|
||||
That happened to be byte-exact against the N844 fixture corpus only
|
||||
because every annotation byte in those fixtures was zero, making
|
||||
``uint16 LE == uint8``. Cross-correlating BE9558 (K558) Tran-drift
|
||||
and BE18003 (T003) Histogram+Continuous events against the BW ASCII
|
||||
export proved the peak is uint8 alone — see test_histogram_codec.py
|
||||
and docs/histogram_codec_re_status.md.
|
||||
|
||||
Block-identification anchor: ``block[22:24] == b"\\x00\\x00"`` AND
|
||||
``block[28:32] == b"\\x1e\\x0a\\x00\\x00"``. This is the reliable
|
||||
distinguisher from non-block content in the file.
|
||||
@@ -101,30 +115,6 @@ _BLOCK_SIZE = 32
|
||||
# additional validation that we're looking at a real block.
|
||||
_BLOCK_MARKER = 10
|
||||
|
||||
# Maximum plausible peak-count value. The geophone tops out at 10 in/s
|
||||
# at Normal range = 2000 counts at the 0.005 in/s per count scale.
|
||||
# Sensitive range (1.25 in/s FS) tops at ~250. Mic peak counts have
|
||||
# been observed up to ~400 (≈ 100 dB(L)) and per the protocol doc can
|
||||
# reach ~813 (140 dB(L)). 2200 covers Normal full-scale plus ~10%
|
||||
# headroom for quantization edge cases while keeping every physically
|
||||
# implausible value out of the PVS computation.
|
||||
#
|
||||
# Some prod blocks have been observed with peak-count fields whose
|
||||
# HIGH byte is non-zero (block[7] != 0 etc.) — observed across BE9558
|
||||
# and BE18003 units in Histogram-mode events. Reading these as
|
||||
# uint16 LE produces values like 30981 / 41733 / 62469, which scale
|
||||
# to physically impossible peaks (150+ in/s). Best guess: an
|
||||
# undocumented "time-of-peak-within-interval" extension byte the
|
||||
# device writes in some sub-mode (possibly Histogram+Continuous).
|
||||
# Until reverse-engineered, blocks exceeding this bound are skipped
|
||||
# rather than propagating bogus values into PVS computations.
|
||||
#
|
||||
# Earlier we tried 4096 — that allowed peak counts up to 4096 × 0.005
|
||||
# = 20.48 in/s per channel, which produced 35× inflated PVS sums when
|
||||
# the extension-byte blocks slipped through. See feat/wire-histogram-codec
|
||||
# branch history for the rollback.
|
||||
_MAX_PEAK_COUNT = 2200
|
||||
|
||||
# Geo peak scaling: stored as "count × 0.005 in/s" where 1 count = one
|
||||
# 0.005 in/s display quantum. Equivalent to the waveform codec's
|
||||
# 16-count-unit output (1 unit = 0.005 in/s = 16 ADC counts).
|
||||
@@ -156,23 +146,36 @@ def _decode_block(block: bytes) -> Optional[dict]:
|
||||
"""Decode one 32-byte histogram block. Caller must have validated
|
||||
with ``_is_data_block`` first.
|
||||
|
||||
Returns ``None`` if any peak field exceeds ``_MAX_PEAK_COUNT`` —
|
||||
those blocks contain an undocumented extension byte format whose
|
||||
naive uint16 LE interpretation gives physically impossible peaks.
|
||||
Skipping the block is safer than propagating bogus values into
|
||||
PVS computations downstream.
|
||||
Returns a record with per-channel peak counts (uint8) and
|
||||
half-periods (uint16 LE).
|
||||
"""
|
||||
# All 16-bit fields are little-endian unsigned. Peak counts are
|
||||
# always non-negative; half-periods are always positive when valid.
|
||||
t_peak, t_halfp, v_peak, v_halfp, l_peak, l_halfp, m_peak, m_halfp = struct.unpack_from(
|
||||
"<HHHHHHHH", block, 6
|
||||
)
|
||||
if (t_peak > _MAX_PEAK_COUNT or v_peak > _MAX_PEAK_COUNT
|
||||
or l_peak > _MAX_PEAK_COUNT or m_peak > _MAX_PEAK_COUNT):
|
||||
return None
|
||||
# Peak counts are uint8 at bytes [6] / [10] / [14] / [18]. The
|
||||
# adjacent bytes [7] / [11] / [15] / [19] hold an annotation field
|
||||
# whose meaning isn't fully understood (empirically non-zero in
|
||||
# intervals with sub-Hz or unmeasurable geo frequencies, mostly
|
||||
# zero otherwise — see test fixtures from BE9558/BE18003 corpora).
|
||||
# Crucially, those annotation bytes are NOT the high byte of the
|
||||
# peak count: cross-correlating against BW's per-interval ASCII
|
||||
# export proves the peak is uint8 alone.
|
||||
#
|
||||
# Reading the peak as uint16 LE (the original interpretation) was
|
||||
# accidentally correct only because every block in the N844 fixture
|
||||
# corpus had a zero annotation byte; non-N844 events with non-zero
|
||||
# annotation bytes decoded to physically impossible peaks (e.g.
|
||||
# 268 in/s per channel) and produced 35× inflated PVS sums when
|
||||
# first run against prod data. See histogram_codec_re_status.md.
|
||||
t_peak = block[6]
|
||||
v_peak = block[10]
|
||||
l_peak = block[14]
|
||||
m_peak = block[18]
|
||||
t_halfp = block[8] | (block[9] << 8)
|
||||
v_halfp = block[12] | (block[13] << 8)
|
||||
l_halfp = block[16] | (block[17] << 8)
|
||||
m_halfp = block[20] | (block[21] << 8)
|
||||
segment_id = block[1]
|
||||
block_ctr = block[2] | (block[3] << 8)
|
||||
var_meta = bytes(block[24:28])
|
||||
annotations = (block[7], block[11], block[15], block[19])
|
||||
return {
|
||||
"segment_id": segment_id,
|
||||
"block_ctr": block_ctr,
|
||||
@@ -185,6 +188,7 @@ def _decode_block(block: bytes) -> Optional[dict]:
|
||||
"m_peak": m_peak,
|
||||
"m_halfp": m_halfp,
|
||||
"meta_var": var_meta,
|
||||
"annotations": annotations,
|
||||
}
|
||||
|
||||
|
||||
@@ -192,10 +196,15 @@ def walk_body(body: bytes) -> List[dict]:
|
||||
"""Walk the body and return one dict per histogram interval.
|
||||
|
||||
Iterates 32-byte strides from offset 0. Yields a decoded record
|
||||
for every block that passes ``_is_data_block`` validation AND has
|
||||
plausible peak values (``_decode_block`` returns None for blocks
|
||||
with out-of-bound peaks). Stops when the remaining bytes are too
|
||||
short to form a complete block.
|
||||
for every block that passes ``_is_data_block`` validation. Stops
|
||||
when the remaining bytes are too short to form a complete block.
|
||||
|
||||
In Histogram+Continuous mode the body interleaves data blocks with
|
||||
other 32-byte content (likely continuous-mode waveform blocks) that
|
||||
fail the data-block validation; the walker naturally skips them
|
||||
without losing 32-byte alignment. Use ``block_ctr`` from each
|
||||
returned record to map back to the original interval index — the
|
||||
record list is sparse when other block types are interleaved.
|
||||
"""
|
||||
records: List[dict] = []
|
||||
for off in range(0, len(body) - _BLOCK_SIZE + 1, _BLOCK_SIZE):
|
||||
|
||||
@@ -335,3 +335,51 @@ def test_geo_count_to_ins_scale():
|
||||
assert geo_count_to_ins(1) == pytest.approx(0.005)
|
||||
assert geo_count_to_ins(10) == pytest.approx(0.050)
|
||||
assert geo_count_to_ins(0) == 0.0
|
||||
|
||||
|
||||
# ── Regression: peak is uint8 byte[N], NOT uint16 LE byte[N:N+2] ────────────
|
||||
#
|
||||
# Block taken verbatim from K558LKZU.RE0H (BE9558) interval 12 — a real
|
||||
# field event where the Tran channel had developed a DC offset and was
|
||||
# producing sub-Hz drift content the device couldn't characterize.
|
||||
# The annotation byte at [7] = 0xd2 is non-zero in that case. The
|
||||
# legacy codec read [6:8] as uint16 LE, producing T_peak = 53763 →
|
||||
# 268 in/s — physically impossible and ~18,000× too high for the actual
|
||||
# 0.015 in/s value (T_lo = 3 alone gives the correct count).
|
||||
# Verified against the paired BW ASCII export.
|
||||
_K558_INTERVAL_12_BLOCK = bytes.fromhex(
|
||||
"00 00 0c 01 0a 00 03 d2 45 00 02 00 02 00 02 00"
|
||||
"02 00 10 00 06 00 00 00 0e 91 2f 00 1e 0a 00 00".replace(" ", "")
|
||||
)
|
||||
|
||||
|
||||
def test_extension_byte_does_not_inflate_peak():
|
||||
"""The annotation byte at [7]/[11]/[15]/[19] must NOT contribute to
|
||||
the peak count. Decoded T_peak must be 3 (uint8 byte[6]), NOT
|
||||
53763 (uint16 LE byte[6:8])."""
|
||||
body = _K558_INTERVAL_12_BLOCK
|
||||
records = decode_histogram_body_full(body)
|
||||
assert records is not None
|
||||
assert len(records) == 1
|
||||
r = records[0]
|
||||
assert r["t_peak"] == 3, f"T_peak should be 3 (uint8), got {r['t_peak']}"
|
||||
assert r["v_peak"] == 2
|
||||
assert r["l_peak"] == 2
|
||||
assert r["m_peak"] == 16
|
||||
# Half-periods unchanged — still uint16 LE.
|
||||
assert r["t_halfp"] == 0x0045 # 69 → 7.4 Hz
|
||||
assert r["m_halfp"] == 6 # → 85.3 Hz
|
||||
# Annotation byte is preserved (for future RE) but does not affect peak.
|
||||
assert r["annotations"] == (0xd2, 0x00, 0x00, 0x00)
|
||||
|
||||
|
||||
def test_extension_byte_decoded_to_correct_in_s():
|
||||
"""End-to-end: the channel-grouped output for the K558 ext block
|
||||
should give T = 3 counts = 0.015 in/s, not 53763 counts = 268 in/s."""
|
||||
channels = decode_histogram_body(_K558_INTERVAL_12_BLOCK)
|
||||
assert channels is not None
|
||||
assert channels["Tran"] == [3]
|
||||
assert geo_count_to_ins(channels["Tran"][0]) == pytest.approx(0.015)
|
||||
assert channels["Vert"] == [2]
|
||||
assert channels["Long"] == [2]
|
||||
assert channels["MicL"] == [16]
|
||||
|
||||
Reference in New Issue
Block a user