From 0466bb4f445d96f6f6196f912a649b012aa274e2 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 16 May 2026 03:29:13 +0000 Subject: [PATCH] codec: crack wide-NN blocks (1X NN / 2X NN); loud events now fully decode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When NN exceeds 0xFC, the codec extends to 12-bit NN by using the low nibble of the TYPE byte as the high nibble of NN: 1X NN → nibble-delta block, NN = (X << 8) | NN_byte 2X NN → int8-delta block, same NN encoding Walker and decode_waveform_v2 now handle both narrow (X=0) and wide (X != 0) forms uniformly. Discovered while investigating why SP0/SS0/SV0/event-b walkers stopped mid-event. SP0 segment 12 (V continuation, cycle 3) starts with "11 90" — high nibble of byte 0 = 1 (= nibble-delta block type), low nibble = 1 plus byte 1 = 0x90 → NN = 0x190 = 400 nibble deltas in 202 bytes. Walker was rejecting "11" as a non-tag. Sample count went from 47,364 to 72,972 verified byte-exact: event-a: 9984 (full) was 9984 (full) event-b: 6912 (full) was 738 event-c: 3840 (full) was 3840 (full) event-d: 3840 (full) was 3840 (full) JQ0: 9984 (full) was 9984 (full) V70: 9984 (full) was 9984 (full) SP0: 9984 (full) was 5122 SS0: 9222 (1-7 tail samples short per channel) was 1758 SV0: 9222 (1-7 tail samples short per channel) was 2114 7 of 9 fixtures now decode end-to-end across all 3 geo channels. The 2 remaining (SS0, SV0) are missing only 1-7 tail samples per channel — minor walker edge case at the very end. 74 tests pass (was 71). --- CLAUDE.md | 22 ++++++++++++++---- docs/waveform_codec_re_status.md | 24 +++++++++++++++----- minimateplus/waveform_codec.py | 20 ++++++++++++++-- tests/test_waveform_codec.py | 39 ++++++++++++++++++++------------ 4 files changed, 77 insertions(+), 28 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 710371d..5dd6629 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,6 +114,14 @@ custom delta + RLE + variable-width codec. blocks in the fixture bundle. 
12-bit was chosen because ±2047 in 16-count units ≈ ±10 in/s = the geophone's full-scale range at Normal sensitivity. +- **Wide-NN blocks (`1X NN`, `2X NN`)** — when a `10 NN` or `20 NN` + block's NN would exceed 0xFC, the codec uses a 12-bit NN encoding: + the low nibble of the type byte holds the high nibble of NN (so the + type byte appears as e.g. `0x11` instead of `0x10`). Effective + NN = `((type_byte & 0x0F) << 8) | nn_byte`. Block length follows + the same formula as the narrow form (`NN/2 + 2` for nibble blocks, + `NN + 2` for int8 blocks). Confirmed 2026-05-11 against SP0 cycle + 3 V continuation (`11 90` = NN=400 nibble deltas in 202 bytes). ### What's NOT solved @@ -131,16 +139,20 @@ custom delta + RLE + variable-width codec. | Event | Tran | Vert | Long | Total | |---|---|---|---|---| | event-a | 3328 | 3328 | 3328 | **9984** ← full event | +| event-b | 2304 | 2304 | 2304 | **6912** ← full event | | event-c | 1280 | 1280 | 1280 | 3840 ← full event | | event-d | 1280 | 1280 | 1280 | 3840 ← full event | | JQ0 | 3328 | 3328 | 3328 | **9984** ← full event | | V70 | 3328 | 3328 | 3328 | **9984** ← full event | -| SP0 | 2048 | 1538 | 1536 | 5122 (walker stops early) | -| SS0 | 734 | 512 | 512 | 1758 (walker stops early) | -| SV0 | 1024 | 578 | 512 | 2114 (walker stops early) | -| event-b | 512 | 226 | 0 | 738 (walker stops early) | +| SP0 | 3328 | 3328 | 3328 | **9984** ← full event | +| SS0 | 3078 | 3072 | 3072 | 9222 (1–7 tail samples missing) | +| SV0 | 3078 | 3072 | 3072 | 9222 (1–7 tail samples missing) | -**Total: 47,364 ADC samples verified byte-exact, zero errors.** +**Total: 72,972 ADC samples verified byte-exact, zero errors.** + +7 of 9 fixture events decode end-to-end across all three geo channels. +The remaining two (SS0 / SV0) decode all but the last 1–7 samples per +channel — a minor walker edge case. 
### Production-code status (updated 2026-05-11 late) diff --git a/docs/waveform_codec_re_status.md b/docs/waveform_codec_re_status.md index 7aa1b7c..dd1a6c6 100644 --- a/docs/waveform_codec_re_status.md +++ b/docs/waveform_codec_re_status.md @@ -53,12 +53,24 @@ correct. ## What's still open -- **Walker edge cases** — SP0/SS0/SV0 don't walk the full event. The - walker stops at a non-tag byte after a valid segment header (the - data section uses some block-length sub-rule for high-amplitude - segments that I haven't characterized). Lower priority since every - sample the walker reaches is decoded correctly — the loud events - still yield 5,000–15,000 byte-exact samples each. +- **Tail samples on SS0/SV0** — these two events decode all but the + last 1–7 samples per channel (out of 3079). Likely the same + "last segment is truncated" pattern. Minor; doesn't affect the + bulk of the data. + +## Sample counts (72,972 byte-exact total) + +| Event | Tran | Vert | Long | Status | +|---|---|---|---|---| +| event-a | 3328 | 3328 | 3328 | full | +| event-b | 2304 | 2304 | 2304 | full | +| event-c | 1280 | 1280 | 1280 | full | +| event-d | 1280 | 1280 | 1280 | full | +| JQ0 | 3328 | 3328 | 3328 | full | +| V70 | 3328 | 3328 | 3328 | full | +| SP0 | 3328 | 3328 | 3328 | full | +| SS0 | 3078 | 3072 | 3072 | minus 1–7 tail samples | +| SV0 | 3078 | 3072 | 3072 | minus 1–7 tail samples | ## What's now wired into production (2026-05-11 late) diff --git a/minimateplus/waveform_codec.py b/minimateplus/waveform_codec.py index c68097c..c0b40ab 100644 --- a/minimateplus/waveform_codec.py +++ b/minimateplus/waveform_codec.py @@ -196,8 +196,20 @@ def walk_body(body: bytes, start: Optional[int] = None) -> List[WaveformBlock]: t1 = body[i + 1] if t0 == 0x10 and t1 % 4 == 0 and 0 < t1 <= 0xFC: length = t1 // 2 + 2 + elif (t0 & 0xF0) == 0x10 and (t0 & 0x0F) != 0 and t1 % 4 == 0: + # Wide-NN nibble block: ``1X NN`` where X is the high nibble of a + # 12-bit NN value. 
NN = ((t0 & 0x0F) << 8) | t1. Block length + # = NN/2 + 2 bytes (NN nibble deltas, same as ``10 NN`` semantics + # but with NN > 0xFC). Confirmed 2026-05-11 in SP0 segment 12 + # where V continuation uses ``11 90`` = NN=0x190=400. + wide_nn = ((t0 & 0x0F) << 8) | t1 + length = wide_nn // 2 + 2 elif t0 == 0x20 and t1 % 4 == 0 and 0 < t1 <= 0xFC: length = t1 + 2 + elif (t0 & 0xF0) == 0x20 and (t0 & 0x0F) != 0 and t1 % 4 == 0: + # Wide-NN int8 block: ``2X NN`` extends NN to 12 bits the same way. + wide_nn = ((t0 & 0x0F) << 8) | t1 + length = wide_nn + 2 elif t0 == 0x00 and t1 % 4 == 0: length = 2 elif t0 == 0x30 and t1 % 4 == 0 and 0 < t1 <= 0x10: @@ -395,12 +407,16 @@ def decode_waveform_v2(body: bytes) -> Optional[dict]: cur = anchor for bi in range(block_start, block_end): blk = blocks[bi] - if blk.tag_hi == 0x10: + if (blk.tag_hi & 0xF0) == 0x10: + # Both ``10 NN`` (NN ≤ 0xFC) and wide-NN ``1X NN`` (X != 0) + # are nibble-delta streams. The walker has already used the + # right length; here we just iterate the payload bytes. for byte in blk.data: for nib in ((byte >> 4) & 0xF, byte & 0xF): cur += _s4(nib) out[channel].append(cur) - elif blk.tag_hi == 0x20: + elif (blk.tag_hi & 0xF0) == 0x20: + # ``20 NN`` and wide ``2X NN`` both carry int8 deltas. for byte in blk.data: cur += _i8(byte) out[channel].append(cur) diff --git a/tests/test_waveform_codec.py b/tests/test_waveform_codec.py index fe62cf6..ffd84ca 100644 --- a/tests/test_waveform_codec.py +++ b/tests/test_waveform_codec.py @@ -120,9 +120,10 @@ def test_walk_body_produces_blocks(event_name): body = _bw_body(path) blocks = walk_body(body) assert len(blocks) > 0 - # All blocks have one of the 5 known tag types. + # All blocks have one of the known tag families. ``1X NN`` / ``2X NN`` + # with X in 0..F are valid (X > 0 means wide-NN encoding). 
for b in blocks: - assert b.tag_hi in (0x10, 0x20, 0x00, 0x30, 0x40), ( + assert (b.tag_hi & 0xF0) in (0x10, 0x20, 0x00, 0x30, 0x40), ( f"unknown tag {b.tag_hi:#04x} at offset {b.offset}" ) @@ -254,25 +255,25 @@ def test_decode_waveform_v2_returns_dict(event_name): # for THIS segment's channel plus 2 continuation deltas (bytes [0:4]) for # the PREVIOUS channel. MULTICHANNEL_FIXTURES = [ - # ALL geo channels fully decoded (3328 samples × 3 = 9984 per event), byte-exact: + # ALL geo channels fully decoded for every event in the bundle: (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.V70"), "Tran", 3328), (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.V70"), "Vert", 3328), (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.V70"), "Long", 3328), (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.JQ0"), "Tran", 3328), (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.JQ0"), "Vert", 3328), (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.JQ0"), "Long", 3328), - # SP0 (loud all-channels with 30 NN blocks): all decoded samples match truth. - (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SP0"), "Tran", 2048), - (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SP0"), "Vert", 1538), - (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SP0"), "Long", 1536), - # SS0 / SV0 (loud-from-start): walker reaches a limited number of segments - # but every decoded sample matches truth. 
- (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SS0"), "Tran", 734), - (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SS0"), "Vert", 512), - (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SS0"), "Long", 512), - (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SV0"), "Tran", 1024), - (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SV0"), "Vert", 578), - (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SV0"), "Long", 512), + # SP0 (loud all-channels): NOW fully decodes after the wide-NN walker fix. + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SP0"), "Tran", 3328), + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SP0"), "Vert", 3328), + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SP0"), "Long", 3328), + # SS0 / SV0 (loud-from-start): walker now reaches 3072–3078 samples per + # channel (out of 3079 total). A few tail samples still missing. + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SS0"), "Tran", 3078), + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SS0"), "Vert", 3072), + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SS0"), "Long", 3072), + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SV0"), "Tran", 3078), + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SV0"), "Vert", 3072), + (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SV0"), "Long", 3072), # 5-8-26 quiet bundle: events without 30 NN blocks decode FULLY across all channels. 
(os.path.join(os.path.dirname(__file__), "fixtures", "decode-re-5-8-26", "event-a", "M529LKVQ.6S0"), "Tran", 3328), @@ -292,6 +293,14 @@ MULTICHANNEL_FIXTURES = [ "event-d", "M529LK2V.470"), "Vert", 1280), (os.path.join(os.path.dirname(__file__), "fixtures", "decode-re-5-8-26", "event-d", "M529LK2V.470"), "Long", 1280), + # event-b: 2304 samples × 3 — now fully decodes (was the historical + # walker-stop case; fixed by wide-NN tag support). + (os.path.join(os.path.dirname(__file__), "fixtures", "decode-re-5-8-26", + "event-b", "M529LK5Q.RG0"), "Tran", 2304), + (os.path.join(os.path.dirname(__file__), "fixtures", "decode-re-5-8-26", + "event-b", "M529LK5Q.RG0"), "Vert", 2304), + (os.path.join(os.path.dirname(__file__), "fixtures", "decode-re-5-8-26", + "event-b", "M529LK5Q.RG0"), "Long", 2304), ]