codec-re: crack Tran channel codec with high-amplitude May 11 bundle

User uploaded 3 high-amplitude events (PPV 6-7 in/s — shook the geophone hard) to decode-re/5-11-26/. These cracked the Tran codec:

- Preamble bytes [3:5] and [5:7] = Tran[0] and Tran[1] as int16 BE in 16-count units (LSB = 0.005 in/s). Confirmed across all 7 fixtures.
- First data block carries Tran deltas from sample 2 onward:
  * 10 NN block: NN/2 bytes of payload, each byte = two 4-bit signed nibble deltas (high nibble first)
  * 20 NN block: NN int8 signed deltas

Verified 22+42+46 = 110 Tran samples across SP0/SS0/SV0 with 0 errors against BW's ASCII export.

Why the earlier 96-combination brute force failed: the quiet 5-8 events all had T[0] = T[1] ≈ 0 so the preamble's per-channel encoding was undetectable. Loud events made the encoding obvious.

What's solved:
- minimateplus.waveform_codec.decode_tran_initial: returns first N Tran samples in 16-count units for any body.
- Walker length formula for in-data 30 NN blocks (NN*2 instead of NN*4).
- Walker now handles bodies that start with 20 NN (in addition to 10 NN).

What's still open:
- Tran past the first data block (multi-block channel switching).
- Vert / Long / MicL channel encodings.
- Walker correctness past offset ~427 in event-b.

Tests: 36 pass. decode_waveform_v2 still returns None — the full multi-channel decoder is not wired up. decode_tran_initial is the new verified entry point.

Files: minimateplus/waveform_codec.py, tests/test_waveform_codec.py (adds 5-11-26 fixtures + decode_tran_initial tests), and docs/instantel_protocol_reference.md §7.6.1 (Tran codec spec).
2026-05-11 18:30:56 +00:00
parent d3f77d1d96
commit 6ac126e05c
14 changed files with 10113 additions and 50 deletions
@@ -0,0 +1,50 @@
+"""Quick inspection of the new high-amplitude events."""
+import os, re, sys
+sys.path.insert(0, ".")
+from analysis.load_bundle import _parse_txt
+from minimateplus.waveform_codec import walk_body, find_data_start
+
+ROOT = "decode-re/5-11-26"
+
+
+def main():
+    for stem in ("M529LL1A.SP0", "M529LL1A.SS0", "M529LL1A.SV0"):
+        bin_path = os.path.join(ROOT, stem)
+        txt_path = bin_path + ".TXT"
+        with open(bin_path, "rb") as f:
+            raw = f.read()
+        body = raw[43:-26]
+        meta, samples = _parse_txt(txt_path)
+        n = len(samples["Tran"])
+
+        print(f"\n=== {stem} ===")
+        print(f"  file={len(raw)}, body={len(body)}, N_samples={n}")
+        print(f"  rectime={meta.get('Record Time')} pretrig={meta.get('Pre-trigger Length')}")
+        print(f"  PPV(T,V,L)={meta.get('Tran PPV')} / {meta.get('Vert PPV')} / {meta.get('Long PPV')}")
+        # Show first few non-trivial samples
+        print(f"  First 5 truth samples (in/s):")
+        for i in range(5):
+            print(f"    T={samples['Tran'][i]:8.3f}  V={samples['Vert'][i]:8.3f}  "
+                  f"L={samples['Long'][i]:8.3f}  M={samples['MicL'][i]:8.3f}")
+        # Peak sample positions
+        for ch in ("Tran", "Vert", "Long"):
+            vals = samples[ch]
+            peak_i = max(range(n), key=lambda i: abs(vals[i]))
+            print(f"  {ch}: peak {vals[peak_i]:.3f} at sample {peak_i} (t={peak_i/1024:.3f}s)")
+        # Body structure
+        start = find_data_start(body)
+        blocks = walk_body(body, start)
+        types = {}
+        for b in blocks:
+            types[b.tag_hi] = types.get(b.tag_hi, 0) + 1
+        print(f"  body start={start}, total blocks walked: {len(blocks)}")
+        print(f"  block tag counts: {types}")
+        # How far the walker got
+        if blocks:
+            last = blocks[-1]
+            walked = last.offset + last.length
+            print(f"  walker stopped at offset {walked}/{len(body)} ({100*walked/len(body):.0f}%)")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,71 @@
+"""Test: does the second '20 NN' block in SS0 continue Tran samples?"""
+import sys
+sys.path.insert(0, ".")
+from analysis.load_bundle import _parse_txt
+from minimateplus.waveform_codec import walk_body, find_data_start
+
+
+def s4(n):
+    return n if n < 8 else n - 16
+
+
+def i8(b):
+    return b if b < 128 else b - 256
+
+
+def main():
+    stem = "M529LL1A.SS0"
+    path = f"decode-re/5-11-26/{stem}"
+    with open(path, "rb") as f:
+        body = f.read()[43:-26]
+    _, samples = _parse_txt(path + ".TXT")
+    truth_T_16 = [round(v * 200) for v in samples["Tran"]]
+
+    # Preamble
+    T0 = int.from_bytes(body[3:5], "big", signed=True)
+    T1 = int.from_bytes(body[5:7], "big", signed=True)
+
+    # Walk blocks
+    start = find_data_start(body)
+    blocks = walk_body(body, start)
+
+    print(f"=== {stem} ===  T[0]={T0} T[1]={T1}")
+
+    # Hypothesis: Tran continues through ALL 10 NN and 20 NN blocks
+    # in order, until the next 40 02 segment header (which resets).
+    T = [T0, T1]
+    cur = T1
+    decoded_count = 2  # T[0], T[1] from preamble
+    for bi, blk in enumerate(blocks):
+        if blk.tag_hi == 0x10:
+            for byte in blk.data:
+                for nib in ((byte >> 4) & 0xF, byte & 0xF):
+                    cur += s4(nib)
+                    T.append(cur)
+                    decoded_count += 1
+        elif blk.tag_hi == 0x20:
+            for byte in blk.data:
+                cur += i8(byte)
+                T.append(cur)
+                decoded_count += 1
+        elif blk.tag_hi == 0x40:
+            # Segment header — stop here for this test
+            break
+        # 00 and 30 NN don't contribute to Tran (in this hypothesis)
+
+    # Compare to truth
+    print(f"  Decoded {len(T)} T samples up to first 40 02")
+    matches = sum(1 for i in range(min(len(T), len(truth_T_16))) if T[i] == truth_T_16[i])
+    print(f"  Matches in first {min(len(T), len(truth_T_16))}: {matches}")
+    # Print first divergence
+    for i in range(min(len(T), len(truth_T_16))):
+        if T[i] != truth_T_16[i]:
+            print(f"  First divergence: sample {i}: pred={T[i]}, truth={truth_T_16[i]}")
+            # Show context
+            print(f"    pred  [{i-3}:{i+5}]: {T[max(0,i-3):i+5]}")
+            print(f"    truth [{i-3}:{i+5}]: {truth_T_16[max(0,i-3):i+5]}")
+            break
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,71 @@
+"""Verify: preamble[3:7] = Tran[0], Tran[1] as int16 BE in 16-count units.
+And first 20/10 NN block = Tran deltas starting at sample 2.
+"""
+import os, sys
+sys.path.insert(0, ".")
+from analysis.load_bundle import _parse_txt
+from minimateplus.waveform_codec import walk_body, find_data_start
+
+
+def s4(n):
+    return n if n < 8 else n - 16
+
+
+def i8(b):
+    return b if b < 128 else b - 256
+
+
+def main():
+    for stem in ("M529LL1A.SP0", "M529LL1A.SS0", "M529LL1A.SV0"):
+        path = f"decode-re/5-11-26/{stem}"
+        with open(path, "rb") as f:
+            raw = f.read()
+        body = raw[43:-26]
+        _, samples = _parse_txt(path + ".TXT")
+        truth_T_16 = [round(v * 200) for v in samples["Tran"]]
+
+        # Preamble parse
+        T0_pre = int.from_bytes(body[3:5], "big", signed=True)
+        T1_pre = int.from_bytes(body[5:7], "big", signed=True)
+        print(f"\n=== {stem} ===")
+        print(f"  Preamble T[0]={T0_pre} (truth {truth_T_16[0]})  T[1]={T1_pre} (truth {truth_T_16[1]})  match={T0_pre==truth_T_16[0] and T1_pre==truth_T_16[1]}")
+
+        # First block
+        start = find_data_start(body)
+        blocks = walk_body(body, start)
+        if not blocks:
+            print(f"  no blocks found")
+            continue
+
+        # Assume first block = Tran deltas from sample 2
+        first = blocks[0]
+        T = [T0_pre, T1_pre]
+        cur_T = T1_pre
+        if first.tag_hi == 0x10:
+            # Nibble pairs
+            for byte in first.data:
+                for nib in ((byte >> 4) & 0xF, byte & 0xF):
+                    cur_T += s4(nib)
+                    T.append(cur_T)
+        elif first.tag_hi == 0x20:
+            # int8 per byte
+            for byte in first.data:
+                cur_T += i8(byte)
+                T.append(cur_T)
+
+        # Compare against truth
+        n_check = min(len(T), len(truth_T_16))
+        match_count = sum(1 for i in range(n_check) if T[i] == truth_T_16[i])
+        print(f"  First block type=0x{first.tag_hi:02x} NN=0x{first.tag_lo:02x} len={len(first.data)} → {len(T)} T samples decoded")
+        print(f"  Tran predicted[0:10]: {T[:10]}")
+        print(f"  Tran truth    [0:10]: {truth_T_16[:10]}")
+        print(f"  Matches in first {n_check}: {match_count} / {n_check}")
+        # Show where it diverges
+        for i in range(n_check):
+            if T[i] != truth_T_16[i]:
+                print(f"  First divergence: sample {i}: pred={T[i]}, truth={truth_T_16[i]}")
+                break
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,20 @@
+"""Walk blocks of the new 5-11-26 events and look at what comes after Tran block."""
+import sys
+sys.path.insert(0, ".")
+from minimateplus.waveform_codec import walk_body, find_data_start
+
+
+def main():
+    for stem in ("M529LL1A.SP0", "M529LL1A.SS0", "M529LL1A.SV0"):
+        with open(f"decode-re/5-11-26/{stem}", "rb") as f:
+            raw = f.read()
+        body = raw[43:-26]
+        start = find_data_start(body)
+        blocks = walk_body(body, start)
+        print(f"\n=== {stem} === body={len(body)} start={start} blocks walked={len(blocks)}")
+        for i, b in enumerate(blocks[:20]):
+            print(f"  block[{i:>2}] @ {b.offset:>5} tag={b.tag_hi:02x} NN=0x{b.tag_lo:02x}({b.tag_lo}) len={b.length} data[:24]={b.data[:24].hex(' ')}")
+
+
+if __name__ == "__main__":
+    main()