codec-re: solve waveform body block framing; per-byte sample mapping still open

Decoded the structural framing of the Blastware waveform body — the bytes between the 21-byte STRT record and the 26-byte file footer. The body is a sequence of tagged variable-length blocks, NOT raw int16 LE. Five tag types (10/20/00/30/40 NN) and their lengths are now confirmed against the 4-event May 2026 fixture bundle. The body splits cleanly into ~16 segments (for a 1280-sample event) separated by 40 02 segment headers, each carrying a monotonically incrementing uint32 LE counter at bytes [8:12].

What's done:
- minimateplus/waveform_codec.py — block walker, segment splitter, segment header parser. decode_waveform_v2 is a stub returning None until the byte-to-sample mapping is solved; client.py is unchanged.
- tests/test_waveform_codec.py — 31 tests covering block detection, lengths, contiguous-walk, segment splitting, segment-header parsing, and counter monotonicity. All pass.
- tests/fixtures/decode-re-5-8-26/ — bundled fixtures (4 events, BW binary + Blastware ASCII export each).
- docs/instantel_protocol_reference.md §7.6.1 — replaced the retraction box with the verified structural decoding plus an explicit list of what's still open.

What's still open: the per-byte mapping inside 10 NN / 20 NN blocks. 96 channel-permutation × nibble-order × sign-convention combinations were brute-force tested; none match BW's ASCII export to within ±1 ADC count. The codec is more elaborate than uniform 4-bit deltas — likely a hybrid variable-bit-width scheme with segment-anchor resync points.

Next recommended step: capture an event with a known calibration tone to pin down magnitude scaling. The walker also bails out partway through event-b (open issue documented in both the module and the protocol reference).
2026-05-08 20:44:37 +00:00
parent 7bd0f8badf
commit d3f77d1d96
29 changed files with 10102 additions and 105 deletions
@@ -0,0 +1,93 @@
 """Brute-force test channel permutations / nibble orders on event-d (simplest signal)."""
 import sys
 import itertools
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 from minimateplus.waveform_codec import walk_body
 def s4(n):
    """Interpret a 4-bit value as a signed nibble (8..15 map to -8..-1)."""
    if n >= 8:
        return n - 16
    return n
 def decode(body, channel_perm, nibble_order, sign_mode, init_from_header):
    """Run one candidate nibble-delta decoder configuration over a waveform body.

    Only blocks whose ``tag_hi`` is 0x10 are consumed.  Each data byte yields
    two 4-bit deltas; delta number ``k`` is added to channel
    ``channel_perm[k % 4]``, and after every fourth delta the running value of
    all four channels is recorded.

    Args:
        body: Raw waveform body bytes, handed to ``walk_body``.
        channel_perm: Sequence of four channel indices (0..3).
        nibble_order: ``'high_first'`` takes the high nibble of each byte
            first; any other value takes the low nibble first.
        sign_mode: ``'signed'`` sign-extends each nibble via ``s4``; any other
            value uses the raw 0..15 value.
        init_from_header: When true, channels 0..2 start from ``body[4]``,
            ``body[5]`` and ``body[6]`` read as signed 8-bit values (channel 3
            starts at 0); otherwise every channel starts at 0.

    Returns:
        Four lists (one per channel) of cumulative values.  Entry 0 is the
        initial value; decoding stops as soon as each list holds 16 entries.
    """
    blocks = walk_body(body)
    if init_from_header:
        start = []
        for raw in (body[4], body[5], body[6]):
            # Reinterpret the unsigned byte as int8.
            start.append(raw - 256 if raw >= 128 else raw)
        start.append(0)
    else:
        start = [0, 0, 0, 0]
    running = list(start)
    series = [[value] for value in start]  # sample 0 = initial value
    high_first = nibble_order == 'high_first'
    signed = sign_mode == 'signed'
    consumed = 0  # number of deltas applied so far
    for blk in blocks:
        # Walk only the 10 NN data blocks.
        if blk.tag_hi == 0x10:
            for byte in blk.data:
                hi, lo = (byte >> 4) & 0xF, byte & 0xF
                pair = (hi, lo) if high_first else (lo, hi)
                for nib in pair:
                    target = channel_perm[consumed % 4]
                    running[target] += s4(nib) if signed else nib
                    consumed += 1
                    if consumed % 4 == 0:
                        # One delta per channel applied: emit a sample-set.
                        for channel_series, value in zip(series, running):
                            channel_series.append(value)
                    if len(series[0]) >= 16:
                        return series
    return series
 def best_match(pred, truth, n=10):
    """Return the sum of squared differences over the first ``n`` samples.

    ``n`` is clamped to the shorter of the two sequences.
    """
    limit = min(n, len(pred), len(truth))
    total = 0
    for idx in range(limit):
        total += (pred[idx] - truth[idx]) ** 2
    return total
 def main():
    """Score every decoder configuration on event-d and print the ten best."""
    axes = ("Tran", "Vert", "Long")
    bundle = load_bundle("event-d")
    # truth in 16-count units
    truth = {
        axis: [round(value * 200) for value in bundle.samples[axis]]
        for axis in axes
    }
    print("Truth event-d first 10 samples:")
    for axis in axes:
        print(f"  {axis}: {truth[axis][:10]}")

    # 24 channel permutations x 2 nibble orders x 2 sign modes x 2 init modes;
    # product() varies the last factor fastest, like nested for-loops would.
    search_space = itertools.product(
        itertools.permutations([0, 1, 2, 3]),
        ('high_first', 'low_first'),
        ('signed', 'unsigned'),
        (False, True),
    )
    ranked = []
    for perm, nibble_order, sign, init_h in search_space:
        decoded = decode(bundle.body, perm, nibble_order, sign, init_h)
        # Score as the Tran/Vert/Long channel-sum (decoded channels 0..2).
        score = 0
        for idx, axis in enumerate(axes):
            score += best_match(decoded[idx], truth[axis], n=10)
        label = f"perm={perm} nib={nibble_order[:1]} sign={sign[:3]} init={init_h}"
        ranked.append((score, label, decoded))
    ranked = sorted(ranked, key=lambda entry: entry[0])
    print("\nTop 10 configurations:")
    for score, label, decoded in ranked[:10]:
        print(f"  score={score:>5}  {label}  T={decoded[0][:8]}  V={decoded[1][:8]}  L={decoded[2][:8]}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,42 @@
 """Compare event-c and event-d (same N_samples) to find header vs data bytes."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def main():
    """Diff the event-c and event-d bodies byte by byte and print a summary.

    Prints both body lengths, the number of differing positions within the
    shared length, the common prefix, the first 32 differing offsets and a
    side-by-side listing of the first 100 bytes ('*' marks a difference).
    """
    bc = load_bundle("event-c")
    bd = load_bundle("event-d")
    # Compare over the length both bodies share.
    nc, nd = len(bc.body), len(bd.body)
    n = min(nc, nd)
    diffs = [i for i in range(n) if bc.body[i] != bd.body[i]]
    print(f"event-c body={nc}, event-d body={nd}")
    print(f"Total diffs (first {n}): {len(diffs)}")
    # The common prefix ends at the first differing offset (or at n if none).
    same_prefix = diffs[0] if diffs else n
    print(f"Common prefix length: {same_prefix}")
    print(f"event-c prefix: {bc.body[:same_prefix].hex(' ')}")
    print(f"\nFirst 32 diff positions: {diffs[:32]}")
    # Show the "diff fingerprint" of the first 100 bytes.  Never index past
    # the end of either body: a short event-c simply shortens the listing and
    # a short event-d leaves its column blank (flagged as a difference).
    print(f"\n  pos    c     d")
    for i in range(min(100, nc)):
        c_byte = bc.body[i]
        if i < nd:
            d_byte = bd.body[i]
            marker = " " if c_byte == d_byte else "*"
            print(f"  {i:>3}  {c_byte:02x}{marker}  {d_byte:02x}")
        else:
            print(f"  {i:>3}  {c_byte:02x}*")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,99 @@
 """
 Decoder v1: nibble-pair signed deltas in 10 NN blocks, 4-channel round-robin.
 """
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def s4(n):
    """Sign-extend a 4-bit value: 0..7 stay as-is, 8..15 become -8..-1."""
    return n - 16 if n >= 8 else n
 def walk_blocks(body, start):
    """Split ``body`` into tagged blocks, beginning at offset ``start``.

    Recognised two-byte tags (``NN`` must be a multiple of 4 except for 40 02):
      * ``10 NN`` (0 < NN <= 0xFC): block of NN/2 + 2 bytes
      * ``20 NN`` (0 < NN <= 0xFC): block of NN + 2 bytes
      * ``00 NN``: bare 2-byte marker, no payload
      * ``30 NN`` (0 < NN <= 0x10): block of NN * 4 bytes
      * ``40 02``: fixed 20-byte block

    Returns a list of ``(type, NN, payload)`` tuples where ``payload`` excludes
    the two tag bytes.  On the first unrecognised tag a final
    ``("??", first_tag_byte, next_8_bytes)`` entry is appended and the walk
    stops.
    """
    pos = start
    found = []
    total = len(body)
    while pos + 1 < total:
        tag, nn = body[pos], body[pos + 1]
        aligned = nn % 4 == 0
        if tag == 0x00 and aligned:
            found.append(("00", nn, b""))
            pos += 2
            continue
        if tag == 0x10 and aligned and 0 < nn <= 0xFC:
            kind, span = "10", nn // 2 + 2
        elif tag == 0x20 and aligned and 0 < nn <= 0xFC:
            kind, span = "20", nn + 2
        elif tag == 0x30 and aligned and 0 < nn <= 0x10:
            kind, span = "30", nn * 4
        elif tag == 0x40 and nn == 0x02:
            kind, span = "40", 20
        else:
            found.append(("??", tag, bytes(body[pos:pos + 8])))
            break
        found.append((kind, nn, bytes(body[pos + 2 : pos + span])))
        pos += span
    return found
 def decode_v1(body, start, n_samples):
    """Decode ``body`` with the v1 hypothesis and return four value lists.

    ``10 NN`` payloads are read as signed 4-bit deltas (high nibble first) and
    ``20 NN`` payloads as absolute signed 8-bit values.  Every decoded value
    goes to the next channel in strict T, V, L, M round-robin order and the
    channel's running value is appended to its output list.

    Args:
        body: Waveform body bytes.
        start: Offset passed to ``walk_blocks`` as the first block tag.
        n_samples: Currently unused; kept so existing callers keep working.

    Returns:
        ``[T, V, L, M]`` — one list of running values per channel.
    """
    blocks = walk_blocks(body, start)
    # 4 channels: T, V, L, M
    cur = [0, 0, 0, 0]
    out = [[], [], [], []]
    slot = 0  # number of values consumed so far; slot % 4 selects the channel
    for typ, _nn, data in blocks:
        if typ == "10":
            # 2 nibbles per byte, round-robin TVLM
            for byte in data:
                for nib in ((byte >> 4) & 0xF, byte & 0xF):
                    ch = slot % 4
                    cur[ch] += s4(nib)
                    out[ch].append(cur[ch])
                    # Advance exactly one slot per nibble.  The slot used to be
                    # advanced twice here, which skipped channels 1 and 3.
                    slot += 1
        elif typ == "20":
            # Hypothesis: each byte is an absolute int8 value, not a delta.
            for byte in data:
                ch = slot % 4
                cur[ch] = byte if byte < 128 else byte - 256
                out[ch].append(cur[ch])
                slot += 1
    return out
 def main():
    """Print the first 30 blocks that ``walk_blocks`` finds in event-c."""
    b = load_bundle("event-c")
    body = b.body
    # Find the first plausible `10 NN` tag within the first 15 bytes, without
    # reading past the end of a short body.
    start = None
    for s in range(min(15, len(body) - 1)):
        if body[s] == 0x10 and body[s + 1] % 4 == 0 and 0 < body[s + 1] <= 0xFC:
            start = s
            break
    if start is None:
        # Previously this fell through to a NameError on the unbound `start`.
        print("No 10 NN block tag found in the first 15 body bytes")
        return
    blocks = walk_blocks(body, start)
    # Print block-by-block what's in each
    print(f"Total blocks: {len(blocks)}")
    for typ, NN, data in blocks[:30]:
        print(f"  type={typ} NN=0x{NN:02x} data_len={len(data)} data_hex={data[:32].hex(' ')}{'...' if len(data) > 32 else ''}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,27 @@
 """Dump body bytes around a specific offset."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def dump_around(name: str, center: int, radius: int = 96):
    """Hex-dump the body of bundle ``name`` around offset ``center``.

    Rows of 32 bytes are printed starting at ``max(0, center - radius)`` and
    stepping until ``min(len(body), center + radius)``; the row that contains
    ``center`` is flagged.
    """
    body = load_bundle(name).body
    lo = max(0, center - radius)
    hi = min(len(body), center + radius)
    print(f"\n=== {name} body[{lo}:{hi}] (full body={len(body)}) ===")
    for offset in range(lo, hi, 32):
        hex_row = body[offset:offset + 32].hex(' ')
        if offset <= center < offset + 32:
            print(f"  +{offset:>5}  {hex_row}  <-- center")
        else:
            print(f"  +{offset:>5}  {hex_row}")
 def main():
    """Dump 96 bytes either side of each event's hard-coded trailer offset."""
    # Look at the trailer transitions
    trailer_starts = (
        ("event-a", 7047),
        ("event-b", 6475),
        ("event-c", 4043),
        ("event-d", 3941),
    )
    for event, offset in trailer_starts:
        dump_around(event, offset, 96)
 if __name__ == "__main__":
    main()
@@ -0,0 +1,18 @@
 """Dump the START of each body in 32-byte rows."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def main():
    """Print the first 512 body bytes of event-a and event-c as 32-byte rows."""
    for event in ("event-a", "event-c"):
        bundle = load_bundle(event)
        data = bundle.body
        sample_count = len(bundle.samples['Tran'])
        print(f"\n=== {event} body[0:512] (full body={len(data)}, samples={sample_count}) ===")
        stop = min(512, len(data))
        offset = 0
        while offset < stop:
            print(f"  +{offset:>5}  {data[offset:offset + 32].hex(' ')}")
            offset += 32
 if __name__ == "__main__":
    main()
@@ -0,0 +1,24 @@
 """Dump body bytes split into 32-byte rows starting from `start_offset`."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def dump(body: bytes, name: str, start: int, n_rows: int = 30):
    """Print up to ``n_rows`` 32-byte hex rows of ``body`` from offset ``start``."""
    print(f"\n=== {name} body[{start}:] (full body={len(body)}) ===")
    stop = min(start + 32 * n_rows, len(body))
    offset = start
    while offset < stop:
        print(f"  +{offset:>5}  {body[offset:offset + 32].hex(' ')}")
        offset += 32
 def main():
    """Dump the last 12 rows (32 bytes each) of every event body."""
    rows = 12
    for event in ("event-a", "event-b", "event-c", "event-d"):
        data = load_bundle(event).body
        # Show the tail of the body to expose the trailing structure.
        tail_start = max(0, len(data) - 32 * rows)
        dump(data, event, tail_start, rows)
 if __name__ == "__main__":
    main()
@@ -0,0 +1,41 @@
 """Search for structural repetition in the body bytes."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def find_pattern_offsets(body: bytes, pattern: bytes, max_count=20):
    """Return offsets of (possibly overlapping) occurrences of ``pattern``.

    The search stops once ``max_count`` offsets have been collected; the
    limit is checked after each hit is recorded.
    """
    hits = []
    cursor = body.find(pattern)
    while cursor >= 0:
        hits.append(cursor)
        if len(hits) >= max_count:
            break
        # Resume one byte later so overlapping matches are found too.
        cursor = body.find(pattern, cursor + 1)
    return hits
 def main():
    """Report where a fixed set of two-byte candidate markers occur per event."""
    # Candidate two-byte markers, mostly 0x10-prefixed.
    candidates = [
        b"\x10\x10", b"\x10\x04", b"\x10\x08", b"\x10\x0c", b"\x10\x18",
        b"\x10\x14", b"\x10\x20", b"\x10\x40", b"\x10\x80", b"\x10\x00",
        b"\x10\x01", b"\x10\x03", b"\x10\xf0", b"\xf1\x10", b"\x00\x10",
        b"\x40\x02", b"\x20\x04", b"\x30\x04", b"\x30\x08", b"\x00\x1a",
    ]
    for event in ("event-a", "event-b", "event-c", "event-d"):
        bundle = load_bundle(event)
        data = bundle.body
        print(f"\n=== {event} (body={len(data)}, N_samples={len(bundle.samples['Tran'])}) ===")
        for prefix in candidates:
            offs = find_pattern_offsets(data, prefix, max_count=200)
            if not 1 <= len(offs) <= 1000:
                continue
            # Show the first six and last three offsets.
            head, tail = offs[:6], offs[-3:]
            print(f"  '{prefix.hex()}' x{len(offs):>4}  first={head} last={tail}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,34 @@
 """Find body byte ranges that look like absolute int8 sample data (smooth waveform)."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def looks_like_smooth_int8(buf):
    """Return the mean absolute step between consecutive bytes read as int8.

    Lower values mean a smoother (more waveform-like) run.  Buffers shorter
    than 8 bytes return 0.0.
    """
    if len(buf) < 8:
        return 0.0
    signed = [raw - 256 if raw >= 128 else raw for raw in buf]
    steps = [abs(nxt - prev) for prev, nxt in zip(signed, signed[1:])]
    return sum(steps) / len(steps)
 def main():
    """Print the ten smoothest 64-byte windows of event-a and event-c."""
    win = 64
    for event in ("event-a", "event-c"):
        data = load_bundle(event).body
        # Score every 64-byte sliding window; the lowest avg_diff is smoothest.
        scored = [
            (offset, looks_like_smooth_int8(data[offset:offset + win]))
            for offset in range(len(data) - win)
        ]
        scored = sorted(scored, key=lambda item: item[1])
        print(f"\n=== {event} (body={len(data)}) — smoothest 10 windows ===")
        for offset, avg in scored[:10]:
            print(f"  +{offset:>5}  avg_diff={avg:.2f}  bytes={data[offset:offset+24].hex(' ')}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,23 @@
 """Print raw body hex + byte-distribution stats for one event."""
 from collections import Counter
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def main():
    """Print STRT, head/tail hex and the 16 most common byte values per event."""
    for event in ("event-a", "event-b", "event-c", "event-d"):
        bundle = load_bundle(event)
        data = bundle.body
        size = len(data)
        print(f"\n=== {event} ({size} body bytes) ===")
        print(f"  STRT: {bundle.strt.hex()}")
        print(f"  body[0:64]:   {data[:64].hex()}")
        print(f"  body[64:128]: {data[64:128].hex()}")
        print(f"  body[-32:]:   {data[-32:].hex()}")
        # Byte-value histogram, shown as (hex value, share of body).
        histogram = Counter(data)
        top = [(f'0x{value:02x}', f'{count/size:.2%}') for value, count in histogram.most_common(16)]
        print(f"  top 16 bytes: {top}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,142 @@
 """
 load_bundle.py — extract body bytes from BW binary + parse sample columns from TXT.
 Used by the codec reverse-engineering scripts in this directory.
 """
 from __future__ import annotations
 import os
 import re
 from dataclasses import dataclass
 BUNDLE_ROOT = os.path.join(os.path.dirname(__file__), "..", "decode-re", "5-8-26")
@dataclass
 class Bundle:
    """One fixture event: the raw binary file plus values parsed from its TXT export.

    Instances are built by ``load_bundle``; field order is part of the
    positional constructor signature.
    """
    name: str  # event folder name, e.g. "event-a"
    bin_path: str  # path of the binary file that was read
    txt_path: str  # path of the ".TXT" export that was parsed
    bin: bytes  # entire contents of the binary file
    body: bytes  # bin[43:-26]: bytes between STRT (ends at 43) and footer (last 26)
    strt: bytes  # bin[22:43]: 21-byte STRT record
    samples: dict  # {"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}
    sample_rate: int  # first integer in the "Sample Rate" meta field
    rectime_sec: float  # first number in the "Record Time" meta field
    pretrig_sec: float  # first number in the "Pre-trigger Length" meta field
    geo_range_ips: float  # first number in "Geo Range" (presumably in/s — confirm)
    ppv: dict  # {"Tran": float, "Vert": float, "Long": float}
    mic_pspl: float  # first number in the "MicL PSPL" meta field
    serial: str  # "Serial Number" meta field, stripped
 def _parse_txt(path: str) -> dict:
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        text = f.read()
    meta = {}
    samples = {"Tran": [], "Vert": [], "Long": [], "MicL": []}
    # Find header line that starts the columns ("Tran   Vert   Long   MicL").
    # Then every line after is sample data (4 tab-separated floats).
    lines = text.splitlines()
    header_idx = None
    for i, line in enumerate(lines):
        if "Tran" in line and "Vert" in line and "Long" in line and "MicL" in line:
            # The columns header.  Sample lines start a few lines later.
            header_idx = i
            break
    if header_idx is None:
        raise ValueError(f"no Tran/Vert/Long/MicL header in {path}")
    # Parse meta — quoted lines with "Field : value"
    for line in lines[:header_idx]:
        m = re.match(r'^"([^"]+)\s*:\s*([^"]*)"', line.strip())
        if m:
            k, v = m.group(1).strip(), m.group(2).strip()
            meta[k] = v
    # Parse samples
    for line in lines[header_idx + 1 :]:
        line = line.strip()
        if not line:
            continue
        parts = re.split(r"\s+", line)
        if len(parts) < 4:
            continue
        try:
            t = float(parts[0])
            v = float(parts[1])
            l = float(parts[2])
            m = float(parts[3])
        except ValueError:
            continue
        samples["Tran"].append(t)
        samples["Vert"].append(v)
        samples["Long"].append(l)
        samples["MicL"].append(m)
    return meta, samples
 def load_bundle(name: str) -> Bundle:
    """Load the fixture folder ``BUNDLE_ROOT/name`` into a ``Bundle``.

    The folder's first entry ending in ".TXT" is parsed as the text export and
    its first entry without that suffix is read as the binary file.  Numeric
    metadata is taken as the first number found in the matching meta field,
    falling back to a default string when the field is absent.
    """
    folder = os.path.join(BUNDLE_ROOT, name)
    entries = os.listdir(folder)
    bin_name = next(entry for entry in entries if not entry.endswith(".TXT"))
    txt_name = next(entry for entry in entries if entry.endswith(".TXT"))
    bin_path = os.path.join(folder, bin_name)
    txt_path = os.path.join(folder, txt_name)
    with open(bin_path, "rb") as handle:
        binary = handle.read()
    # Header is 22 bytes; STRT at [22:43]; footer at last 26 bytes.
    strt, body = binary[22:43], binary[43:-26]
    meta, samples = _parse_txt(txt_path)

    def _signed_number(text):
        # First (optionally negative) decimal number in the text.
        return float(re.search(r"-?[\d.]+", text).group(0))

    rate_match = re.search(r"(\d+)", meta.get("Sample Rate", "1024"))
    sample_rate = int(rate_match.group(1))
    rectime_match = re.search(r"([\d.]+)", meta.get("Record Time", "3.0"))
    rectime_sec = float(rectime_match.group(1))
    pretrig_sec = _signed_number(meta.get("Pre-trigger Length", "0"))
    range_match = re.search(r"([\d.]+)", meta.get("Geo Range", "10.0"))
    geo_range_ips = float(range_match.group(1))
    serial = meta.get("Serial Number", "").strip()
    ppv = {}
    for axis in ("Tran", "Vert", "Long"):
        ppv[axis] = _signed_number(meta.get(axis + " PPV", "0"))
    mic_pspl = _signed_number(meta.get("MicL PSPL", "0"))
    return Bundle(
        name=name,
        bin_path=bin_path,
        txt_path=txt_path,
        bin=binary,
        body=body,
        strt=strt,
        samples=samples,
        sample_rate=sample_rate,
        rectime_sec=rectime_sec,
        pretrig_sec=pretrig_sec,
        geo_range_ips=geo_range_ips,
        ppv=ppv,
        mic_pspl=mic_pspl,
        serial=serial,
    )
 if __name__ == "__main__":
    # Smoke test: print a one-line summary for each fixture event.
    for event in ("event-a", "event-b", "event-c", "event-d"):
        bundle = load_bundle(event)
        count = len(bundle.samples["Tran"])
        peak = bundle.ppv
        summary = (
            f"{event}: body={len(bundle.body):>6}  N_samples={count}  rate={bundle.sample_rate}  "
            f"rectime={bundle.rectime_sec}  pretrig={bundle.pretrig_sec}  range={bundle.geo_range_ips}  "
            f"PPV(T,V,L)={peak['Tran']:.3f},{peak['Vert']:.3f},{peak['Long']:.3f}  "
            f"MicL={bundle.mic_pspl}"
        )
        print(summary)
@@ -0,0 +1,67 @@
 """Try various nibble-level channel interleavings to find which one matches truth."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def s4(n):
    """Map a 4-bit value to its signed reading (values of 8 and above wrap negative)."""
    if n < 8:
        return n
    return n - 16
 def run_decoder(body, layout, skip, n_channels=4):
    """Accumulate signed nibble deltas into channels chosen by ``layout``.

    ``body[skip:]`` is read two nibbles per byte, high nibble first.  Nibble
    number ``k`` is added to channel ``layout(k)`` and that channel's running
    total is appended to its output list.

    Returns a list of ``n_channels`` lists of cumulative values.
    """
    series = [[] for _ in range(n_channels)]
    totals = [0] * n_channels
    position = 0  # index of the nibble within the whole stream
    for byte in body[skip:]:
        for nib in ((byte >> 4) & 0xF, byte & 0xF):
            target = layout(position)
            totals[target] += s4(nib)
            series[target].append(totals[target])
            position += 1
    return series
 def cmp(pred, truth, n=24):
    """Pair up the first ``n`` predicted and true values as ``(pred, truth)`` tuples."""
    limit = min(n, len(pred), len(truth))
    pairs = []
    for idx in range(limit):
        pairs.append((pred[idx], truth[idx]))
    return pairs
 def main():
    """Print cumulative decodes of event-c for several nibble-to-channel layouts.

    For every layout and every candidate preamble length (``skip``) the first
    ten cumulative values of channels 0..2 are printed, to be compared by eye
    with the truth rows printed at the top.
    """
    b = load_bundle("event-c")
    # Truth rows: TXT values scaled by 200 and rounded.
    for letter, key in (("T", "Tran"), ("V", "Vert"), ("L", "Long")):
        truth = [round(v * 200) for v in b.samples[key]]
        print(f"{letter} truth[0:10]: {truth[:10]}")
    # Nibble -> channel layouts (4 channels, index 0..3 = T, V, L, M).  Each
    # label spells the channel order of consecutive nibbles, so "VLMT" must
    # send nibble 0 to V (index 1), i.e. (i + 1) % 4.  The VLMT and MTVL
    # mappings used to be attached to each other's label.
    layouts = {
        "interleaved TVLM (0,1,2,3,0,1,2,3,...)": lambda i: i % 4,
        "interleaved VLMT": lambda i: (i + 1) % 4,
        "interleaved LMTV": lambda i: (i + 2) % 4,
        "interleaved MTVL": lambda i: (i + 3) % 4,
        # Same mapping as TVLM when nibbles are taken high-first.
        "byte-based TV LM TV LM (high T low V byte0; high L low M byte1)": lambda i: i % 4,
        # "chunks of 8 nibbles per channel": each channel gets 8 nibbles in a row
        "chunks-8 TVLM": lambda i: (i // 8) % 4,
        "chunks-16 TVLM": lambda i: (i // 16) % 4,
        # planar (full channel sequential) — not implemented, skipped below
        "planar T(0..N) V(N..2N) L(2N..3N) M(3N..4N)": None,  # special
    }
    for label, layout_fn in layouts.items():
        if layout_fn is None:
            continue
        for skip in (0, 4, 7, 8, 9, 11, 14):
            out = run_decoder(b.body, layout_fn, skip)
            # Show the first 10 cumulative values on each channel
            print(f"  skip={skip:2}  {label}")
            print(f"    T_cum[0:10]: {out[0][:10]}")
            print(f"    V_cum[0:10]: {out[1][:10]}")
            print(f"    L_cum[0:10]: {out[2][:10]}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,73 @@
 """Try decoding body as 4-bit signed nibble deltas, 4-channel round-robin."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 CHANNELS = ("Tran", "Vert", "Long", "MicL")
 def s4(n):
    """Sign-extend a 4-bit unsigned value (0..7 stay 0..7, 8..F become -8..-1)."""
    wraps_negative = n >= 8
    return n - 16 if wraps_negative else n
 def decode_nibbles(body: bytes, skip_bytes: int = 7, n_channels: int = 4):
    """Decode ``body[skip_bytes:]`` as signed nibble deltas, round-robin by channel.

    Each byte contributes its high nibble, then its low nibble.  Successive
    nibbles are added to channels 0, 1, ..., n_channels - 1, 0, ... and the
    channel's running total is appended to its output list.

    Returns a list of ``n_channels`` lists of cumulative values.
    """
    series = [[] for _ in range(n_channels)]
    totals = [0] * n_channels
    target = 0
    for byte in body[skip_bytes:]:
        for nib in ((byte >> 4) & 0xF, byte & 0xF):
            totals[target] += s4(nib)
            series[target].append(totals[target])
            target = (target + 1) % n_channels
    return series
 def cmp_to_truth(pred, truth, scale=16):
    """Compare predicted and true values over their common length.

    Returns ``(max_abs_err, mean_abs_err, n_compared)``, or ``None`` when
    there is nothing to compare.  ``scale`` is accepted but not used.
    """
    compared = min(len(pred), len(truth))
    errors = [abs(p - t) for p, t in zip(pred, truth)]
    if not errors:
        return None
    worst = max(errors)
    mean = sum(errors) / len(errors)
    return (worst, mean, compared)
 def main():
    """Decode event-a and event-c as nibble deltas and print them beside the truth."""
    scored_channels = CHANNELS[:3]  # MicL is in dB, skip for now
    for event in ("event-a", "event-c"):
        bundle = load_bundle(event)
        # TXT values are scaled by 200 and rounded.  Per the original note the
        # export resolution is 0.005 in/s, taken there to equal 16 ADC counts,
        # hence "16-count units".
        truth_in_16 = {
            chan: [round(value * 200) for value in bundle.samples[chan]]
            for chan in scored_channels
        }
        # Try decoder with skip_bytes = 7
        decoded = decode_nibbles(bundle.body, skip_bytes=7, n_channels=4)
        body_len = len(bundle.body)
        print(f"\n=== {event} ===")
        print(f"  body={body_len}, nibbles={2*(body_len-7)}, samples_per_ch={len(decoded[0])}")
        print(f"  truth samples per ch: {len(truth_in_16['Tran'])}")
        # Print first 24 of each
        for chan, channel_series in zip(CHANNELS, decoded):
            pred_first = channel_series[:24]
            if chan not in truth_in_16:
                print(f"  {chan} pred: {pred_first}  (truth in dB, skipped)")
                continue
            print(f"  {chan} pred: {pred_first}")
            print(f"  {chan} truth: {truth_in_16[chan][:24]}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,44 @@
 """Walk the body assuming chunks delimited by 0x10 NN tags. Print each chunk's structure."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def walk(body: bytes, start_offset: int = 7, max_chunks: int = 30):
    """Return every offset where 0x10 is followed by a multiple-of-4 byte.

    Scanning starts at ``start_offset`` and advances one byte at a time, so
    hits may fall inside another chunk's data.  ``max_chunks`` is accepted
    but not used.
    """
    return [
        pos
        for pos in range(start_offset, len(body) - 1)
        if body[pos] == 0x10 and body[pos + 1] % 4 == 0
    ]
 def main():
    """List the first 30 `10 NN` tag positions in event-c and event-d."""
    for event in ("event-c", "event-d"):
        data = load_bundle(event).body
        tag_offsets = []
        cursor = 7  # skip 7-byte preamble
        last = len(data) - 1
        while cursor < last:
            nn = data[cursor + 1]
            is_tag = data[cursor] == 0x10 and nn % 4 == 0 and nn > 0
            if is_tag:
                tag_offsets.append(cursor)
            # Step over both tag bytes after a hit, otherwise one byte.
            cursor += 2 if is_tag else 1
        print(f"\n=== {event} ===  body={len(data)}, total `10 NN` (NN%4==0, NN>0) tags: {len(tag_offsets)}")
        # For each of the first 30 tags: position, NN, distance to the next tag.
        for k, pos in enumerate(tag_offsets[:30]):
            NN = data[pos + 1]
            following = tag_offsets[k + 1] if k + 1 < len(tag_offsets) else len(data)
            gap = following - pos
            payload = data[pos + 2 : following]
            ellipsis = '...' if len(payload) > 24 else ''
            print(f"  chunk[{k:>3}] @ {pos:>5}  NN=0x{NN:02x} ({NN:>3}, NN/2={NN//2})  gap={gap:>3}  "
                  f"data={payload[:24].hex(' ')}{ellipsis}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,50 @@
 """Deterministic chunk walker: each chunk = [10 NN][NN/2 bytes data][2 bytes trailer]."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def walk_chunks(body: bytes, start: int = 7):
    """Yield ``(offset, NN, data_bytes, trailer_bytes)`` for consecutive chunks.

    A chunk is ``10 NN`` followed by NN/2 data bytes and a 2-byte trailer,
    with NN a non-zero multiple of 4 no larger than 0x80.  The walk ends at
    the first offset that does not fit this shape or would overrun ``body``.
    """
    pos = start
    total = len(body)
    while pos + 1 < total and body[pos] == 0x10:
        nn = body[pos + 1]
        if not 0 < nn <= 0x80 or nn % 4:
            return
        half = nn // 2
        stop = pos + half + 4  # 2 tag bytes + data + 2 trailer bytes
        if stop > total:
            return
        split = pos + 2 + half
        yield (pos, nn, bytes(body[pos + 2 : split]), bytes(body[split:stop]))
        pos = stop
 def main():
    """Walk every event with ``walk_chunks`` and print coverage statistics."""
    for event in ("event-c", "event-d", "event-a", "event-b"):
        bundle = load_bundle(event)
        data = bundle.body
        sample_count = len(bundle.samples['Tran'])
        parsed = list(walk_chunks(data))
        print(f"\n=== {event} ===  body={len(data)}  N_samples={sample_count}")
        print(f"  chunks parsed: {len(parsed)}")
        if not parsed:
            continue
        final_offset, final_nn = parsed[-1][0], parsed[-1][1]
        walk_end = final_offset + final_nn // 2 + 4
        print(f"  walk ended at offset {walk_end} (= {len(data) - walk_end} bytes from end)")
        # Stats
        data_total = sum(len(chunk[2]) for chunk in parsed)
        print(f"  total data bytes: {data_total}, total nibbles: {2*data_total}")
        if event in ("event-c", "event-d"):
            per_sample = (2 * data_total) / (sample_count * 4)
            print(f"  nibbles per (sample × channel): {per_sample:.3f}")
        # Offsets, NN and trailer hex of the first and last ten chunks (the
        # last ones likely sit at the transition to the trailer).
        head = [(chunk[0], chunk[1], chunk[3].hex()) for chunk in parsed[:10]]
        tail = [(chunk[0], chunk[1], chunk[3].hex()) for chunk in parsed[-10:]]
        print(f"  first 10 chunks: {head}")
        print(f"  last 10 chunks: {tail}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,51 @@
 """Walk chunks; auto-detect preamble length by finding first 10 NN."""
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def walk_chunks(body, start, max_NN=0x80):
    """Collect consecutive ``10 NN`` chunks starting at ``start``.

    Each chunk is 2 tag bytes, NN/2 data bytes and a 2-byte trailer, with NN a
    non-zero multiple of 4 no larger than ``max_NN``.

    Returns ``(chunks, end)`` where ``chunks`` is a list of
    ``(offset, NN, data, trailer)`` tuples and ``end`` is the offset at which
    the walk stopped.
    """
    found = []
    pos = start
    total = len(body)
    while pos + 1 < total and body[pos] == 0x10:
        nn = body[pos + 1]
        if not 0 < nn <= max_NN or nn % 4:
            break
        half = nn // 2
        stop = pos + half + 4
        if stop > total:
            break
        split = pos + 2 + half
        found.append((pos, nn, bytes(body[pos + 2 : split]), bytes(body[split:stop])))
        pos = stop
    return found, pos
 def find_first_chunk_start(body):
    """Locate the first `10 NN` chunk tag within the first 20 bytes of ``body``.

    NN must be a multiple of 4 in the range 4..0x7C.

    Returns:
        The offset of the 0x10 tag byte, or -1 if none is found.  Bodies
        shorter than 21 bytes are scanned only as far as they go instead of
        raising IndexError.
    """
    # The last usable tag offset is len(body) - 2 because NN is read at i + 1.
    for i in range(min(20, len(body) - 1)):
        if body[i] == 0x10 and body[i + 1] % 4 == 0 and 0 < body[i + 1] <= 0x7C:
            return i
    return -1
 def main():
    """Auto-detect each event's preamble length, walk its chunks and report."""
    for name in ("event-c", "event-d", "event-a", "event-b"):
        b = load_bundle(name)
        body = b.body
        start = find_first_chunk_start(body)
        print(f"\n=== {name} ===  body={len(body)}  N_samples={len(b.samples['Tran'])}  start={start}")
        if start < 0:
            # -1 means "not found"; using it as an offset would index from the
            # end of the body and produce misleading output.
            print("  no `10 NN` chunk tag found in the first 20 bytes")
            print(f"  bytes at start: {body[:16].hex(' ')}")
            continue
        chunks, end = walk_chunks(body, start)
        print(f"  chunks parsed: {len(chunks)}, walk ended at {end}")
        if chunks:
            print(f"  first 5 chunks: {[(c[0], c[1], c[3].hex()) for c in chunks[:5]]}")
            print(f"  last 5 chunks: {[(c[0], c[1], c[3].hex()) for c in chunks[-5:]]}")
            print(f"  bytes around end of walk: {body[end-4:end+12].hex(' ')}")
        else:
            print(f"  bytes at start: {body[start:start+16].hex(' ')}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,75 @@
 """
 Walker v4: alternate [10 NN] data chunks and [00 NN] (or other) marker tags.
 Hypothesis:
 - [10 NN]: data block, length NN/2 + 2 bytes (2-byte tag + NN/2 bytes data)
 - [00 NN]: 2-byte marker block (no data)
 - [20/30/40 NN]: special blocks with type-dependent length
 """
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 def walk(body, start):
    """Walk ``body`` from ``start`` under the v4 block-length hypothesis.

    Tags and assumed total lengths (NN a multiple of 4 unless noted):
      * ``10 NN`` (0 < NN <= 0x80): NN/2 + 2 bytes
      * ``00 NN``: 2-byte marker without payload
      * ``20 NN`` and ``30 NN``: NN/2 + 2 bytes
      * ``40 02``: fixed 22 bytes

    Returns ``(blocks, end)``.  Each block is
    ``(offset, type, NN, payload, length)``; an unrecognised tag is recorded
    as ``(offset, "??", first_byte, next_8_bytes, 0)`` and ends the walk.
    ``end`` is the offset where the walk stopped.
    """
    pos = start
    found = []
    while pos + 1 < len(body):
        tag, nn = body[pos], body[pos + 1]
        if tag == 0x40 and nn == 0x02:
            # Special "footer transition" block — try fixed 22 bytes
            kind, span = "40", 22
        elif nn % 4 != 0:
            kind, span = "??", 0
        elif tag == 0x00:
            # 2-byte marker
            kind, span = "00", 2
        elif tag == 0x10 and 0 < nn <= 0x80:
            # data chunk: length NN/2 + 2
            kind, span = "10", nn // 2 + 2
        elif tag == 0x20 or tag == 0x30:
            # Tentatively sized like a 10 NN block.
            kind, span = ("20" if tag == 0x20 else "30"), nn // 2 + 2
        else:
            kind, span = "??", 0
        if kind == "??":
            # Unknown tag — stop
            found.append((pos, "??", tag, bytes(body[pos:pos + 8]), 0))
            break
        found.append((pos, kind, nn, bytes(body[pos + 2 : pos + span]), span))
        pos += span
    return found, pos
 def main():
    """Run the v4 walker on every event and print block-type statistics."""
    from collections import Counter

    for event in ("event-c", "event-d", "event-a", "event-b"):
        bundle = load_bundle(event)
        data = bundle.body
        # Auto-detect start: first `10 NN` tag in the first 15 bytes, else 7.
        start = next(
            (
                s
                for s in range(15)
                if data[s] == 0x10 and data[s + 1] % 4 == 0 and 0 < data[s + 1] <= 0x80
            ),
            7,
        )
        blocks, end = walk(data, start)
        # Categorize
        type_counts = Counter(block[1] for block in blocks)
        print(f"\n=== {event} === body={len(data)} N={len(bundle.samples['Tran'])}  start={start}")
        print(f"  total blocks: {len(blocks)}, walk ended at {end}/{len(data)}")
        print(f"  type counts: {dict(type_counts)}")
        # Print last 5 blocks
        tail = [(block[0], block[1], block[2]) for block in blocks[-5:]]
        print(f"  last 5 blocks: {tail}")
        if end < len(data):
            print(f"  bytes at end: {data[end:end+24].hex(' ')}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,83 @@
 """
 Walker v5: flexible NN range and multiple block-type lengths.
 Hypothesis:
 - [10 NN]: 4-bit-delta data block, length = NN/2 + 2
 - [20 NN]: 8-bit-literal data block, length = NN + 2
 - [00 NN]: 2-byte marker (no payload)
 - [30 NN]: trailer/summary block, length = NN*4
 - [40 NN]: footer-marker block, fixed 22 bytes
 """
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 from collections import Counter
 def walk(body, start, max_blocks=10000):
    """Walk tagged blocks in *body* from offset *start* (walker v5 hypothesis).

    Total block lengths (2-byte tag included):

    - ``10 NN``: NN/2 + 2
    - ``20 NN``: NN + 2
    - ``00 NN``: 2
    - ``30 NN``: NN * 4
    - ``40 02``: 22

    NN must be a multiple of 4.  For ``10``/``20`` it must also be in
    (0, 0xFC]; for ``30`` it must be non-zero.

    Returns ``(blocks, end)``.  Each block is the tuple
    ``(offset, kind, nn, payload, length)``.  On an unrecognized tag a
    ``(offset, "??", first_tag_byte, next_8_bytes, 0)`` entry is appended
    and the walk stops.  A block that would run past the end of *body*
    stops the walk without adding an entry.  ``end`` is the offset at
    which the walk stopped.  At most *max_blocks* entries are produced.
    """
    i = start
    blocks = []
    while i + 1 < len(body) and len(blocks) < max_blocks:
        t0 = body[i]
        t1 = body[i + 1]
        aligned = t1 % 4 == 0
        if t0 == 0x10 and aligned and 0 < t1 <= 0xFC:
            kind, length = "10", t1 // 2 + 2
        elif t0 == 0x20 and aligned and 0 < t1 <= 0xFC:
            kind, length = "20", t1 + 2
        elif t0 == 0x00 and aligned:
            # 2-byte marker, no payload
            kind, length = "00", 2
        elif t0 == 0x30 and aligned and t1 > 0:
            # NN == 0 is rejected: NN*4 would be a zero-length block, the
            # offset would never advance, and the loop would append the
            # same entry until max_blocks is exhausted.
            kind, length = "30", t1 * 4
        elif t0 == 0x40 and t1 == 0x02:
            kind, length = "40", 22
        else:
            blocks.append((i, "??", t0, bytes(body[i:i + 8]), 0))
            break
        if i + length > len(body):
            # Truncated block: stop without recording it.
            break
        blocks.append((i, kind, t1, bytes(body[i + 2 : i + length]), length))
        i += length
    return blocks, i
 def main():
    """Run the v5 walker over the four named bundles and print block statistics."""
    for name in ("event-c", "event-d", "event-a", "event-b"):
        b = load_bundle(name)
        body = b.body
        # First offset in [0, 15) that looks like a ``10 NN`` tag (NN a
        # non-zero multiple of 4, at most 0xFC); fall back to offset 7.
        start = next(
            (
                off
                for off in range(15)
                if body[off] == 0x10
                and body[off + 1] % 4 == 0
                and 0 < body[off + 1] <= 0xFC
            ),
            7,
        )
        blocks, end = walk(body, start)
        types = Counter(kind for _, kind, *_ in blocks)
        print(f"\n=== {name} === body={len(body)} N={len(b.samples['Tran'])}  start={start}")
        print(f"  total blocks: {len(blocks)}, walk ended at {end}/{len(body)}")
        print(f"  type counts: {dict(types)}")
        stopped_on_unknown = bool(blocks) and blocks[-1][1] == "??"
        if stopped_on_unknown:
            print(f"  stopped at byte: 0x{blocks[-1][2]:02x}, prev 5 blocks: {[(bb[0], bb[1], bb[2]) for bb in blocks[-6:-1]]}")
        # Accumulate payload byte counts per kind in one pass; key order
        # follows first occurrence, the same order the Counter uses.
        payload_sizes = dict.fromkeys(types, 0)
        for entry in blocks:
            payload_sizes[entry[1]] += len(entry[3])
        print(f"  payload bytes by type: {payload_sizes}")
 if __name__ == "__main__":
    main()
@@ -0,0 +1,68 @@
 """
 Walker v6: handle 40 02 blocks correctly (length 20).
 Block formats:
 - [10 NN]: 4-bit nibble delta data, length = NN/2 + 2
 - [20 NN]: int8 literal data, length = NN + 2
 - [00 NN]: 2-byte marker
 - [30 NN]: trailer/summary block, length = NN*4
 - [40 02]: segment header, fixed length 20
 """
 import sys
 sys.path.insert(0, ".")
 from analysis.load_bundle import load_bundle
 from collections import Counter
 def walk(body, start, max_blocks=10000):
    """Walk tagged blocks in *body* from offset *start* (walker v6 lengths).

    Returns ``(blocks, end)``.  Each block is the tuple
    ``(offset, kind, nn, payload, length)`` where ``kind`` is the first tag
    byte as two hex digits.  An unrecognized tag appends
    ``(offset, "??", first_tag_byte, next_8_bytes, 0)`` and stops the walk.
    A block that would overrun *body* stops the walk without an entry.
    ``end`` is the offset where walking stopped.
    """
    def wire_length(tag, nn):
        """Return the total on-wire length for a tag pair, or None if unrecognized."""
        if tag == 0x40:
            return 20 if nn == 0x02 else None
        if nn % 4 != 0:
            return None
        if tag == 0x00:
            return 2
        if tag == 0x10 and 0 < nn <= 0xFC:
            return nn // 2 + 2
        if tag == 0x20 and 0 < nn <= 0xFC:
            return nn + 2
        if tag == 0x30 and 0 < nn <= 0x10:
            return nn * 4
        return None

    found = []
    pos = start
    size = len(body)
    while pos + 1 < size and len(found) < max_blocks:
        t0, t1 = body[pos], body[pos + 1]
        span = wire_length(t0, t1)
        if span is None:
            found.append((pos, "??", t0, bytes(body[pos:pos + 8]), 0))
            break
        if pos + span > size:
            # Truncated block: stop without recording it.
            break
        found.append((pos, f"{t0:02x}", t1, bytes(body[pos + 2 : pos + span]), span))
        pos += span
    return found, pos
 def main():
    """Run the v6 walker over the four named bundles and print block statistics."""
    for name in ("event-c", "event-d", "event-a", "event-b"):
        b = load_bundle(name)
        body = b.body
        # First offset in [0, 15) that looks like a ``10 NN`` tag (NN a
        # non-zero multiple of 4, at most 0xFC); fall back to offset 7.
        start = next(
            (
                off
                for off in range(15)
                if body[off] == 0x10
                and body[off + 1] % 4 == 0
                and 0 < body[off + 1] <= 0xFC
            ),
            7,
        )
        blocks, end = walk(body, start)
        types = Counter(kind for _, kind, *_ in blocks)
        print(f"\n=== {name} === body={len(body)} N={len(b.samples['Tran'])}  start={start}")
        print(f"  total blocks: {len(blocks)}, walk ended at {end}/{len(body)}")
        print(f"  type counts: {dict(types)}")
        stopped_on_unknown = bool(blocks) and blocks[-1][1] == "??"
        if stopped_on_unknown:
            # Context for diagnosing the unrecognized tag.
            print(f"  stopped at byte: 0x{blocks[-1][2]:02x} at offset {blocks[-1][0]}")
            print(f"  prev 5 blocks: {[(bb[0], bb[1], bb[2]) for bb in blocks[-6:-1]]}")
            print(f"  bytes around stop: {body[end-4:end+24].hex(' ')}")
        # Accumulate payload byte counts per kind in one pass; key order
        # follows first occurrence, the same order the Counter uses.
        payload_sizes = dict.fromkeys(types, 0)
        for entry in blocks:
            payload_sizes[entry[1]] += len(entry[3])
        print(f"  payload bytes by type: {payload_sizes}")
 if __name__ == "__main__":
    main()
@@ -860,127 +860,160 @@ MicL:  39 64 1D AA  =  0.0000875 psi
 ---
-#### 7.6.1 Blast / Waveform mode — ❌ NOT VERIFIED (retracted 2026-05-08)
+#### 7.6.1 Blast / Waveform mode — 🟡 STRUCTURAL FRAMING DECODED (2026-05-08)
-> ## ⚠️ RETRACTION (2026-05-08)
+> **Status (2026-05-08):** Block-level framing is solved and verified
 > against the 4-event May 8 2026 bundle (3 sec / 2 sec / 1 sec / 1 sec
 > events captured live from BE11529).  The per-byte mapping from block
 > data to ADC samples is **still open** — the previous int16 LE claim
 > is REFUTED (see history below).
 >
-> The "4-channel interleaved s16 LE, 8 bytes per sample-set" claim
+> The earlier "4-channel interleaved s16 LE, 8 bytes per sample-set"
-> below was **never actually validated**.  It got into this document
+> claim was never validated and was wrong.  No event in the project's
-> because the decoder built around that assumption produced full-scale
+> archive ever came close to ADC saturation, yet the int16 LE decoder
-> ±32K counts on every channel of the 4-2-26 capture, and the
+> consistently produced full-scale ±32K noise — that was the signature
-> ±32K-shaped output was misread as "the signal must have saturated."
+> of mis-aligned encoded data, not signal saturation.
 >
 > Cross-checking the BW-reported peaks proves the opposite:
 >
 > | Channel | BW PPV (in/s) | Expected ADC counts at 10 in/s FS |
 > |---|---|---|
 > | Tran | 0.420 | **1,376** |
 > | Vert | 3.870 | **12,686** |
 > | Long | 0.495 | **1,622** |
 >
 > None of these are anywhere near ±32K saturation.  No event in the
 > project's archive (across all captures from 1-2-26 onward) has
 > ever come close to saturation either.  Yet the decoder has
 > consistently produced ±32K-shaped noise on every event.  The right
 > conclusion is that the byte-to-sample interpretation has been wrong
 > the whole time, NOT that every event happened to saturate.
 >
 > What's actually known about the body bytes:
 >
 > - The byte distribution is heavily skewed (24% `0x00`, 10.5% `0x10`,
 >   plus high frequencies of `0x01 / 0x04 / 0x0F / 0xF0 / 0xF1`).  Lots
 >   of `10 XX` pairs.  Reading them as LE int16 produces uniform ±32K
 >   noise — the signature of mis-aligned or encoded data.
 > - The CHANGELOG note for v0.14.2 calls the body a "delta-encoded
 >   ADC stream" — that hint plus the byte distribution points toward
 >   a delta encoding with `0x10` as an escape marker, but no decoder
 >   has been worked out yet.
 > - The histogram-mode codec in §7.6.2 IS verified and decoded
 >   correctly (different format: 32-byte blocks with 9× int16 LE
 >   samples + metadata).  The same firmware emits both formats, so
 >   §7.6.2 may share encoding primitives with the waveform codec
 >   and is worth using as a structural hint when reverse-engineering.
 >
 > **Treat the spec below as a starting hypothesis to disprove, not
 > ground truth.**  The frame-layout pieces (STRT location, preamble,
 > chunk header) appear correct; the per-byte sample interpretation
 > is the open question.
-4-channel interleaved signed 16-bit little-endian, 8 bytes per sample-set:
+##### Body file layout
 A Blastware waveform-file body (the variable-length section between
 the 21-byte STRT record and the 26-byte file footer) is composed of
 **tagged variable-length blocks**, NOT raw int16 samples.
 ```
-[T_lo T_hi  V_lo V_hi  L_lo L_hi  M_lo M_hi]  × N sample-sets
+[preamble: 7 or 9 bytes]
 [stream of tagged blocks]
 [trailer: per-channel summary blocks]
 ```
- **T** = Transverse (Tran), **V** = Vertical (Vert), **L** = Longitudinal (Long), **M** = Microphone
+**Preamble:** starts with the 4-byte magic ``00 02 00 00``.  Single-shot
- Channel order follows the Blastware convention: Tran is always first (ch[0]).
+events have a 7-byte preamble; continuous events have a 9-byte preamble
- Encoding: signed int16 little-endian.  Full scale = ±32768 counts.
+(the 4 events in the May 8 2026 bundle split 2/2 between the two
- Sample rate: set by compliance config (typical: 1024 Hz for blast monitoring).
+lengths).  Bytes [4:9] of the preamble appear to encode initial
- Each A5 frame chunk carries a different number of waveform bytes.  Frame sizes
+per-channel state but the layout has not been pinned down — for some
-  are NOT multiples of 8, so naive concatenation scrambles channel assignments at
+events byte [4] equals truth Tran[0] in 16-count units (0.005 in/s
-  frame boundaries.  **Always track cumulative byte offset mod 8 to correct alignment.**
+LSB), but other channel-byte assignments don't fit consistently.
-**A5[0] frame layout:**
+##### Block tags (CONFIRMED 2026-05-08)
 Every block starts with a 2-byte tag.  Five tag types are confirmed:
 | Tag (hex) | Block type                          | On-wire length        |
 |-----------|-------------------------------------|-----------------------|
 | ``10 NN`` | Small-delta data block              | NN/2 + 2 bytes        |
 | ``20 NN`` | Literal data block (int8-shaped)    | NN + 2 bytes          |
 | ``00 NN`` | 2-byte marker between data blocks   | 2 bytes               |
 | ``30 NN`` | Trailer summary block               | NN × 4 bytes          |
 | ``40 02`` | Segment header                      | 20 bytes (fixed)      |
 NN is always a multiple of 4.  ``10 NN`` and ``20 NN`` data blocks
 alternate with ``00 NN`` markers — every ``10/20 NN`` block is
 followed by a ``00 NN`` marker before the next data block.
 ##### Segments
 The body is divided into ~16 SEGMENTS for a 1280-sample event (= 1
 segment per ~80 sample-sets), separated by ``40 02`` segment headers.
 A 3328-sample event has ~42 segments.
 The 18-byte ``40 02`` payload structure (CONFIRMED across all 4
 fixtures by inspecting the increment of bytes [8:12]):
 | Offset | Length | Field                                            |
 |--------|--------|--------------------------------------------------|
 | 0      | 4      | Anchor / channel state (open — see below)        |
 | 4      | 4      | Variable field (open)                            |
 | 8      | 4      | uint32 LE counter — increments by 1 per segment  |
 | 12     | 4      | Fixed pattern ``02 00 00 01``                    |
 | 16     | 2      | Variable tail                                    |
 The counter at bytes [8:12] starts in the 0x40s for a freshly-erased
 device and increments cleanly — useful as a structural sanity check.
 Examples from event-c (1 sec single-shot):
 ```
-db[7:]:   [11-byte header]  [21-byte STRT record]  [6-byte preamble]  [waveform ...]
+Segment header 1 (offset 235):
-STRT:     offset 11 in db[7:]
+  40 02 | 00 00 00 00 | 0a 4b 01 1e | 47 00 00 00 | 02 00 00 01 | 00 01
-           +0..3  b'STRT'     magic
+                                                  ^counter=0x47
-           +8..9  uint16 BE   total_samples  (full-record expected sample-set count)
+Segment header 2 (offset 523):
-          +16..17 uint16 BE   pretrig_samples (pre-trigger window, in sample-sets)
+  40 02 | ff fe ff fe | 13 f5 01 06 | 48 00 00 00 | 02 00 00 01 | 00 02
-          +18     uint8       rectime_seconds
+                                                  ^counter=0x48 (+1)
 preamble: +19..20 0x00 0x00   null padding
          +21..24 0xFF × 4    synchronisation sentinel
 Waveform: starts at strt_pos + 27 within db[7:]
 ```
-**A5[1..N] frame layout (non-metadata frames):**
+##### Trailer
-```
+The trailer (after the last segment's data) is a sequence of 32-byte
-db[7:]:   [8-byte per-frame header]  [waveform ...]
+``30 08`` blocks plus a final ``30 04`` / ``20 04`` / ``40 02`` summary
-Header:   [counter LE uint16, 0x00 × 6]  — frame sequence counter (0, 8, 12, 16, 20, …×0x400)
+ending in the constant 2-byte tail ``00 1A``.  These contain
-Waveform: starts at byte 8 of db[7:]
+per-channel statistics (peak times, peak values, mean offsets — bytes
-```
+in the form ``f3/f4/f5`` near ``20 10`` markers strongly resemble
 int8 channel-bias values around -12).  Detailed decoding of the
 trailer is outside the path needed for sample reconstruction.
-**Special frames:**
+##### What's still open
-| Frame index | Contents |
+- **The byte → sample mapping inside ``10 NN`` and ``20 NN`` blocks.**
  Tested hypotheses that did not match BW's ASCII export to within ±1
  ADC count:
  1. ``10 NN`` data = 4-bit signed nibble deltas, channel-interleaved,
     all 24 channel permutations × 2 nibble orders × 2 sign conventions
     × 2 init-from-header settings (= 96 combinations).  All produce
     values that diverge from truth after the first ~7 sample-sets.
  2. ``20 NN`` data = int8 absolute or delta samples for one channel.
     Magnitudes in observed blocks (peak ±34 in event-c at offset 351)
     do not match any channel's PPV at any plausible ADC quantization
     (1-count, 4-count, 8-count, 16-count).
  3. ``00 NN`` marker = "skip N sample-sets with zero deltas".  Sums
     of NN/4 across markers do not consistently match the 80
     sample-sets-per-segment count.
  The codec is more elaborate than uniform 4-bit deltas.  A hybrid
  variable-bit-width scheme (4-bit deltas in ``10 NN``, 8-bit deltas
  or absolutes in ``20 NN``, segment-header anchors after each
  ``40 02``) is the most plausible remaining hypothesis.
 - **The role of bytes [4:9] of the preamble.**  Byte 4 == Tran[0]
  truth value (in 16-count units) for events a/b/d, but doesn't
  fit consistently for event-c.  Bytes [5:9] don't match a simple
  per-channel encoding.
 - **Walker correctness past offset ~427 in event-b.**  The walker
  bails out partway through event-b — there is at least one block
  whose length doesn't fit the lengths confirmed for the other
  three events.  Likely a ``20 NN`` with NN > 0xFC (currently
  rejected by the walker), or a different length formula in some
  context.
 ##### Recommended next step
 A capture with a known external waveform (calibration tone of known
 frequency and amplitude) would unlock the magnitude scaling and
 disambiguate which channel a ``20 NN`` block belongs to.  Multiple
 captures of the same signal at different ``geo_range`` settings
 (Normal 10 in/s vs Sensitive 1.25 in/s) would also pin down whether
 sample values are scaled at the codec layer or only at the BW
 display layer.
 ##### Reference module
 ``minimateplus/waveform_codec.py`` implements the verified block
 walker (:func:`walk_body`, :func:`split_segments`,
 :func:`parse_segment_header`).  ``decode_waveform_v2`` is a stub that
 returns ``None`` until a verified per-byte sample decoder is wired
 up; production code (``minimateplus/client.py``) continues to use
 the legacy int16 LE decoder, which produces wrong samples but stable
 output shape — keep the ``.h5`` sidecars marked as
 "sample-codec unverified" until the byte-to-sample mapping lands.
 ##### History (do not re-derive)
 | Date | Note |
 |---|---|
-| A5[0]  | Probe response: STRT record + first waveform chunk |
+| 2026-05-08 | Block tagging confirmed against the 4-event May 2026 bundle.  The bodies of events a/c/d parse cleanly through `walk_body`; event-b walks partway and stops at offset 427 (open issue). |
-| A5[7]  | Event-time metadata strings only (no waveform data) |
+| 2026-05-08 | Earlier "4-channel interleaved s16 LE" claim formally retracted — never validated, produced full-scale ±32K noise on every event because the bytes are encoded, not raw samples. |
-| A5[9]  | Terminator frame (page_key=0x0000) — ignored |
+| 2026-04-02 | "Frame 7 metadata", "Frame 9 terminator", and `0x0400`-step chunk-counter claims documented as-was; later proved to be artifacts of an over-reading 5A walk (now superseded by §7.8.5–7.8.7). |
 | A5[1..6,8] | Waveform chunks |
 **Confirmed from 4-2-26 blast capture (total_samples=9306, pretrig=298, rate=1024 Hz):**
 ```
 Frame  Waveform bytes  Cumulative  Align(mod 8)
 A5[0]       933B           933B        0
 A5[1]       963B          1896B        5
 A5[2]       946B          2842B        0
 A5[3]       960B          3802B        2
 A5[4]       952B          4754B        2
 A5[5]       946B          5700B        2
 A5[6]       941B          6641B        4
 A5[8]       992B          7633B        1
 Total:     7633B  → 954 naive sample-sets, 948 alignment-corrected
 ```
 Only 948 of 9306 sample-sets captured (10%) — `stop_after_metadata=True` terminated
 download after A5[7] was received.
 **Channel identification note:**  Channel ordering [Tran, Vert, Long, Mic] = [ch0, ch1, ch2, ch3]
 is the Blastware convention.  This ordering has not been independently verified end-to-end,
 since no decoder yet produces samples that match BW's own rendering of the same event (see
 the retraction at the top of §7.6.1).  Once the body codec is decoded, the per-channel PPV
 values from the 0C record (Tran=0.420, Vert=3.870, Long=0.495 in/s for the 4-2-26 capture)
 provide the cross-check that pins down channel order.
 > **Historical note:** earlier revisions of this section claimed the 4-2-26 blast had
 > "saturated all four channels to ~32000–32617 counts," citing that as evidence the s16 LE
 > interpretation was correct.  That claim was wrong — the ±32K values were the broken
 > decoder's output, not the actual signal amplitude (which the 0C peaks above show was
 > nowhere near saturation).  Retracted 2026-05-08.
 ---
@@ -0,0 +1,242 @@
 """
 waveform_codec.py — block-walker for the MiniMate Plus waveform body codec.
 PARTIAL REVERSE-ENGINEERING — 2026-05-08.
 Status: STRUCTURAL FRAMING confirmed; per-block sample interpretation OPEN.
 This module replaces the int16-LE assumption that produced full-scale ±32K
 noise on every event. The body is NOT raw int16 LE: it is a sequence of
 tagged variable-length blocks. The block framing is solved here. The
 mapping from block bytes to ADC samples is **NOT yet pinned down** — the
 work-in-progress decoder ``decode_waveform_v2`` returns ``None`` until
 a verified algorithm is wired in.
 Until ``decode_waveform_v2`` returns a verified result, callers that need
 sample data should keep relying on the legacy decoder in ``client.py``
 (known-broken, but at least stable in shape) and not consume this
 module's sample output.
 ────────────────────────────────────────────────────────────────────────────
 Body structure (CONFIRMED 2026-05-08 against decode-re/5-8-26 4-event bundle)
 ────────────────────────────────────────────────────────────────────────────
 The Blastware waveform-file body lives between bytes [22+21=43] and the
 26-byte file footer (``[: -26]``).  Layout:
    [preamble: 7 or 9 bytes]
    [data section: a stream of tagged blocks]
    [trailer: per-channel summary blocks]
 The preamble starts with the magic ``00 02 00 00``.  After that there is
 either 3 or 5 bytes of header before the first ``10 NN`` block tag — in
 the 4-event bundle, single-shot events have a 7-byte preamble and
 continuous events have 9.  The exact meaning of bytes [4:9] is open
 (empirically: byte [4] for event-a == truth Tran[0]; byte [4] for
 event-b == truth Tran[0]; events c/d = 0; treating it as a per-channel
 "initial value" partially matches but is inconsistent across events).
 Blocks have 2-byte tags and these confirmed lengths:
 | Tag (hex) | Block type                           | Total length    |
 |-----------|--------------------------------------|-----------------|
 | ``10 NN`` | Small-delta data block               | NN/2 + 2 bytes  |
 | ``20 NN`` | Literal data block (looks int8-ish)  | NN + 2 bytes    |
 | ``00 NN`` | 2-byte marker between data blocks    | 2 bytes         |
 | ``30 NN`` | Trailer summary block                | NN × 4 bytes    |
 | ``40 02`` | Segment header                       | 20 bytes        |
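 In the 4-event bundle, the bodies of events a, c and d parse as a clean
 sequence of these blocks all the way through the trailer (when the walker
 is given the right preamble length).  Event-b is the exception: the walker
 stops partway through its body (around offset 427) on a block whose length
 does not fit the table above — this is an open issue.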
 Segments and the ``40 02`` header
 ────────────────────────────────────
 The body is divided into ~16 SEGMENTS, each separated by a ``40 02``
 header.  Each segment carries ~80 sample-sets (1280-sample event = 16
 segments × 80 sample-sets, 3328-sample event = ~42 segments).  The 18-byte
 ``40 02`` payload contains:
    bytes  0..3   4-byte channel anchor / state (varies per segment)
    bytes  4..7   4-byte field, varies (RMS/peak per channel?)
    bytes  8..11  4-byte uint32 LE counter (increments by 1 per segment;
                  starts at e.g. 0x47 for the first in-data segment)
    bytes 12..15  4-byte fixed pattern: 02 00 00 01
    bytes 16..17  2-byte segment-relative payload counter
 The counter at bytes [8..11] increments cleanly across segments — useful
 as a sanity check.  The role of bytes [0..3] (anchor candidates) and
 [4..7] is not pinned down: simple "channel state at segment boundary"
 hypotheses do NOT match truth across all four sample bundles tested.
 What's open
 ────────────
 The mapping ``block bytes → ADC samples`` is the open question.  Tested
 hypotheses that did **not** match BW's ASCII export to within the
 required ±1 ADC count:
 1. ``10 NN`` data = 4-bit signed nibble deltas, channel-interleaved
   (TVLM/VTLM/LMTV/all 24 permutations × 2 nibble orders × 2 sign
   conventions = 96 combinations tested).  All produce values that
   diverge from truth after the first ~7 sample-sets.
 2. ``20 NN`` data = int8 absolute samples for one channel.  Magnitudes
   in observed blocks (peak ~±34 in the smoothest event-c block at
   offset 351) do not match any channel's PPV at any plausible
   ADC-count quantization (1-count, 4-count, 8-count, 16-count).
 3. ``00 NN`` marker = "skip N sample-sets".  Sums of NN/4 across markers
   do not match 80 sample-sets per segment.
 4. Concatenating ALL ``10 NN`` payload bytes and reading as a continuous
   nibble stream (TVLM round-robin) produces the same 96-combination
   problem as (1).
 The most promising lead — that ``20 NN`` blocks carry literal int8
 sample-sequences for the largest-amplitude channel within a segment —
 is consistent with the smooth waveform shape of those payloads, but
 the magnitude scaling has not been pinned down.  It's possible that
 ``10 NN`` and ``20 NN`` blocks carry different bit-widths of the same
 channel-interleaved delta stream (variable-width like Rice coding)
 with 4-bit deltas as default and 8-bit deltas as escape.
 Potential next steps for whoever picks this up:
 - Capture an event with a KNOWN external waveform (e.g. a calibration
  signal of known frequency/amplitude) so the truth is unambiguous and
  the magnitude scaling is unambiguous.
 - Capture multiple events with the SAME signal but DIFFERENT geo_range
  (Normal 10 in/s vs Sensitive 1.25 in/s) to disambiguate scaling.
 - Examine sequential ``40 02`` segment headers for a single event — the
  4-byte "anchor" should reflect cumulative sample state at the
  boundary; matching it to truth at that sample index would unlock
  the per-segment delta decode.
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
@dataclass
class WaveformBlock:
    """A single tagged block extracted from a Blastware waveform-file body."""

    # Byte offset of the block's tag within the body.
    offset: int
    # First tag byte (0x10 / 0x20 / 0x00 / 0x30 / 0x40).
    tag_hi: int
    # Second tag byte (NN).
    tag_lo: int
    # Payload bytes; the 2-byte tag is not included.
    data: bytes
    # Total on-wire length of the block, tag included.
    length: int

    @property
    def kind(self) -> str:
        """Both tag bytes as two-digit lowercase hex, separated by a space."""
        return f"{self.tag_hi:02x} {self.tag_lo:02x}"
 def find_data_start(body: bytes) -> int:
    """Return the offset of the first ``10 NN`` tag near the start of *body*.

    Only offsets 0..19 are examined (fewer for short bodies).  A match
    needs byte 0x10 followed by an NN that is a multiple of 4 in
    (0, 0xFC].  Returns -1 when no offset matches.
    """
    scan_limit = min(20, len(body) - 1)
    candidates = (
        pos
        for pos in range(scan_limit)
        if body[pos] == 0x10 and body[pos + 1] % 4 == 0 and 0 < body[pos + 1] <= 0xFC
    )
    return next(candidates, -1)
 def walk_body(body: bytes, start: Optional[int] = None) -> List[WaveformBlock]:
    """Walk the tagged-block sequence starting at *start* (auto-detected by default).

    Args:
        body: Waveform-file body bytes.
        start: Offset of the first block tag.  ``None`` means auto-detect
            via :func:`find_data_start`.  A negative value (for example
            the ``-1`` "not found" result of :func:`find_data_start`
            passed through explicitly) yields an empty list; it is never
            used as a from-the-end index.

    Returns:
        The parsed blocks in stream order.  The walk stops at the first
        unrecognized tag, at a block that would run past the end of
        *body*, or at the end of *body*.  The stop offset is not returned;
        it equals ``blocks[-1].offset + blocks[-1].length`` (or *start*
        when the result is empty).
    """
    if start is None:
        start = find_data_start(body)
    if start < 0:
        # Covers both "auto-detect found nothing" and an explicit negative
        # offset, which Python indexing would otherwise wrap around.
        return []
    blocks: List[WaveformBlock] = []
    i = start
    while i + 1 < len(body):
        t0 = body[i]
        t1 = body[i + 1]
        if t0 == 0x10 and t1 % 4 == 0 and 0 < t1 <= 0xFC:
            length = t1 // 2 + 2
        elif t0 == 0x20 and t1 % 4 == 0 and 0 < t1 <= 0xFC:
            length = t1 + 2
        elif t0 == 0x00 and t1 % 4 == 0:
            length = 2
        elif t0 == 0x30 and t1 % 4 == 0 and 0 < t1 <= 0x10:
            length = t1 * 4
        elif t0 == 0x40 and t1 == 0x02:
            length = 20
        else:
            # Unknown tag; stop here.
            break
        if i + length > len(body):
            # Truncated block; stop without recording it.
            break
        data = bytes(body[i + 2 : i + length])
        blocks.append(WaveformBlock(offset=i, tag_hi=t0, tag_lo=t1, data=data, length=length))
        i += length
    return blocks
 def split_segments(blocks: List[WaveformBlock]) -> List[List[WaveformBlock]]:
    """Partition *blocks* into segments, each opened by a ``40 02`` header.

    Blocks before the first ``40 02`` header form a leading segment of
    their own.  Every ``40 02`` block starts a new segment and is that
    segment's first element.  An empty input gives an empty list.
    """
    segments: List[List[WaveformBlock]] = []
    for blk in blocks:
        opens_segment = (blk.tag_hi, blk.tag_lo) == (0x40, 0x02)
        # Start a fresh group at each header, and for the very first block.
        if opens_segment or not segments:
            segments.append([])
        segments[-1].append(blk)
    return segments
 def parse_segment_header(block: WaveformBlock) -> Optional[dict]:
    """Split the payload of a ``40 02`` segment header into labelled fields.

    Returns ``None`` when *block* is not tagged ``40 02`` or its payload
    is shorter than 18 bytes.  Otherwise returns a dict with the raw
    byte slices and the decoded little-endian counter.
    """
    is_header = (block.tag_hi, block.tag_lo) == (0x40, 0x02)
    if not is_header or len(block.data) < 18:
        return None
    payload = block.data
    return {
        # 4-byte field, role unconfirmed
        "anchor_bytes": payload[:4],
        # 4-byte field, role unconfirmed
        "field2": payload[4:8],
        # uint32 LE — increments by 1 per segment
        "counter": int.from_bytes(payload[8:12], "little", signed=False),
        # always b"\x02\x00\x00\x01"
        "fixed_pattern": payload[12:16],
        # last 2 bytes
        "tail": payload[16:18],
    }
 def decode_waveform_v2(body: bytes) -> Optional[dict]:
    """Placeholder for the per-channel sample decoder; always returns ``None``.

    The intended result, once a verified decoder exists, is a dict of the
    form ``{"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}``.
    The byte-to-sample mapping is still OPEN, so *body* is currently
    ignored and no samples are produced.  Use :func:`walk_body` to inspect
    block-level structure in the meantime.
    """
    return None
@@ -0,0 +1,252 @@
 """
 Tests for minimateplus.waveform_codec — Blastware waveform-file body block walker.
 These tests lock in the STRUCTURAL framing of the body codec.  The byte-to-sample
 mapping is open (see waveform_codec module docstring) — until that's nailed down,
 :func:`decode_waveform_v2` returns ``None`` and there is no per-sample assertion
 to make.
 """
 from __future__ import annotations
 import os
 import pytest
 from minimateplus.waveform_codec import (
    WaveformBlock,
    find_data_start,
    parse_segment_header,
    split_segments,
    walk_body,
    decode_waveform_v2,
 )
 FIXTURES = os.path.join(
    os.path.dirname(__file__), "fixtures", "decode-re-5-8-26"
 )
 def _bw_body(path):
    """Read the BW binary at *path* and return only its body bytes.

    Drops the leading 43 bytes (22-byte header plus 21-byte STRT record)
    and the trailing 26-byte footer.
    """
    leading = 22 + 21
    footer = 26
    with open(path, "rb") as fh:
        raw = fh.read()
    return raw[leading:-footer]
 # Fixture metadata — bundled BW binaries from a real BE11529 unit, May 8 2026.
 # Each is paired with a Blastware TXT export (the ASCII ground truth).
 # Keyed by event name.  Per-event fields:
 #   filename  — name of the BW binary file for that event
 #   n_samples — sample count for the event
 #   rectime   — record time in seconds
 FIXTURES_INFO = {
    "event-a": {
        "filename": "M529LKVQ.6S0",
        "n_samples": 3328,    # 3.0 s rectime + 0.25 s pretrig at 1024 sps
        "rectime": 3.0,
    },
    "event-b": {
        "filename": "M529LK5Q.RG0",
        # 2304 = 2.25 s x 1024; presumably the same 0.25 s pretrig at
        # 1024 sps as event-a — TODO confirm
        "n_samples": 2304,    # 2.0 s
        "rectime": 2.0,
    },
    "event-c": {
        "filename": "M529LK44.AB0",
        # 1280 = 1.25 s x 1024; presumably the same 0.25 s pretrig at
        # 1024 sps as event-a — TODO confirm
        "n_samples": 1280,    # 1.0 s
        "rectime": 1.0,
    },
    "event-d": {
        "filename": "M529LK2V.470",
        "n_samples": 1280,
        "rectime": 1.0,
    },
 }
 def _fixture_path(event_name):
    """Return the path of the bundled BW binary for *event_name*.

    The path is ``FIXTURES/<event_name>/<filename>``, with the filename
    taken from ``FIXTURES_INFO``.
    """
    filename = FIXTURES_INFO[event_name]["filename"]
    return os.path.join(FIXTURES, event_name, filename)
 # ── Find data start ──────────────────────────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_find_data_start_locates_first_block(event_name):
    """Auto-detection lands on a valid ``10 NN`` tag within the first 20 bytes."""
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    body = _bw_body(path)
    start = find_data_start(body)
    assert 0 <= start < 20, f"expected start in [0, 20), got {start}"
    # The detected offset must hold tag byte 0x10 ...
    assert body[start] == 0x10
    # ... followed by an NN that is a non-zero multiple of 4, at most 0xFC.
    nn = body[start + 1]
    assert nn % 4 == 0
    assert 0 < nn <= 0xFC
 def test_find_data_start_preamble_lengths():
    """Each of the 4 events has a 7-byte (single-shot) or 9-byte (continuous) preamble."""
    starts = {}
    for name in FIXTURES_INFO:
        path = _fixture_path(name)
        if not os.path.exists(path):
            pytest.skip(f"fixture missing: {path}")
        starts[name] = find_data_start(_bw_body(path))
    # Empirically: events a, b have 9-byte preamble; events c, d have 7-byte.
    expected = (("event-a", 9), ("event-b", 9), ("event-c", 7), ("event-d", 7))
    for name, preamble_len in expected:
        assert starts[name] == preamble_len
 # ── Block walker ─────────────────────────────────────────────────────────────
 def test_walk_body_empty_returns_empty():
    """An empty body yields no blocks."""
    parsed = walk_body(b"")
    assert parsed == []
 def test_walk_body_invalid_start_returns_empty():
    """A body whose first bytes are not a recognized tag yields no blocks."""
    unrecognized = b"\xff\xff\xff\xff"
    assert walk_body(unrecognized, start=0) == []
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_produces_blocks(event_name):
    """Every fixture walks to a non-empty block list made of known tags only.

    Skipped when the fixture file is not on disk.
    """
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")

    # High tag bytes of the 5 block types the walker is expected to emit.
    known_tags = (0x10, 0x20, 0x00, 0x30, 0x40)

    blocks = walk_body(_bw_body(path))
    assert len(blocks) > 0
    for b in blocks:
        assert b.tag_hi in known_tags, (
            f"unknown tag {b.tag_hi:#04x} at offset {b.offset}"
        )
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_block_lengths_consistent(event_name):
    """A block's ``length`` equals its 2 tag bytes plus its payload size.

    Skipped when the fixture file is not on disk.
    """
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")

    for b in walk_body(_bw_body(path)):
        # On-wire footprint: the 2-byte tag followed by the payload.
        footprint = 2 + len(b.data)
        assert footprint == b.length, (
            f"block at {b.offset} length mismatch: tag(2) + data({len(b.data)}) != length({b.length})"
        )
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_blocks_contiguous(event_name):
    """Consecutive blocks tile the body: no gaps and no overlaps.

    For every adjacent pair, the later block must start at the earlier
    block's offset plus its length.  Skipped when the fixture file is not
    on disk.
    """
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")

    blocks = walk_body(_bw_body(path))
    # ``i`` is the index of ``cur``, so the first pair is (block 0, block 1).
    for i, (prev, cur) in enumerate(zip(blocks, blocks[1:]), start=1):
        expected_offset = prev.offset + prev.length
        assert cur.offset == expected_offset, (
            f"gap/overlap between block {i-1} (off={prev.offset} len={prev.length}) "
            f"and block {i} (off={cur.offset})"
        )
 # ── Segment splitting ────────────────────────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_split_segments_yields_at_least_one(event_name):
    """Splitting the walked blocks of any fixture gives at least one segment.

    Skipped when the fixture file is not on disk.
    """
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")

    walked = walk_body(_bw_body(path))
    segments = split_segments(walked)
    assert len(segments) > 0
 def test_split_segments_segment_count_at_least_one_per_event():
     """The walker should produce at least one ``40 02`` segment header per event.

     Note: the walker currently bails out partway through event-b (still an
     open issue — the body codec uses block lengths the walker doesn't
     handle correctly past offset ~427).  The other 3 events walk farther
     and have many segment headers.

     Fixtures that are missing on disk are ignored so that a partial bundle
     still exercises whatever is present.  If no fixture at all is present
     the test is skipped instead of passing without checking anything.
     """
     checked = 0
     for name in FIXTURES_INFO:
         path = _fixture_path(name)
         if not os.path.exists(path):
             continue
         checked += 1
         blocks = walk_body(_bw_body(path))
         # Counts every block whose high tag byte is 0x40; the low byte is
         # not inspected here.
         n_40 = sum(1 for b in blocks if b.tag_hi == 0x40)
         assert n_40 >= 1, f"{name}: no 40 02 segment header found"

     if checked == 0:
         # Nothing was verified, so report a skip rather than a green pass.
         pytest.skip("no fixtures available")
 # ── Segment header parsing ───────────────────────────────────────────────────
 def test_parse_segment_header_returns_none_for_non_40():
     """``parse_segment_header`` returns ``None`` for a block tagged ``10 04``."""
     non_header = WaveformBlock(
         offset=0,
         tag_hi=0x10,
         tag_lo=0x04,
         data=b"\x00\x00",
         length=4,
     )
     result = parse_segment_header(non_header)
     assert result is None
 def test_parse_segment_header_decodes_fields():
     """Decode one known ``40 02`` block and check three of its fields.

     The sample is the first segment header of event-c (body offset 235):

         40 02 00 00 00 00 0a 4b 01 1e 47 00 00 00 02 00 00 01 00 01

     Only the 18 bytes after the ``40 02`` tag are passed as ``data``; the
     block ``length`` of 20 includes the tag.
     """
     hex_payload = "00000000 0a4b011e 47000000 02000001 0001"
     payload = bytes.fromhex(hex_payload.replace(" ", ""))
     header = WaveformBlock(
         offset=235,
         tag_hi=0x40,
         tag_lo=0x02,
         data=payload,
         length=20,
     )

     decoded = parse_segment_header(header)
     assert decoded is not None

     expected = {
         "counter": 0x47,  # uint32 LE
         "fixed_pattern": b"\x02\x00\x00\x01",
         "anchor_bytes": b"\x00\x00\x00\x00",
     }
     for field, value in expected.items():
         assert decoded[field] == value
 def test_segment_counter_increments():
     """Counters of the first ``40 02`` headers in event-c never go backwards.

     The counter is the ``"counter"`` field returned by
     ``parse_segment_header``.  At least 5 headers must be present, and at
     most the first 8 are compared.  The comparison is deliberately
     non-strict: the BW counter is global across the whole flash buffer, and
     some events may share a counter value with the previous event's tail
     block.  Skipped when the event-c fixture is not on disk.
     """
     path = _fixture_path("event-c")
     if not os.path.exists(path):
         pytest.skip("fixture missing")

     counters = [
         parse_segment_header(b)["counter"]
         for b in walk_body(_bw_body(path))
         if b.tag_hi == 0x40 and b.tag_lo == 0x02
     ]
     assert len(counters) >= 5, "expect at least 5 segments to verify increments"

     # Only the leading headers are compared (up to 8 of them).
     upper = min(8, len(counters))
     for i in range(1, upper):
         assert counters[i] >= counters[i - 1], (
             f"counter went backwards: {counters[i-1]} → {counters[i]}"
         )
 # ── decode_waveform_v2: currently a stub ─────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_decode_waveform_v2_returns_none_until_verified(event_name):
    """``decode_waveform_v2`` is still a stub and must return ``None``.

    The verified per-byte sample decoder is not wired up yet, so a ``None``
    result tells callers to keep using the legacy decoder.  Once a verified
    decoder lands, flip this assertion and add ground-truth tests against
    the bundled TXT exports.  Skipped when the fixture file is not on disk.
    """
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")

    decoded = decode_waveform_v2(_bw_body(path))
    assert decoded is None