codec-re: solve waveform body block framing; per-byte sample mapping still open

Decoded the structural framing of the Blastware waveform body — the bytes
between the 21-byte STRT record and the 26-byte file footer.  The body is
a sequence of tagged variable-length blocks, NOT raw int16 LE.  Five tag
types (10/20/00/30/40 NN) and their lengths are now confirmed against the
4-event May 2026 fixture bundle.  Body splits cleanly into ~16 segments
(for a 1280-sample event) separated by 40 02 segment headers carrying a
monotonically incrementing uint32 LE counter at bytes [8:12].

What's done:
- minimateplus/waveform_codec.py — block walker, segment splitter, segment
  header parser.  decode_waveform_v2 is a stub returning None until the
  byte-to-sample mapping is solved; client.py is unchanged.
- tests/test_waveform_codec.py — 31 tests covering block detection, lengths,
  contiguous-walk, segment splitting, segment-header parsing, and counter
  monotonicity.  All pass.
- tests/fixtures/decode-re-5-8-26/ — bundled fixtures (4 events, BW binary
  + Blastware ASCII export each).
- docs/instantel_protocol_reference.md §7.6.1 — replaced retraction box
  with the verified structural decoding plus an explicit list of what's
  still open.

What's still open: the per-byte mapping inside 10 NN / 20 NN blocks.  96
channel-permutation × nibble-order × sign-convention combinations were
brute-force tested; none match BW's ASCII export to within ±1 ADC count.
The codec is more elaborate than uniform 4-bit deltas — likely a hybrid
variable-bit-width scheme with segment-anchor resync points.  Next
recommended step: capture an event with a known calibration tone to pin
down magnitude scaling.

Known issue: the walker bails out partway through event-b, past body offset
~427 (documented in both the module and the protocol reference).
This commit is contained in:
Claude
2026-05-08 20:44:37 +00:00
committed by serversdown
parent 7bd0f8badf
commit d3f77d1d96
29 changed files with 10102 additions and 105 deletions
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
File diff suppressed because it is too large Load Diff
+252
View File
@@ -0,0 +1,252 @@
"""
Tests for minimateplus.waveform_codec — Blastware waveform-file body block walker.

These tests lock in the STRUCTURAL framing of the body codec.  The byte-to-sample
mapping is still open (see the waveform_codec module docstring); until that is
nailed down, :func:`decode_waveform_v2` returns ``None`` and there is no
per-sample assertion to make.
"""
from __future__ import annotations
import os
import pytest
from minimateplus.waveform_codec import (
WaveformBlock,
find_data_start,
parse_segment_header,
split_segments,
walk_body,
decode_waveform_v2,
)
# Directory holding the bundled fixture events, resolved relative to this file.
FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures", "decode-re-5-8-26")
def _bw_body(path):
    """Return the waveform body of the BW file at *path*.

    The leading 43 bytes (the 22-byte header followed by the 21-byte STRT
    record) and the trailing 26-byte footer are dropped; everything in
    between is returned as ``bytes``.
    """
    leading = 22 + 21  # file header + STRT record
    trailing = 26  # file footer
    with open(path, "rb") as fh:
        raw = fh.read()
    return raw[leading:-trailing]
# Fixture metadata — bundled BW binaries from a real BE11529 unit, May 8 2026.
# Each is paired with a Blastware TXT export (the ASCII ground truth).
FIXTURES_INFO = {
    event: {"filename": filename, "n_samples": n_samples, "rectime": rectime}
    for event, filename, n_samples, rectime in (
        # (event name, BW binary filename, sample count, record time in s)
        ("event-a", "M529LKVQ.6S0", 3328, 3.0),  # 3.0 s rectime + 0.25 s pretrig at 1024 sps
        ("event-b", "M529LK5Q.RG0", 2304, 2.0),  # 2.0 s
        ("event-c", "M529LK44.AB0", 1280, 1.0),  # 1.0 s
        ("event-d", "M529LK2V.470", 1280, 1.0),
    )
}
def _fixture_path(event_name):
    """Return the path of the BW binary bundled for *event_name*.

    The file sits in a per-event subdirectory of ``FIXTURES`` and is named by
    the event's ``"filename"`` entry in ``FIXTURES_INFO``.  An unknown event
    name raises ``KeyError``.
    """
    filename = FIXTURES_INFO[event_name]["filename"]
    return os.path.join(FIXTURES, event_name, filename)
# ── Find data start ──────────────────────────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_find_data_start_locates_first_block(event_name):
    """``find_data_start`` lands on a plausible ``10 NN`` tag near the body start.

    The reported offset must fall within the first 20 bytes, the byte there
    must be ``0x10``, and the byte after it must be a non-zero multiple of 4
    no larger than ``0xFC``.
    """
    fixture = _fixture_path(event_name)
    if not os.path.exists(fixture):
        pytest.skip(f"fixture missing: {fixture}")

    body = _bw_body(fixture)
    start = find_data_start(body)

    assert 0 <= start < 20, f"expected start in [0, 20), got {start}"
    assert body[start] == 0x10
    # Second tag byte (the "NN" of ``10 NN``).
    nn = body[start + 1]
    assert nn % 4 == 0
    assert 0 < nn <= 0xFC
def test_find_data_start_preamble_lengths():
    """Pin the preamble length ``find_data_start`` reports for each fixture.

    All 4 events have either a 7-byte (single-shot) or 9-byte (continuous)
    preamble.  The whole test is skipped as soon as any fixture file is
    found to be missing.
    """
    starts = {}
    for name in FIXTURES_INFO:
        fixture = _fixture_path(name)
        if not os.path.exists(fixture):
            pytest.skip(f"fixture missing: {fixture}")
        starts[name] = find_data_start(_bw_body(fixture))

    # Empirically: events a, b have 9-byte preamble; events c, d have 7-byte.
    expected = (
        ("event-a", 9),
        ("event-b", 9),
        ("event-c", 7),
        ("event-d", 7),
    )
    for name, preamble_len in expected:
        assert starts[name] == preamble_len
# ── Block walker ─────────────────────────────────────────────────────────────
def test_walk_body_empty_returns_empty():
    """Walking a zero-length body yields an empty list of blocks."""
    blocks = walk_body(b"")
    assert blocks == []
def test_walk_body_invalid_start_returns_empty():
    """A body that does not begin with a recognized tag yields no blocks."""
    garbage = b"\xff\xff\xff\xff"
    assert walk_body(garbage, start=0) == []
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_produces_blocks(event_name):
    """Every fixture walks to a non-empty block list made only of known tags."""
    fixture = _fixture_path(event_name)
    if not os.path.exists(fixture):
        pytest.skip(f"fixture missing: {fixture}")

    blocks = walk_body(_bw_body(fixture))
    assert len(blocks) > 0

    # High tag bytes of the 5 known block types.
    known_tags = (0x10, 0x20, 0x00, 0x30, 0x40)
    for block in blocks:
        assert block.tag_hi in known_tags, (
            f"unknown tag {block.tag_hi:#04x} at offset {block.offset}"
        )
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_block_lengths_consistent(event_name):
    """A block's ``length`` equals its 2 tag bytes plus its payload size."""
    fixture = _fixture_path(event_name)
    if not os.path.exists(fixture):
        pytest.skip(f"fixture missing: {fixture}")

    for block in walk_body(_bw_body(fixture)):
        payload_len = len(block.data)
        # Tag (2 bytes) + payload should equal length.
        assert 2 + payload_len == block.length, (
            f"block at {block.offset} length mismatch: "
            f"tag(2) + data({payload_len}) != length({block.length})"
        )
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_blocks_contiguous(event_name):
    """Consecutive blocks abut: each starts where its predecessor ends."""
    fixture = _fixture_path(event_name)
    if not os.path.exists(fixture):
        pytest.skip(f"fixture missing: {fixture}")

    blocks = walk_body(_bw_body(fixture))
    # Pair every block with its successor; ``idx`` is the successor's index.
    for idx, (prev, cur) in enumerate(zip(blocks, blocks[1:]), start=1):
        expected_offset = prev.offset + prev.length
        assert cur.offset == expected_offset, (
            f"gap/overlap between block {idx - 1} "
            f"(off={prev.offset} len={prev.length}) "
            f"and block {idx} (off={cur.offset})"
        )
# ── Segment splitting ────────────────────────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_split_segments_yields_at_least_one(event_name):
    """Splitting a fixture's walked blocks produces at least one segment."""
    fixture = _fixture_path(event_name)
    if not os.path.exists(fixture):
        pytest.skip(f"fixture missing: {fixture}")

    segments = split_segments(walk_body(_bw_body(fixture)))
    assert len(segments) > 0
def test_split_segments_segment_count_at_least_one_per_event():
    """The walker should produce at least one ``40 02`` segment header per event.

    Note: the walker currently bails out partway through event-b (still an
    open issue — the body codec uses block lengths the walker doesn't
    handle correctly past offset ~427).  The other 3 events walk farther
    and have many segment headers.

    Individual missing fixtures are passed over, but if *no* fixture is
    available the test is skipped instead of passing without having
    checked anything.
    """
    checked = 0
    for name in FIXTURES_INFO:
        path = _fixture_path(name)
        if not os.path.exists(path):
            continue
        checked += 1
        blocks = walk_body(_bw_body(path))
        # NOTE(review): only the high tag byte is matched here, so any
        # ``40 NN`` block counts — confirm whether ``tag_lo == 0x02`` should
        # also be required, as test_segment_counter_increments does.
        n_40 = sum(1 for b in blocks if b.tag_hi == 0x40)
        assert n_40 >= 1, f"{name}: no 40 02 segment header found"

    if not checked:
        # Mirror the other fixture-backed tests: absent data is a skip,
        # not a silent pass.
        pytest.skip("no fixtures available")
# ── Segment header parsing ───────────────────────────────────────────────────
def test_parse_segment_header_returns_none_for_non_40():
    """A block whose high tag byte is not ``0x40`` is not parsed as a header."""
    non_header = WaveformBlock(
        offset=0,
        tag_hi=0x10,
        tag_lo=0x04,
        data=b"\x00\x00",
        length=4,
    )
    result = parse_segment_header(non_header)
    assert result is None
def test_parse_segment_header_decodes_fields():
    """Parse a known ``40 02`` block and check the decoded field values."""
    # First segment header from event-c at body offset 235:
    #   40 02 00 00 00 00 0a 4b 01 1e 47 00 00 00 02 00 00 01 00 01
    # The payload is everything after the 2-byte tag; bytes.fromhex ignores
    # the spaces used here for grouping.
    payload = bytes.fromhex(
        "00 00 00 00"
        " 0a 4b 01 1e"
        " 47 00 00 00"
        " 02 00 00 01"
        " 00 01"
    )
    header = WaveformBlock(
        offset=235,
        tag_hi=0x40,
        tag_lo=0x02,
        data=payload,
        length=20,
    )

    decoded = parse_segment_header(header)

    assert decoded is not None
    assert decoded["counter"] == 0x47  # uint32 LE
    assert decoded["fixed_pattern"] == b"\x02\x00\x00\x01"
    assert decoded["anchor_bytes"] == b"\x00\x00\x00\x00"
def test_segment_counter_increments():
    """The counter at bytes [8:12] of each 40 02 payload never goes backwards.

    Uses event-c only and requires at least 5 segment headers.  The first
    (up to) 8 counters are compared pairwise with a non-strict ``>=`` check.
    """
    path = _fixture_path("event-c")
    if not os.path.exists(path):
        pytest.skip("fixture missing")
    blocks = walk_body(_bw_body(path))
    headers = [b for b in blocks if b.tag_hi == 0x40 and b.tag_lo == 0x02]
    counters = [parse_segment_header(b)["counter"] for b in headers]
    assert len(counters) >= 5, "expect at least 5 segments to verify increments"

    # The BW counter is global, incrementing across the whole flash buffer;
    # some events may share counter values with the previous event's tail
    # block, so equal neighbours are tolerated and only a decrease fails.
    for prev, cur in zip(counters[:7], counters[1:8]):
        assert cur >= prev, f"counter went backwards: {prev} -> {cur}"
# ── decode_waveform_v2: currently a stub ─────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_decode_waveform_v2_returns_none_until_verified(event_name):
    """``decode_waveform_v2`` is still a stub and must return ``None``.

    The verified per-byte sample decoder is not yet wired up, and a ``None``
    result tells callers to keep using the legacy decoder.  When a verified
    decoder lands, flip this assertion and add ground-truth tests against
    the bundled TXT exports.
    """
    fixture = _fixture_path(event_name)
    if not os.path.exists(fixture):
        pytest.skip(f"fixture missing: {fixture}")

    decoded = decode_waveform_v2(_bw_body(fixture))
    assert decoded is None