codec-re: solve waveform body block framing; per-byte sample mapping still open

Decoded the structural framing of the Blastware waveform body — the bytes
between the 21-byte STRT record and the 26-byte file footer.  The body is
a sequence of tagged variable-length blocks, NOT raw int16 LE.  Five tag
types (10/20/00/30/40 NN) and their lengths are now confirmed against the
4-event May 2026 fixture bundle.  Body splits cleanly into ~16 segments
(for a 1280-sample event) separated by 40 02 segment headers carrying a
monotonically incrementing uint32 LE counter at bytes [8:12].

What's done:
- minimateplus/waveform_codec.py — block walker, segment splitter, segment
  header parser.  decode_waveform_v2 is a stub returning None until the
  byte-to-sample mapping is solved; client.py is unchanged.
- tests/test_waveform_codec.py — 31 tests covering block detection, lengths,
  contiguous-walk, segment splitting, segment-header parsing, and counter
  monotonicity.  All pass.
- tests/fixtures/decode-re-5-8-26/ — bundled fixtures (4 events, BW binary
  + Blastware ASCII export each).
- docs/instantel_protocol_reference.md §7.6.1 — replaced retraction box
  with the verified structural decoding plus an explicit list of what's
  still open.

What's still open: the per-byte mapping inside 10 NN / 20 NN blocks.  96
channel-permutation × nibble-order × sign-convention combinations were
brute-force tested; none match BW's ASCII export to within ±1 ADC count.
The codec is more elaborate than uniform 4-bit deltas — likely a hybrid
variable-bit-width scheme with segment-anchor resync points.  Next
recommended step: capture an event with a known calibration tone to pin
down magnitude scaling.

Walker also bails out partway through event-b (open issue documented in
both the module and the protocol reference).
This commit is contained in:
Claude
2026-05-08 20:44:37 +00:00
committed by serversdown
parent 7bd0f8badf
commit d3f77d1d96
29 changed files with 10102 additions and 105 deletions
+142
View File
@@ -0,0 +1,142 @@
"""
load_bundle.py — extract body bytes from BW binary + parse sample columns from TXT.
Used by the codec reverse-engineering scripts in this directory.
"""
from __future__ import annotations
import os
import re
from dataclasses import dataclass
# Directory that holds one sub-folder per captured event (load_bundle joins an
# event name onto this path and lists that folder).  Built relative to this
# file's own directory; the path is not normalized, so it keeps its ".." part.
# NOTE(review): verify this relative location matches where the fixture bundle
# actually lives in the repository — confirm against the checked-in layout.
BUNDLE_ROOT = os.path.join(os.path.dirname(__file__), "..", "decode-re", "5-8-26")
@dataclass
class Bundle:
    """One event fixture: the Blastware binary paired with its ASCII export.

    Attributes:
        name: Event folder name the bundle was loaded from.
        bin_path: Path of the binary event file.
        txt_path: Path of the ASCII (.TXT) export.
        bin: Complete contents of the binary file.
        body: Bytes between the STRT record (ends at offset 43) and the
            footer (last 26 bytes of the file).
        strt: The 21-byte STRT record.
        samples: Per-channel sample lists parsed from the export, keyed by
            "Tran", "Vert", "Long" and "MicL".
        sample_rate: Value parsed from the "Sample Rate" metadata field.
        rectime_sec: Value parsed from the "Record Time" metadata field.
        pretrig_sec: Value parsed from the "Pre-trigger Length" metadata field.
        geo_range_ips: Value parsed from the "Geo Range" metadata field
            (presumably inches per second, going by the name — confirm).
        ppv: Per-axis peak values keyed by "Tran", "Vert" and "Long".
        mic_pspl: Value parsed from the "MicL PSPL" metadata field.
        serial: Contents of the "Serial Number" metadata field.
    """

    # --- where the data came from ---
    name: str
    bin_path: str
    txt_path: str

    # --- raw binary views ---
    bin: bytes
    body: bytes
    strt: bytes

    # --- decoded reference data from the ASCII export ---
    samples: dict
    sample_rate: int
    rectime_sec: float
    pretrig_sec: float
    geo_range_ips: float
    ppv: dict
    mic_pspl: float
    serial: str
def _parse_txt(path: str) -> tuple[dict[str, str], dict[str, list[float]]]:
    """Parse a Blastware ASCII export into metadata and per-channel samples.

    The export is read as: quoted ``"Field : value"`` metadata lines, then a
    column-header line that mentions all of Tran, Vert, Long and MicL, then
    rows of whitespace-separated numbers (one row per sample).

    Args:
        path: Path of the ``.TXT`` export file.

    Returns:
        A ``(meta, samples)`` tuple.  ``meta`` maps each field name to its raw
        (stripped) string value.  ``samples`` maps "Tran", "Vert", "Long" and
        "MicL" to the list of floats read from the first four columns, in
        file order.

    Raises:
        ValueError: If no line names all four channel columns.
    """
    channels = ("Tran", "Vert", "Long", "MicL")

    with open(path, "r", encoding="utf-8", errors="replace") as f:
        lines = f.read().splitlines()

    # The first line that names every channel is the column header; metadata
    # sits above it and sample rows below it.
    header_idx = next(
        (i for i, line in enumerate(lines) if all(ch in line for ch in channels)),
        None,
    )
    if header_idx is None:
        raise ValueError(f"no Tran/Vert/Long/MicL header in {path}")

    # The key group excludes ':' so the split happens at the FIRST colon.  A
    # greedy key would swallow colons that belong to the value (e.g. a
    # time-of-day "12:34:56") and produce a bogus field name.
    meta_line = re.compile(r'^"([^":]+?)\s*:\s*([^"]*)"')
    meta = {}
    for line in lines[:header_idx]:
        match = meta_line.match(line.strip())
        if match:
            meta[match.group(1).strip()] = match.group(2).strip()

    samples = {ch: [] for ch in channels}
    for line in lines[header_idx + 1:]:
        parts = line.split()
        if len(parts) < len(channels):
            continue  # blank line or something that is not a sample row
        try:
            row = [float(token) for token in parts[:len(channels)]]
        except ValueError:
            continue  # non-numeric row (e.g. trailing text); deliberately skipped
        for ch, value in zip(channels, row):
            samples[ch].append(value)

    return meta, samples
def load_bundle(name: str) -> Bundle:
    """Load one event fixture: the binary event file plus its ASCII export.

    The event folder ``BUNDLE_ROOT/<name>`` is expected to hold one ``.TXT``
    export and one binary file.  The binary is split using the fixed layout
    this module assumes: 22-byte header, 21-byte STRT record, waveform body,
    26-byte footer.

    Args:
        name: Event folder name under ``BUNDLE_ROOT`` (e.g. ``"event-a"``).

    Returns:
        A populated ``Bundle``.

    Raises:
        FileNotFoundError: If the folder lacks a ``.TXT`` export or a binary.
        ValueError: If the binary is too short for the assumed layout, or a
            metadata field is present but contains no parseable number.
    """
    header_len = 22
    strt_len = 21
    footer_len = 26
    body_start = header_len + strt_len  # 43

    folder = os.path.join(BUNDLE_ROOT, name)
    # os.listdir order is arbitrary: sort for a deterministic pick, and ignore
    # dotfiles / sub-directories so stray entries are never taken for the binary.
    files = sorted(
        entry
        for entry in os.listdir(folder)
        if not entry.startswith(".") and os.path.isfile(os.path.join(folder, entry))
    )
    # Extension match is case-insensitive so a lowercase ".txt" export is not
    # mistaken for the binary file.
    txt_name = next((f for f in files if f.upper().endswith(".TXT")), None)
    bin_name = next((f for f in files if not f.upper().endswith(".TXT")), None)
    if txt_name is None or bin_name is None:
        raise FileNotFoundError(
            f"{folder}: expected one .TXT export and one binary file, found {files}"
        )
    bin_path = os.path.join(folder, bin_name)
    txt_path = os.path.join(folder, txt_name)

    with open(bin_path, "rb") as f:
        binary = f.read()
    if len(binary) < body_start + footer_len:
        raise ValueError(
            f"{bin_path}: {len(binary)} bytes is too short for header+STRT+footer "
            f"({body_start + footer_len} bytes)"
        )
    strt = binary[header_len:body_start]
    body = binary[body_start:-footer_len]

    meta, samples = _parse_txt(txt_path)

    unsigned = r"\d+\.?\d*|\.\d+"
    signed = r"-?(?:\d+\.?\d*|\.\d+)"

    def _number(field: str, default: str, pattern: str = signed) -> str:
        # Return the first numeric token of a metadata field (or of its
        # default when the field is absent); fail loudly instead of with an
        # AttributeError on a missing regex match.
        raw = meta.get(field, default)
        found = re.search(pattern, raw)
        if found is None:
            raise ValueError(
                f"{txt_path}: no number in metadata field {field!r} (value {raw!r})"
            )
        return found.group(0)

    sample_rate = int(_number("Sample Rate", "1024", r"\d+"))
    rectime_sec = float(_number("Record Time", "3.0", unsigned))
    pretrig_sec = float(_number("Pre-trigger Length", "0"))
    geo_range_ips = float(_number("Geo Range", "10.0", unsigned))
    serial = meta.get("Serial Number", "").strip()
    ppv = {
        axis: float(_number(f"{axis} PPV", "0"))
        for axis in ("Tran", "Vert", "Long")
    }
    mic_pspl = float(_number("MicL PSPL", "0"))

    return Bundle(
        name=name,
        bin_path=bin_path,
        txt_path=txt_path,
        bin=binary,
        body=body,
        strt=strt,
        samples=samples,
        sample_rate=sample_rate,
        rectime_sec=rectime_sec,
        pretrig_sec=pretrig_sec,
        geo_range_ips=geo_range_ips,
        ppv=ppv,
        mic_pspl=mic_pspl,
        serial=serial,
    )
if __name__ == "__main__":
    # Smoke run: load each bundled event and print a one-line summary of the
    # body size, sample count and the metadata parsed from the ASCII export.
    for event_name in ("event-a", "event-b", "event-c", "event-d"):
        bundle = load_bundle(event_name)
        sample_count = len(bundle.samples["Tran"])
        ppv_text = ",".join(
            f"{bundle.ppv[axis]:.3f}" for axis in ("Tran", "Vert", "Long")
        )
        summary_parts = [
            f"{event_name}: body={len(bundle.body):>6}",
            f"N_samples={sample_count}",
            f"rate={bundle.sample_rate}",
            f"rectime={bundle.rectime_sec}",
            f"pretrig={bundle.pretrig_sec}",
            f"range={bundle.geo_range_ips}",
            f"PPV(T,V,L)={ppv_text}",
            f"MicL={bundle.mic_pspl}",
        ]
        print(" ".join(summary_parts))