merge full s3 codec decoded #23

Merged
serversdown merged 18 commits from codec-re into main 2026-05-20 13:45:33 -04:00
29 changed files with 10102 additions and 105 deletions
Showing only changes of commit d3f77d1d96 - Show all commits
+93
View File
@@ -0,0 +1,93 @@
"""Brute-force test channel permutations / nibble orders on event-d (simplest signal)."""
import sys
import itertools
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
from minimateplus.waveform_codec import walk_body
def s4(n):
    """Sign-extend a 4-bit value: 0..7 are returned unchanged, 8..15 become -8..-1."""
    if n < 8:
        return n
    return n - 16
def decode(body, channel_perm, nibble_order, sign_mode, init_from_header):
    """Run one candidate decoder configuration over the body.

    Only blocks whose ``tag_hi`` is 0x10 are read. Each data byte yields two
    nibbles; nibble k is added to channel ``channel_perm[k % 4]``. After every
    fourth nibble the running value of all four channels is recorded.

    Args:
        body: raw body bytes, passed to ``walk_body``.
        channel_perm: 4-item sequence mapping nibble slot -> channel index.
        nibble_order: ``'high_first'`` reads the high nibble first; any other
            value reads the low nibble first.
        sign_mode: ``'signed'`` sign-extends each nibble with ``s4``; any other
            value uses the raw 0..15 nibble.
        init_from_header: when true, channels 0..2 start from body[4], body[5]
            and body[6] interpreted as signed 8-bit values; otherwise from 0.

    Returns:
        A list of four lists (one per channel). Entry 0 of each list is the
        initial value; decoding stops once 16 entries have been collected.
    """
    blocks = walk_body(body)

    if init_from_header:
        seed = [raw - 256 if raw >= 128 else raw
                for raw in (body[4], body[5], body[6])]
        seed.append(0)
    else:
        seed = [0, 0, 0, 0]

    running = list(seed)
    series = [[value] for value in seed]  # sample 0 = initial value
    high_first = nibble_order == 'high_first'
    signed = sign_mode == 'signed'
    consumed = 0  # nibbles consumed so far across all data blocks

    for blk in blocks:
        # Only the `10 NN` data blocks carry nibble deltas.
        if blk.tag_hi != 0x10:
            continue
        for byte in blk.data:
            hi, lo = (byte >> 4) & 0xF, byte & 0xF
            pair = (hi, lo) if high_first else (lo, hi)
            for nib in pair:
                target = channel_perm[consumed % 4]
                running[target] += s4(nib) if signed else nib
                consumed += 1
                if consumed % 4 == 0:
                    # One full 4-nibble group done: snapshot every channel.
                    for channel_series, value in zip(series, running):
                        channel_series.append(value)
                    if len(series[0]) >= 16:
                        return series
    return series
def best_match(pred, truth, n=10):
    """Return the sum of squared differences over the first n samples.

    The comparison length is clamped to the shorter of the two sequences.
    """
    span = min(n, len(pred), len(truth))
    total = 0
    for idx in range(span):
        diff = pred[idx] - truth[idx]
        total += diff ** 2
    return total
def main():
    """Score every decoder configuration against event-d and print the 10 best."""
    axes = ("Tran", "Vert", "Long")
    bundle = load_bundle("event-d")

    # Truth in 16-count units (TXT value * 200).
    tr = {axis: [round(v * 200) for v in bundle.samples[axis]] for axis in axes}
    print("Truth event-d first 10 samples:")
    for axis in axes:
        print(f" {axis}: {tr[axis][:10]}")

    # 24 permutations x 2 nibble orders x 2 sign modes x 2 init modes = 192 runs.
    configs = itertools.product(
        itertools.permutations([0, 1, 2, 3]),
        ('high_first', 'low_first'),
        ('signed', 'unsigned'),
        (False, True),
    )
    results = []
    for perm, nibble_order, sign, init_h in configs:
        decoded = decode(bundle.body, perm, nibble_order, sign, init_h)
        # Score = summed squared error over the three geophone channels.
        score = sum(
            best_match(decoded[idx], tr[axis], n=10)
            for idx, axis in enumerate(axes)
        )
        label = f"perm={perm} nib={nibble_order[:1]} sign={sign[:3]} init={init_h}"
        results.append((score, label, decoded))

    ranked = sorted(results, key=lambda entry: entry[0])
    print("\nTop 10 configurations:")
    for score, label, decoded in ranked[:10]:
        print(f" score={score:>5} {label} T={decoded[0][:8]} V={decoded[1][:8]} L={decoded[2][:8]}")


if __name__ == "__main__":
    main()
+42
View File
@@ -0,0 +1,42 @@
"""Compare event-c and event-d (same N_samples) to find header vs data bytes."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def main():
    """Compare the event-c and event-d bodies byte by byte and print a report.

    Prints both body lengths, the number of differing positions in the
    overlapping range, the common prefix, the first 32 differing offsets and a
    per-byte fingerprint of up to the first 100 bytes.
    """
    bc = load_bundle("event-c")
    bd = load_bundle("event-d")

    nc, nd = len(bc.body), len(bd.body)
    n = min(nc, nd)
    diffs = [i for i in range(n) if bc.body[i] != bd.body[i]]
    print(f"event-c body={nc}, event-d body={nd}")
    print(f"Total diffs (first {n}): {len(diffs)}")

    # The common prefix ends at the first differing offset (or at the end of
    # the overlap when the two bodies agree everywhere).
    same_prefix = diffs[0] if diffs else n
    print(f"Common prefix length: {same_prefix}")
    print(f"event-c prefix: {bc.body[:same_prefix].hex(' ')}")

    print(f"\nFirst 32 diff positions: {diffs[:32]}")

    # Fingerprint of the first 100 bytes. Both bodies are bounds-checked
    # BEFORE indexing so that a body shorter than 100 bytes cannot raise.
    print(f"\n pos c d")
    for i in range(min(100, nc)):
        c_byte = bc.body[i]
        if i < nd:
            d_byte = bd.body[i]
            marker = " " if c_byte == d_byte else "*"
            print(f" {i:>3} {c_byte:02x}{marker} {d_byte:02x}")
        else:
            # event-d has no byte at this offset; flag the row as differing.
            print(f" {i:>3} {c_byte:02x}*")


if __name__ == "__main__":
    main()
+99
View File
@@ -0,0 +1,99 @@
"""
Decoder v1: nibble-pair signed deltas in 10 NN blocks, 4-channel round-robin.
"""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def s4(n):
    """Interpret a nibble (0..15) as a signed 4-bit two's-complement value."""
    offset = 0 if n < 8 else 16
    return n - offset
def walk_blocks(body, start):
    """Split ``body`` into tagged blocks, starting at offset ``start``.

    Recognised two-byte tags (t0, t1) and the total block length they imply:
      - 10 NN (NN multiple of 4, 4..0xFC): NN/2 + 2 bytes
      - 20 NN (NN multiple of 4, 4..0xFC): NN + 2 bytes
      - 00 NN (NN multiple of 4):          2 bytes, no payload
      - 30 NN (NN multiple of 4, 4..0x10): NN * 4 bytes
      - 40 02:                             20 bytes

    Returns a list of ``(type, NN, payload)`` tuples. Walking stops at the
    first unrecognised tag, which is recorded as ``("??", t0, next_8_bytes)``.
    """
    def total_length(t0, t1):
        """Return the block length for a recognised tag, else None."""
        if t0 == 0x40:
            return 20 if t1 == 0x02 else None
        if t1 % 4 != 0:
            return None
        if t0 == 0x00:
            return 2
        if t0 == 0x10 and 0 < t1 <= 0xFC:
            return t1 // 2 + 2
        if t0 == 0x20 and 0 < t1 <= 0xFC:
            return t1 + 2
        if t0 == 0x30 and 0 < t1 <= 0x10:
            return t1 * 4
        return None

    parsed = []
    pos = start
    while pos + 1 < len(body):
        t0, t1 = body[pos], body[pos + 1]
        size = total_length(t0, t1)
        if size is None:
            parsed.append(("??", t0, bytes(body[pos:pos + 8])))
            break
        payload = bytes(body[pos + 2 : pos + size])
        parsed.append((f"{t0:02x}", t1, payload))
        pos += size
    return parsed
def decode_v1(body, start, n_samples):
    """Decode the body by accumulating values from `10 NN` and `20 NN` blocks.

    Values are dealt to four channels (T, V, L, M) in strict round-robin order:
      - `10` blocks: each byte holds two signed 4-bit deltas (high nibble
        first) that are added to the running value of the current channel.
      - `20` blocks: each byte is a signed 8-bit value that replaces the
        running value of the current channel (treated as absolute).
    All other block types are ignored.

    Args:
        body: raw body bytes.
        start: offset of the first block tag, passed to ``walk_blocks``.
        n_samples: currently unused; kept so existing callers keep working.

    Returns:
        A list of four lists holding the running value of each channel after
        every update to that channel.
    """
    blocks = walk_blocks(body, start)
    cur = [0, 0, 0, 0]
    out = [[], [], [], []]
    slot = 0  # number of values dealt so far; slot % 4 selects the channel

    for typ, _nn, data in blocks:
        if typ == "10":
            for byte in data:
                for nib in ((byte >> 4) & 0xF, byte & 0xF):
                    ch = slot % 4
                    cur[ch] += s4(nib)
                    out[ch].append(cur[ch])
                    # Advance exactly one slot per nibble. The previous code
                    # advanced twice per nibble, so only two of the four
                    # channels ever received deltas.
                    slot += 1
        elif typ == "20":
            for byte in data:
                value = byte if byte < 128 else byte - 256
                ch = slot % 4
                cur[ch] = value  # treated as an absolute sample
                out[ch].append(cur[ch])
                slot += 1
    return out
def main():
    """Walk the event-c body block by block and print the first 30 blocks."""
    b = load_bundle("event-c")
    body = b.body

    # Locate the first `10 NN` tag within the first 15 bytes. Fall back to
    # offset 7 (the assumed 7-byte preamble) so `start` is always defined, and
    # never index past the end of a short body.
    start = 7
    for s in range(min(15, len(body) - 1)):
        if body[s] == 0x10 and body[s + 1] % 4 == 0 and 0 < body[s + 1] <= 0xFC:
            start = s
            break

    blocks = walk_blocks(body, start)

    print(f"Total blocks: {len(blocks)}")
    for typ, NN, data in blocks[:30]:
        print(f" type={typ} NN=0x{NN:02x} data_len={len(data)} data_hex={data[:32].hex(' ')}{'...' if len(data) > 32 else ''}")


if __name__ == "__main__":
    main()
+27
View File
@@ -0,0 +1,27 @@
"""Dump body bytes around a specific offset."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def dump_around(name: str, center: int, radius: int = 96):
    """Hex-dump the body of bundle ``name`` from ``center - radius`` to ``center + radius``.

    The range is clamped to the body, printed in 32-byte rows, and the row
    containing ``center`` is flagged.
    """
    body = load_bundle(name).body
    lo = max(0, center - radius)
    hi = min(len(body), center + radius)
    print(f"\n=== {name} body[{lo}:{hi}] (full body={len(body)}) ===")
    offset = lo
    while offset < hi:
        flag = " <-- center" if offset <= center < offset + 32 else ""
        print(f" +{offset:>5} {body[offset:offset + 32].hex(' ')}{flag}")
        offset += 32
def main():
    """Dump 96 bytes either side of each event's hard-coded trailer offset."""
    # Offsets where the trailer transition was observed in each event body.
    trailer_starts = (
        ("event-a", 7047),
        ("event-b", 6475),
        ("event-c", 4043),
        ("event-d", 3941),
    )
    for event_name, offset in trailer_starts:
        dump_around(event_name, offset, 96)


if __name__ == "__main__":
    main()
+18
View File
@@ -0,0 +1,18 @@
"""Dump the START of each body in 32-byte rows."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def main():
    """Print the first 512 body bytes of event-a and event-c as 32-byte hex rows."""
    for name in ("event-a", "event-c"):
        bundle = load_bundle(name)
        body = bundle.body
        n_samples = len(bundle.samples['Tran'])
        print(f"\n=== {name} body[0:512] (full body={len(body)}, samples={n_samples}) ===")
        limit = min(512, len(body))
        offset = 0
        while offset < limit:
            print(f" +{offset:>5} {body[offset:offset + 32].hex(' ')}")
            offset += 32


if __name__ == "__main__":
    main()
+24
View File
@@ -0,0 +1,24 @@
"""Dump body bytes split into 32-byte rows starting from `start_offset`."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def dump(body: bytes, name: str, start: int, n_rows: int = 30):
    """Print up to ``n_rows`` 32-byte hex rows of ``body`` beginning at ``start``."""
    print(f"\n=== {name} body[{start}:] (full body={len(body)}) ===")
    stop = min(start + 32 * n_rows, len(body))
    offset = start
    while offset < stop:
        chunk = body[offset:offset + 32]
        print(f" +{offset:>5} {chunk.hex(' ')}")
        offset += 32
def main():
    """Dump the tail of every event body to expose the trailer structure."""
    tail_rows = 12
    for name in ("event-a", "event-b", "event-c", "event-d"):
        body = load_bundle(name).body
        # Last 12 rows of 32 bytes (384 bytes), clamped to the body start.
        tail_start = max(0, len(body) - 32 * tail_rows)
        dump(body, name, tail_start, tail_rows)


if __name__ == "__main__":
    main()
+41
View File
@@ -0,0 +1,41 @@
"""Search for structural repetition in the body bytes."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def find_pattern_offsets(body: bytes, pattern: bytes, max_count=20):
    """Return the offsets of ``pattern`` in ``body``, overlapping matches included.

    The search stops after ``max_count`` offsets have been collected.
    """
    hits = []
    pos = body.find(pattern, 0)
    while pos >= 0:
        hits.append(pos)
        if len(hits) >= max_count:
            break
        # Resume one byte later so overlapping occurrences are found too.
        pos = body.find(pattern, pos + 1)
    return hits
def main():
    """Report where a fixed set of 2-byte marker candidates occur in each body."""
    # Candidate 2-byte markers, mostly 0x10-prefixed tags.
    candidates = (
        b"\x10\x10", b"\x10\x04", b"\x10\x08", b"\x10\x0c", b"\x10\x18",
        b"\x10\x14", b"\x10\x20", b"\x10\x40", b"\x10\x80", b"\x10\x00",
        b"\x10\x01", b"\x10\x03", b"\x10\xf0", b"\xf1\x10", b"\x00\x10",
        b"\x40\x02", b"\x20\x04", b"\x30\x04", b"\x30\x08", b"\x00\x1a",
    )
    for name in ("event-a", "event-b", "event-c", "event-d"):
        bundle = load_bundle(name)
        body = bundle.body
        print(f"\n=== {name} (body={len(body)}, N_samples={len(bundle.samples['Tran'])}) ===")
        for prefix in candidates:
            # At most 200 offsets are collected, so the count shown is capped.
            offs = find_pattern_offsets(body, prefix, max_count=200)
            if not offs or len(offs) > 1000:
                continue
            # Show the first 6 and last 3 offsets.
            head, tail = offs[:6], offs[-3:]
            print(f" '{prefix.hex()}' x{len(offs):>4} first={head} last={tail}")


if __name__ == "__main__":
    main()
+34
View File
@@ -0,0 +1,34 @@
"""Find body byte ranges that look like absolute int8 sample data (smooth waveform)."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def looks_like_smooth_int8(buf):
    """Return the mean absolute step between consecutive bytes read as int8.

    Lower values mean a smoother (more waveform-like) sequence. Buffers
    shorter than 8 bytes return 0.0.
    """
    if len(buf) < 8:
        return 0.0
    signed = [raw - 256 if raw >= 128 else raw for raw in buf]
    steps = [abs(nxt - prev) for prev, nxt in zip(signed, signed[1:])]
    return sum(steps) / len(steps)
def main():
    """Print the 10 smoothest 64-byte windows of the event-a and event-c bodies."""
    win = 64
    for name in ("event-a", "event-c"):
        body = load_bundle(name).body
        # Score every full window. The `+ 1` includes the final window at
        # offset len(body) - win, which the previous range skipped.
        scores = [
            (off, looks_like_smooth_int8(body[off:off + win]))
            for off in range(len(body) - win + 1)
        ]
        # Lowest average step means smoothest.
        scores.sort(key=lambda x: x[1])
        print(f"\n=== {name} (body={len(body)}) — smoothest 10 windows ===")
        for off, s in scores[:10]:
            print(f" +{off:>5} avg_diff={s:.2f} bytes={body[off:off+24].hex(' ')}")


if __name__ == "__main__":
    main()
+23
View File
@@ -0,0 +1,23 @@
"""Print raw body hex + byte-distribution stats for one event."""
from collections import Counter
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def main():
    """Print the STRT record, head/tail hex and the 16 most common bytes per event."""
    for name in ("event-a", "event-b", "event-c", "event-d"):
        bundle = load_bundle(name)
        body = bundle.body
        total = len(body)
        print(f"\n=== {name} ({total} body bytes) ===")
        print(f" STRT: {bundle.strt.hex()}")
        print(f" body[0:64]: {body[:64].hex()}")
        print(f" body[64:128]: {body[64:128].hex()}")
        print(f" body[-32:]: {body[-32:].hex()}")
        # Byte-value histogram, shown as a share of the whole body.
        histogram = Counter(body)
        top = [
            (f'0x{value:02x}', f'{count / total:.2%}')
            for value, count in histogram.most_common(16)
        ]
        print(f" top 16 bytes: {top}")


if __name__ == "__main__":
    main()
+142
View File
@@ -0,0 +1,142 @@
"""
load_bundle.py — extract body bytes from BW binary + parse sample columns from TXT.
Used by the codec reverse-engineering scripts in this directory.
"""
from __future__ import annotations
import os
import re
from dataclasses import dataclass
BUNDLE_ROOT = os.path.join(os.path.dirname(__file__), "..", "decode-re", "5-8-26")
@dataclass
class Bundle:
    """One event bundle: the raw binary file plus values parsed from its TXT export."""

    name: str
    bin_path: str
    txt_path: str
    # Entire contents of the binary file.
    bin: bytes
    # Bytes between STRT (offset 43) and the footer (last 26 bytes).
    body: bytes
    # 21-byte STRT record.
    strt: bytes
    # {"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}
    samples: dict
    sample_rate: int
    rectime_sec: float
    pretrig_sec: float
    geo_range_ips: float
    # {"Tran": float, "Vert": float, "Long": float}
    ppv: dict
    mic_pspl: float
    serial: str
def _parse_txt(path: str) -> tuple[dict, dict]:
    """Parse a TXT export into its metadata fields and per-channel samples.

    The file is split at the first line that names all four columns
    (Tran, Vert, Long, MicL). Quoted ``"Field : value"`` lines before it
    become metadata; lines after it holding at least four whitespace-separated
    floats become samples. Anything else is skipped.

    Args:
        path: path of the TXT file.

    Returns:
        ``(meta, samples)`` where ``meta`` maps field name -> value string and
        ``samples`` maps each of "Tran", "Vert", "Long", "MicL" to a list of
        floats.

    Raises:
        ValueError: if no column header line is found.
    """
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        lines = f.read().splitlines()

    columns = ("Tran", "Vert", "Long", "MicL")
    header_idx = next(
        (i for i, line in enumerate(lines) if all(col in line for col in columns)),
        None,
    )
    if header_idx is None:
        raise ValueError(f"no Tran/Vert/Long/MicL header in {path}")

    # The key may not contain ':' and is matched lazily, so the split happens
    # at the FIRST colon. A greedy key would split "Time : 12:34:56" at the
    # last colon and corrupt both the key and the value.
    meta_re = re.compile(r'^"([^":]+?)\s*:\s*([^"]*)"')
    meta = {}
    for line in lines[:header_idx]:
        match = meta_re.match(line.strip())
        if match:
            meta[match.group(1).strip()] = match.group(2).strip()

    samples = {col: [] for col in columns}
    for line in lines[header_idx + 1 :]:
        line = line.strip()
        if not line:
            continue
        parts = re.split(r"\s+", line)
        if len(parts) < 4:
            continue
        try:
            row = [float(p) for p in parts[:4]]
        except ValueError:
            # Not a sample row (e.g. a trailing text line); skip it.
            continue
        for col, value in zip(columns, row):
            samples[col].append(value)
    return meta, samples
def load_bundle(name: str) -> Bundle:
    """Load the event bundle stored in folder ``name`` under ``BUNDLE_ROOT``.

    The folder must hold one TXT export and one binary file. The binary is cut
    into the STRT record (bytes 22..42) and the body (byte 43 up to the last
    26 bytes); the TXT export supplies the samples and metadata.

    Args:
        name: bundle folder name, e.g. "event-a".

    Returns:
        A populated ``Bundle``.

    Raises:
        FileNotFoundError: if the folder lacks a TXT export or a binary file.
    """
    folder = os.path.join(BUNDLE_ROOT, name)
    # os.listdir order is arbitrary: sort so the same files are always chosen,
    # skip sub-directories, and match the extension case-insensitively.
    files = sorted(
        f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))
    )
    txt_name = next((f for f in files if f.upper().endswith(".TXT")), None)
    bin_name = next((f for f in files if not f.upper().endswith(".TXT")), None)
    if txt_name is None or bin_name is None:
        raise FileNotFoundError(
            f"bundle folder {folder} must contain a .TXT export and a binary file"
        )
    bin_path = os.path.join(folder, bin_name)
    txt_path = os.path.join(folder, txt_name)

    with open(bin_path, "rb") as f:
        binary = f.read()

    # Header is 22 bytes; STRT at [22:43]; footer at last 26 bytes.
    strt = binary[22:43]
    body = binary[43:-26]

    meta, samples = _parse_txt(txt_path)

    def _f(s):
        """Return the first (optionally negative) decimal number found in ``s``."""
        return float(re.search(r"-?[\d.]+", s).group(0))

    sample_rate = int(re.search(r"(\d+)", meta.get("Sample Rate", "1024")).group(1))
    rectime_sec = float(re.search(r"([\d.]+)", meta.get("Record Time", "3.0")).group(1))
    pretrig_sec = _f(meta.get("Pre-trigger Length", "0"))
    geo_range_ips = float(re.search(r"([\d.]+)", meta.get("Geo Range", "10.0")).group(1))
    serial = meta.get("Serial Number", "").strip()

    ppv = {
        "Tran": _f(meta.get("Tran PPV", "0")),
        "Vert": _f(meta.get("Vert PPV", "0")),
        "Long": _f(meta.get("Long PPV", "0")),
    }
    mic_pspl = _f(meta.get("MicL PSPL", "0"))

    return Bundle(
        name=name,
        bin_path=bin_path,
        txt_path=txt_path,
        bin=binary,
        body=body,
        strt=strt,
        samples=samples,
        sample_rate=sample_rate,
        rectime_sec=rectime_sec,
        pretrig_sec=pretrig_sec,
        geo_range_ips=geo_range_ips,
        ppv=ppv,
        mic_pspl=mic_pspl,
        serial=serial,
    )
if __name__ == "__main__":
    # Smoke test: load every bundle and print a one-line summary of each.
    for event in ("event-a", "event-b", "event-c", "event-d"):
        bundle = load_bundle(event)
        n = len(bundle.samples["Tran"])
        ppv = bundle.ppv
        print(f"{event}: body={len(bundle.body):>6} N_samples={n} rate={bundle.sample_rate} "
              f"rectime={bundle.rectime_sec} pretrig={bundle.pretrig_sec} range={bundle.geo_range_ips} "
              f"PPV(T,V,L)={ppv['Tran']:.3f},{ppv['Vert']:.3f},{ppv['Long']:.3f} "
              f"MicL={bundle.mic_pspl}")
+67
View File
@@ -0,0 +1,67 @@
"""Try various nibble-level channel interleavings to find which one matches truth."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def s4(n):
    """Map a nibble to its signed 4-bit value (8..15 -> -8..-1)."""
    return n - 16 if not n < 8 else n
def run_decoder(body, layout, skip, n_channels=4):
    """Accumulate signed nibble deltas per channel using a caller-supplied layout.

    Args:
        body: raw body bytes.
        layout: function mapping nibble index -> channel index.
        skip: number of leading body bytes to ignore.
        n_channels: number of output channels.

    Returns:
        One list per channel holding that channel's running sum after each of
        its nibbles. Nibbles are read high-then-low from each byte.
    """
    running = [0] * n_channels
    series = [[] for _ in range(n_channels)]

    stream = [
        nib
        for byte in body[skip:]
        for nib in ((byte >> 4) & 0xF, byte & 0xF)
    ]
    for index, nib in enumerate(stream):
        target = layout(index)
        running[target] += s4(nib)
        series[target].append(running[target])
    return series
def cmp(pred, truth, n=24):
    """Pair up the first n predicted and true samples as (pred, truth) tuples."""
    count = min(n, len(pred), len(truth))
    pairs = []
    for idx in range(count):
        pairs.append((pred[idx], truth[idx]))
    return pairs
def main():
    """Print decoder output for several nibble->channel layouts and skip offsets."""
    bundle = load_bundle("event-c")
    truth_T = [round(v * 200) for v in bundle.samples["Tran"]]
    truth_V = [round(v * 200) for v in bundle.samples["Vert"]]
    truth_L = [round(v * 200) for v in bundle.samples["Long"]]
    print(f"T truth[0:10]: {truth_T[:10]}")
    print(f"V truth[0:10]: {truth_V[:10]}")
    print(f"L truth[0:10]: {truth_L[:10]}")

    # Candidate nibble->channel layouts (4 channels). A layout of None marks
    # an idea that needs special handling and is skipped below.
    layouts = {
        "interleaved TVLM (0,1,2,3,0,1,2,3,...)": lambda i: i % 4,
        "interleaved VLMT": lambda i: (i + 3) % 4,
        "interleaved LMTV": lambda i: (i + 2) % 4,
        "interleaved MTVL": lambda i: (i + 1) % 4,
        "byte-based TV LM TV LM (high T low V byte0; high L low M byte1)": lambda i: i % 4,
        # Each channel gets a run of consecutive nibbles.
        "chunks-8 TVLM": lambda i: (i // 8) % 4,
        "chunks-16 TVLM": lambda i: (i // 16) % 4,
        # Planar: one full channel after another.
        "planar T(0..N) V(N..2N) L(2N..3N) M(3N..4N)": None,
    }
    skips = (0, 4, 7, 8, 9, 11, 14)
    for label, layout_fn in layouts.items():
        if layout_fn is not None:
            for skip in skips:
                decoded = run_decoder(bundle.body, layout_fn, skip)
                # Show the first 10 cumulative values of each geophone channel.
                print(f" skip={skip:2} {label}")
                print(f" T_cum[0:10]: {decoded[0][:10]}")
                print(f" V_cum[0:10]: {decoded[1][:10]}")
                print(f" L_cum[0:10]: {decoded[2][:10]}")


if __name__ == "__main__":
    main()
+73
View File
@@ -0,0 +1,73 @@
"""Try decoding body as 4-bit signed nibble deltas, 4-channel round-robin."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
CHANNELS = ("Tran", "Vert", "Long", "MicL")
def s4(n):
    """Sign-extend a 4-bit unsigned value to int (0..7 -> 0..7, 8..F -> -8..-1)."""
    return n - (0 if n < 8 else 16)
def decode_nibbles(body: bytes, skip_bytes: int = 7, n_channels: int = 4):
    """Decode the body as signed 4-bit deltas dealt round-robin to the channels.

    Each byte after the first ``skip_bytes`` yields its high nibble, then its
    low nibble. Returns one list per channel with the running sum after each
    of that channel's deltas.
    """
    running = [0] * n_channels
    series = [[] for _ in range(n_channels)]

    stream = []
    for byte in body[skip_bytes:]:
        stream.extend(((byte >> 4) & 0xF, byte & 0xF))

    channel = 0
    for nib in stream:
        running[channel] += s4(nib)
        series[channel].append(running[channel])
        channel = (channel + 1) % n_channels
    return series
def cmp_to_truth(pred, truth, scale=16):
    """Compare predicted samples to truth over their common length.

    Both sequences are expected in the same units (16-count units, i.e. the
    TXT value * 200). ``scale`` is accepted but not used.

    Returns:
        ``(max_abs_err, mean_abs_err, n_compared)``, or None when there is
        nothing to compare.
    """
    count = min(len(pred), len(truth))
    errors = [abs(p - t) for p, t in zip(pred, truth)]
    if not errors:
        return None
    return (max(errors), sum(errors) / len(errors), count)
def main():
    """Decode event-a and event-c as nibble deltas and print them beside the truth."""
    for name in ("event-a", "event-c"):
        bundle = load_bundle(name)
        body_len = len(bundle.body)
        # The TXT export has 0.005 in/s resolution, which equals 16 ADC counts,
        # so truth is expressed in 16-count units: txt * 200.
        # MicL is in dB and is left out of the truth table for now.
        truth_in_16 = {
            ch: [round(v * 200) for v in bundle.samples[ch]]
            for ch in CHANNELS[:3]
        }

        # Decode assuming a 7-byte preamble.
        decoded = decode_nibbles(bundle.body, skip_bytes=7, n_channels=4)
        print(f"\n=== {name} ===")
        print(f" body={body_len}, nibbles={2*(body_len-7)}, samples_per_ch={len(decoded[0])}")
        print(f" truth samples per ch: {len(truth_in_16['Tran'])}")

        # First 24 values of each channel, with truth where it exists.
        for idx, chan in enumerate(CHANNELS):
            pred_first = decoded[idx][:24]
            if chan not in truth_in_16:
                print(f" {chan} pred: {pred_first} (truth in dB, skipped)")
                continue
            print(f" {chan} pred: {pred_first}")
            print(f" {chan} truth: {truth_in_16[chan][:24]}")


if __name__ == "__main__":
    main()
+44
View File
@@ -0,0 +1,44 @@
"""Walk the body assuming chunks delimited by 0x10 NN tags. Print each chunk's structure."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def walk(body: bytes, start_offset: int = 7, max_chunks: int = 30):
    """Return every offset >= start_offset where 0x10 is followed by a multiple of 4.

    Every position is tested, so a match inside another chunk's data is
    reported as well. ``max_chunks`` is accepted but not used.
    """
    return [
        pos
        for pos in range(start_offset, len(body) - 1)
        if body[pos] == 0x10 and body[pos + 1] % 4 == 0
    ]
def main():
    """List the first 30 `10 NN` tags (NN a non-zero multiple of 4) in event-c and event-d."""
    for name in ("event-c", "event-d"):
        body = load_bundle(name).body

        tags = []
        cursor = 7  # skip 7-byte preamble
        last = len(body) - 1
        while cursor < last:
            nn = body[cursor + 1]
            is_tag = body[cursor] == 0x10 and nn % 4 == 0 and nn > 0
            if is_tag:
                tags.append(cursor)
            # Step over both tag bytes after a hit, otherwise advance one byte.
            cursor += 2 if is_tag else 1

        print(f"\n=== {name} === body={len(body)}, total `10 NN` (NN%4==0, NN>0) tags: {len(tags)}")
        # For up to 30 tags show position, NN and the gap to the next tag.
        for k, pos in enumerate(tags[:30]):
            NN = body[pos + 1]
            following = tags[k + 1] if k + 1 < len(tags) else len(body)
            gap = following - pos
            payload = body[pos + 2 : following]
            print(f" chunk[{k:>3}] @ {pos:>5} NN=0x{NN:02x} ({NN:>3}, NN/2={NN//2}) gap={gap:>3} "
                  f"data={payload[:24].hex(' ')}{'...' if len(payload) > 24 else ''}")


if __name__ == "__main__":
    main()
+50
View File
@@ -0,0 +1,50 @@
"""Deterministic chunk walker: each chunk = [10 NN][NN/2 bytes data][2 bytes trailer]."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def walk_chunks(body: bytes, start: int = 7):
    """Yield ``(offset, NN, data_bytes, trailer_bytes)`` for consecutive chunks.

    A chunk is `10 NN`, then NN/2 data bytes, then a 2-byte trailer, with NN a
    multiple of 4 in 4..0x80. Iteration ends at the first position that does
    not start such a chunk or whose chunk would run past the end of the body.
    """
    total = len(body)
    pos = start
    while pos + 1 < total and body[pos] == 0x10:
        NN = body[pos + 1]
        if not (0 < NN <= 0x80) or NN % 4 != 0:
            return
        data_end = pos + 2 + NN // 2
        chunk_end = data_end + 2
        if chunk_end > total:
            return
        yield (pos, NN, bytes(body[pos + 2 : data_end]), bytes(body[data_end:chunk_end]))
        pos = chunk_end
def main():
    """Walk each event body with the fixed chunk layout and print summary statistics."""
    for name in ("event-c", "event-d", "event-a", "event-b"):
        bundle = load_bundle(name)
        body = bundle.body
        n_samples = len(bundle.samples['Tran'])
        chunks = list(walk_chunks(body))

        print(f"\n=== {name} === body={len(body)} N_samples={n_samples}")
        print(f" chunks parsed: {len(chunks)}")
        if chunks:
            last_offset, last_nn = chunks[-1][0], chunks[-1][1]
            end_of_walk = last_offset + last_nn // 2 + 4
            print(f" walk ended at offset {end_of_walk} (= {len(body) - end_of_walk} bytes from end)")

        # Payload statistics.
        total_data_bytes = sum(len(data) for _, _, data, _ in chunks)
        print(f" total data bytes: {total_data_bytes}, total nibbles: {2*total_data_bytes}")
        if name in ("event-c", "event-d"):
            ratio = (2 * total_data_bytes) / (n_samples * 4)
            print(f" nibbles per (sample × channel): {ratio:.3f}")

        def summarize(items):
            """Reduce chunks to (offset, NN, trailer hex) triples for printing."""
            return [(off, nn, trailer.hex()) for off, nn, _, trailer in items]

        print(f" first 10 chunks: {summarize(chunks[:10])}")
        # The last chunks are the likely transition into the trailer.
        print(f" last 10 chunks: {summarize(chunks[-10:])}")


if __name__ == "__main__":
    main()
+51
View File
@@ -0,0 +1,51 @@
"""Walk chunks; auto-detect preamble length by finding first 10 NN."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def walk_chunks(body, start, max_NN=0x80):
    """Collect consecutive `10 NN` chunks starting at ``start``.

    A chunk is `10 NN`, NN/2 data bytes and a 2-byte trailer, with NN a
    multiple of 4 in 4..max_NN.

    Returns:
        ``(chunks, end)`` where ``chunks`` is a list of
        ``(offset, NN, data, trailer)`` tuples and ``end`` is the offset at
        which walking stopped.
    """
    found = []
    pos = start
    size = len(body)
    while pos + 1 < size and body[pos] == 0x10:
        NN = body[pos + 1]
        valid_nn = 0 < NN <= max_NN and NN % 4 == 0
        if not valid_nn:
            break
        data_end = pos + 2 + NN // 2
        chunk_end = data_end + 2
        if chunk_end > size:
            break
        found.append((pos, NN, bytes(body[pos + 2 : data_end]), bytes(body[data_end:chunk_end])))
        pos = chunk_end
    return found, pos
def find_first_chunk_start(body):
    """Locate the first `10 NN` chunk tag within the first 20 bytes of ``body``.

    A tag is 0x10 followed by NN, a multiple of 4 in 4..0x7C.

    Returns:
        The offset of the first tag, or -1 if none is found. Bodies shorter
        than 21 bytes are searched only as far as a full 2-byte tag fits.
    """
    # Stop one byte before the end so body[i + 1] never overruns a short body.
    for i in range(min(20, len(body) - 1)):
        if body[i] == 0x10 and body[i + 1] % 4 == 0 and 0 < body[i + 1] <= 0x7C:
            return i
    return -1
def main():
    """Auto-detect each body's first chunk, walk the chunks and print a summary."""
    for name in ("event-c", "event-d", "event-a", "event-b"):
        b = load_bundle(name)
        body = b.body
        start = find_first_chunk_start(body)
        if start < 0:
            # -1 means "no tag found". Used as an offset it would index from
            # the END of the body, so report the miss and skip the walk.
            print(f"\n=== {name} === body={len(body)} N_samples={len(b.samples['Tran'])} start={start}")
            print(f" no `10 NN` chunk found; bytes at start: {body[:16].hex(' ')}")
            continue

        chunks, end = walk_chunks(body, start)
        print(f"\n=== {name} === body={len(body)} N_samples={len(b.samples['Tran'])} start={start}")
        print(f" chunks parsed: {len(chunks)}, walk ended at {end}")
        if chunks:
            print(f" first 5 chunks: {[(c[0], c[1], c[3].hex()) for c in chunks[:5]]}")
            print(f" last 5 chunks: {[(c[0], c[1], c[3].hex()) for c in chunks[-5:]]}")
            print(f" bytes around end of walk: {body[end-4:end+12].hex(' ')}")
        else:
            print(f" bytes at start: {body[start:start+16].hex(' ')}")


if __name__ == "__main__":
    main()
+75
View File
@@ -0,0 +1,75 @@
"""
Walker v4: alternate [10 NN] data chunks and [00 NN] (or other) marker tags.
Hypothesis:
- [10 NN]: data block, length NN/2 + 2 bytes (2-byte tag + NN/2 bytes data)
- [00 NN]: 2-byte marker block (no data)
- [20/30/40 NN]: special blocks with type-dependent length
"""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
def walk(body, start):
    """Walk ``body`` from ``start`` as a sequence of tagged blocks (v4 hypothesis).

    Block lengths by tag (t0, t1):
      - 10 NN (NN multiple of 4, 4..0x80): NN/2 + 2 bytes
      - 00 NN (NN multiple of 4):          2-byte marker, no payload
      - 20 NN / 30 NN (NN multiple of 4):  NN/2 + 2 bytes
      - 40 02:                             fixed 22 bytes

    Returns:
        ``(blocks, end)``. Each block is ``(offset, type, NN, payload, length)``.
        An unknown tag ends the walk and is recorded as
        ``(offset, "??", t0, next_8_bytes, 0)``; ``end`` is the stop offset.
    """
    def block_length(t0, t1):
        """Return the block length implied by the tag, or None if unknown."""
        if t0 == 0x40:
            return 22 if t1 == 0x02 else None
        if t1 % 4 != 0:
            return None
        if t0 == 0x00:
            return 2
        if t0 == 0x10:
            return t1 // 2 + 2 if 0 < t1 <= 0x80 else None
        if t0 in (0x20, 0x30):
            return t1 // 2 + 2
        return None

    found = []
    pos = start
    while pos + 1 < len(body):
        t0, t1 = body[pos], body[pos + 1]
        length = block_length(t0, t1)
        if length is None:
            # Unknown tag: record it and stop.
            found.append((pos, "??", t0, bytes(body[pos:pos + 8]), 0))
            break
        found.append((pos, f"{t0:02x}", t1, bytes(body[pos + 2 : pos + length]), length))
        pos += length
    return found, pos
def main():
    """Run the v4 walker over every event and print block-type statistics."""
    from collections import Counter

    for name in ("event-c", "event-d", "event-a", "event-b"):
        bundle = load_bundle(name)
        body = bundle.body

        # Auto-detect the first `10 NN` tag in the first 15 bytes; default to 7.
        start = next(
            (s for s in range(15)
             if body[s] == 0x10 and body[s + 1] % 4 == 0 and 0 < body[s + 1] <= 0x80),
            7,
        )
        blocks, end = walk(body, start)

        # Tally blocks by type.
        types = Counter(blk[1] for blk in blocks)
        print(f"\n=== {name} === body={len(body)} N={len(bundle.samples['Tran'])} start={start}")
        print(f" total blocks: {len(blocks)}, walk ended at {end}/{len(body)}")
        print(f" type counts: {dict(types)}")
        tail = [(blk[0], blk[1], blk[2]) for blk in blocks[-5:]]
        print(f" last 5 blocks: {tail}")
        if end < len(body):
            print(f" bytes at end: {body[end:end+24].hex(' ')}")


if __name__ == "__main__":
    main()
+83
View File
@@ -0,0 +1,83 @@
"""
Walker v5: flexible NN range and multiple block-type lengths.
Hypothesis:
- [10 NN]: 4-bit-delta data block, length = NN/2 + 2
- [20 NN]: 8-bit-literal data block, length = NN + 2
- [00 NN]: 2-byte marker (no payload)
- [30 NN]: trailer/summary block, length = NN*4
- [40 NN]: footer-marker block, fixed 22 bytes
"""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
from collections import Counter
def walk(body, start, max_blocks=10000):
    """Walk ``body`` from ``start`` as tagged blocks (v5 hypothesis).

    Block lengths by tag (t0, t1):
      - 10 NN (NN multiple of 4, 4..0xFC): NN/2 + 2 bytes
      - 20 NN (NN multiple of 4, 4..0xFC): NN + 2 bytes
      - 00 NN (NN multiple of 4):          2-byte marker
      - 30 NN (NN multiple of 4):          NN * 4 bytes
      - 40 02:                             fixed 22 bytes

    The walk ends at an unknown tag (recorded as a "??" block), at a block
    that would overrun the body (not recorded), or after ``max_blocks`` blocks.

    Returns:
        ``(blocks, end)`` with blocks as ``(offset, type, NN, payload, length)``.
    """
    found = []
    pos = start
    size = len(body)
    while pos + 1 < size and len(found) < max_blocks:
        t0, t1 = body[pos], body[pos + 1]
        aligned = t1 % 4 == 0
        in_range = 0 < t1 <= 0xFC

        if t0 == 0x10 and aligned and in_range:
            span = t1 // 2 + 2
        elif t0 == 0x20 and aligned and in_range:
            span = t1 + 2
        elif t0 == 0x00 and aligned:
            span = 2
        elif t0 == 0x30 and aligned:
            span = t1 * 4
        elif t0 == 0x40 and t1 == 0x02:
            span = 22
        else:
            found.append((pos, "??", t0, bytes(body[pos:pos + 8]), 0))
            break

        # A truncated block ends the walk without being recorded.
        if pos + span > size:
            break
        found.append((pos, f"{t0:02x}", t1, bytes(body[pos + 2 : pos + span]), span))
        pos += span
    return found, pos
def main():
    """Run the v5 walker over every event and print block and payload statistics."""
    for name in ("event-c", "event-d", "event-a", "event-b"):
        bundle = load_bundle(name)
        body = bundle.body

        # First `10 NN` tag within the first 15 bytes, else offset 7.
        start = next(
            (s for s in range(15)
             if body[s] == 0x10 and body[s + 1] % 4 == 0 and 0 < body[s + 1] <= 0xFC),
            7,
        )
        blocks, end = walk(body, start)
        types = Counter(blk[1] for blk in blocks)

        print(f"\n=== {name} === body={len(body)} N={len(bundle.samples['Tran'])} start={start}")
        print(f" total blocks: {len(blocks)}, walk ended at {end}/{len(body)}")
        print(f" type counts: {dict(types)}")
        if blocks and blocks[-1][1] == "??":
            preceding = [(blk[0], blk[1], blk[2]) for blk in blocks[-6:-1]]
            print(f" stopped at byte: 0x{blocks[-1][2]:02x}, prev 5 blocks: {preceding}")

        # Total payload bytes per block type, in first-seen order.
        payload_sizes = {}
        for blk in blocks:
            payload_sizes[blk[1]] = payload_sizes.get(blk[1], 0) + len(blk[3])
        print(f" payload bytes by type: {payload_sizes}")


if __name__ == "__main__":
    main()
+68
View File
@@ -0,0 +1,68 @@
"""
Walker v6: handle 40 02 blocks correctly (length 20).
Block formats:
- [10 NN]: 4-bit nibble delta data, length = NN/2 + 2
- [20 NN]: int8 literal data, length = NN + 2
- [00 NN]: 2-byte marker
- [30 NN]: trailer/summary block, length = NN*4
- [40 02]: segment header, fixed length 20
"""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import load_bundle
from collections import Counter
def walk(body, start, max_blocks=10000):
    """Walk ``body`` from ``start`` as tagged blocks (v6 hypothesis).

    Block lengths by tag (t0, t1):
      - 10 NN (NN multiple of 4, 4..0xFC): NN/2 + 2 bytes
      - 20 NN (NN multiple of 4, 4..0xFC): NN + 2 bytes
      - 00 NN (NN multiple of 4):          2-byte marker
      - 30 NN (NN multiple of 4, 4..0x10): NN * 4 bytes
      - 40 02:                             fixed 20 bytes

    The walk ends at an unknown tag (recorded as a "??" block), at a block
    that would overrun the body (not recorded), or after ``max_blocks`` blocks.

    Returns:
        ``(blocks, end)`` with blocks as ``(offset, type, NN, payload, length)``.
    """
    def span_for(t0, t1):
        """Return the total block length for a known tag, else None."""
        if t0 == 0x00:
            return 2 if t1 % 4 == 0 else None
        if t0 == 0x40:
            return 20 if t1 == 0x02 else None
        if t1 % 4 != 0 or t1 <= 0:
            return None
        if t0 == 0x10 and t1 <= 0xFC:
            return t1 // 2 + 2
        if t0 == 0x20 and t1 <= 0xFC:
            return t1 + 2
        if t0 == 0x30 and t1 <= 0x10:
            return t1 * 4
        return None

    found = []
    pos = start
    while pos + 1 < len(body) and len(found) < max_blocks:
        t0, t1 = body[pos], body[pos + 1]
        span = span_for(t0, t1)
        if span is None:
            found.append((pos, "??", t0, bytes(body[pos:pos + 8]), 0))
            break
        if pos + span > len(body):
            break
        found.append((pos, f"{t0:02x}", t1, bytes(body[pos + 2 : pos + span]), span))
        pos += span
    return found, pos
def main():
    """Run the v6 walker over every event and print block and payload statistics."""
    for name in ("event-c", "event-d", "event-a", "event-b"):
        bundle = load_bundle(name)
        body = bundle.body

        # First `10 NN` tag within the first 15 bytes, else offset 7.
        start = next(
            (s for s in range(15)
             if body[s] == 0x10 and body[s + 1] % 4 == 0 and 0 < body[s + 1] <= 0xFC),
            7,
        )
        blocks, end = walk(body, start)
        types = Counter(blk[1] for blk in blocks)

        print(f"\n=== {name} === body={len(body)} N={len(bundle.samples['Tran'])} start={start}")
        print(f" total blocks: {len(blocks)}, walk ended at {end}/{len(body)}")
        print(f" type counts: {dict(types)}")
        if blocks and blocks[-1][1] == "??":
            stop = blocks[-1]
            preceding = [(blk[0], blk[1], blk[2]) for blk in blocks[-6:-1]]
            print(f" stopped at byte: 0x{stop[2]:02x} at offset {stop[0]}")
            print(f" prev 5 blocks: {preceding}")
            print(f" bytes around stop: {body[end-4:end+24].hex(' ')}")

        # Total payload bytes per block type, in first-seen order.
        payload_sizes = {}
        for blk in blocks:
            payload_sizes[blk[1]] = payload_sizes.get(blk[1], 0) + len(blk[3])
        print(f" payload bytes by type: {payload_sizes}")


if __name__ == "__main__":
    main()
+138 -105
View File
@@ -860,127 +860,160 @@ MicL: 39 64 1D AA = 0.0000875 psi
--- ---
#### 7.6.1 Blast / Waveform mode — ❌ NOT VERIFIED (retracted 2026-05-08) #### 7.6.1 Blast / Waveform mode — 🟡 STRUCTURAL FRAMING DECODED (2026-05-08)
> ## ⚠️ RETRACTION (2026-05-08) > **Status (2026-05-08):** Block-level framing is solved and verified
> against the 4-event May 8 2026 bundle (3 sec / 2 sec / 1 sec / 1 sec
> events captured live from BE11529). The per-byte mapping from block
> data to ADC samples is **still open** — the previous int16 LE claim
> is REFUTED (see history below).
> >
> The "4-channel interleaved s16 LE, 8 bytes per sample-set" claim > The earlier "4-channel interleaved s16 LE, 8 bytes per sample-set"
> below was **never actually validated**. It got into this document > claim was never validated and was wrong. No event in the project's
> because the decoder built around that assumption produced full-scale > archive ever came close to ADC saturation, yet the int16 LE decoder
> ±32K counts on every channel of the 4-2-26 capture, and the > consistently produced full-scale ±32K noise — that was the signature
> ±32K-shaped output was misread as "the signal must have saturated." > of mis-aligned encoded data, not signal saturation.
>
> Cross-checking the BW-reported peaks proves the opposite:
>
> | Channel | BW PPV (in/s) | Expected ADC counts at 10 in/s FS |
> |---|---|---|
> | Tran | 0.420 | **1,376** |
> | Vert | 3.870 | **12,686** |
> | Long | 0.495 | **1,622** |
>
> None of these are anywhere near ±32K saturation. No event in the
> project's archive (across all captures from 1-2-26 onward) has
> ever come close to saturation either. Yet the decoder has
> consistently produced ±32K-shaped noise on every event. The right
> conclusion is that the byte-to-sample interpretation has been wrong
> the whole time, NOT that every event happened to saturate.
>
> What's actually known about the body bytes:
>
> - The byte distribution is heavily skewed (24% `0x00`, 10.5% `0x10`,
> plus high frequencies of `0x01 / 0x04 / 0x0F / 0xF0 / 0xF1`). Lots
> of `10 XX` pairs. Reading them as LE int16 produces uniform ±32K
> noise — the signature of mis-aligned or encoded data.
> - The CHANGELOG note for v0.14.2 calls the body a "delta-encoded
> ADC stream" — that hint plus the byte distribution points toward
> a delta encoding with `0x10` as an escape marker, but no decoder
> has been worked out yet.
> - The histogram-mode codec in §7.6.2 IS verified and decoded
> correctly (different format: 32-byte blocks with 9× int16 LE
> samples + metadata). The same firmware emits both formats, so
> §7.6.2 may share encoding primitives with the waveform codec
> and is worth using as a structural hint when reverse-engineering.
>
> **Treat the spec below as a starting hypothesis to disprove, not
> ground truth.** The frame-layout pieces (STRT location, preamble,
> chunk header) appear correct; the per-byte sample interpretation
> is the open question.
4-channel interleaved signed 16-bit little-endian, 8 bytes per sample-set: ##### Body file layout
A Blastware waveform-file body (the variable-length section between
the 21-byte STRT record and the 26-byte file footer) is composed of
**tagged variable-length blocks**, NOT raw int16 samples.
``` ```
[T_lo T_hi V_lo V_hi L_lo L_hi M_lo M_hi] × N sample-sets [preamble: 7 or 9 bytes]
[stream of tagged blocks]
[trailer: per-channel summary blocks]
``` ```
- **T** = Transverse (Tran), **V** = Vertical (Vert), **L** = Longitudinal (Long), **M** = Microphone **Preamble:** starts with the 4-byte magic ``00 02 00 00``. Single-shot
- Channel order follows the Blastware convention: Tran is always first (ch[0]). events have a 7-byte preamble; continuous events have a 9-byte preamble
- Encoding: signed int16 little-endian. Full scale = ±32768 counts. (the 4 events in the May 8 2026 bundle split 2/2 between the two
- Sample rate: set by compliance config (typical: 1024 Hz for blast monitoring). lengths). Bytes [4:9] of the preamble appear to encode initial
- Each A5 frame chunk carries a different number of waveform bytes. Frame sizes per-channel state but the layout has not been pinned down — for some
are NOT multiples of 8, so naive concatenation scrambles channel assignments at events byte [4] equals truth Tran[0] in 16-count units (0.005 in/s
frame boundaries. **Always track cumulative byte offset mod 8 to correct alignment.** LSB), but other channel-byte assignments don't fit consistently.
**A5[0] frame layout:** ##### Block tags (CONFIRMED 2026-05-08)
Every block starts with a 2-byte tag. Five tag types are confirmed:
| Tag (hex) | Block type | On-wire length |
|-----------|-------------------------------------|-----------------------|
| ``10 NN`` | Small-delta data block | NN/2 + 2 bytes |
| ``20 NN`` | Literal data block (int8-shaped) | NN + 2 bytes |
| ``00 NN`` | 2-byte marker between data blocks | 2 bytes |
| ``30 NN`` | Trailer summary block | NN × 4 bytes |
| ``40 02`` | Segment header | 20 bytes (fixed) |
NN is always a multiple of 4. ``10 NN`` and ``20 NN`` data blocks
alternate with ``00 NN`` markers — every ``10/20 NN`` block is
followed by a ``00 NN`` marker before the next data block.
##### Segments
The body is divided into ~16 SEGMENTS for a 1280-sample event (= 1
segment per ~80 sample-sets), separated by ``40 02`` segment headers.
A 3328-sample event has ~42 segments.
The 18-byte ``40 02`` payload structure (CONFIRMED across all 4
fixtures by inspecting the increment of bytes [8:12]):
| Offset | Length | Field |
|--------|--------|--------------------------------------------------|
| 0 | 4 | Anchor / channel state (open — see below) |
| 4 | 4 | Variable field (open) |
| 8 | 4 | uint32 LE counter — increments by 1 per segment |
| 12 | 4 | Fixed pattern ``02 00 00 01`` |
| 16 | 2 | Variable tail |
The counter at bytes [8:12] starts in the 0x40s for a freshly-erased
device and increments cleanly — useful as a structural sanity check.
Examples from event-c (1 sec single-shot):
``` ```
db[7:]: [11-byte header] [21-byte STRT record] [6-byte preamble] [waveform ...] Segment header 1 (offset 235):
STRT: offset 11 in db[7:] 40 02 | 00 00 00 00 | 0a 4b 01 1e | 47 00 00 00 | 02 00 00 01 | 00 01
+0..3 b'STRT' magic ^counter=0x47
+8..9 uint16 BE total_samples (full-record expected sample-set count) Segment header 2 (offset 523):
+16..17 uint16 BE pretrig_samples (pre-trigger window, in sample-sets) 40 02 | ff fe ff fe | 13 f5 01 06 | 48 00 00 00 | 02 00 00 01 | 00 02
+18 uint8 rectime_seconds ^counter=0x48 (+1)
preamble: +19..20 0x00 0x00 null padding
+21..24 0xFF × 4 synchronisation sentinel
Waveform: starts at strt_pos + 27 within db[7:]
``` ```
**A5[1..N] frame layout (non-metadata frames):** ##### Trailer
``` The trailer (after the last segment's data) is a sequence of 32-byte
db[7:]: [8-byte per-frame header] [waveform ...] ``30 08`` blocks plus a final ``30 04`` / ``20 04`` / ``40 02`` summary
Header: [counter LE uint16, 0x00 × 6] — frame sequence counter (0, 8, 12, 16, 20, …×0x400) ending in the constant 2-byte tail ``00 1A``. These contain
Waveform: starts at byte 8 of db[7:] per-channel statistics (peak times, peak values, mean offsets — bytes
``` in the form ``f3/f4/f5`` near ``20 10`` markers strongly resemble
int8 channel-bias values around -12). Detailed decoding of the
trailer is outside the path needed for sample reconstruction.
**Special frames:** ##### What's still open
| Frame index | Contents | - **The byte → sample mapping inside ``10 NN`` and ``20 NN`` blocks.**
Tested hypotheses that did not match BW's ASCII export to within ±1
ADC count:
1. ``10 NN`` data = 4-bit signed nibble deltas, channel-interleaved,
all 24 channel permutations × 2 nibble orders × 2 sign conventions
× 2 init-from-header settings (= 192 combinations). All produce
values that diverge from truth after the first ~7 sample-sets.
2. ``20 NN`` data = int8 absolute or delta samples for one channel.
Magnitudes in observed blocks (peak ±34 in event-c at offset 351)
do not match any channel's PPV at any plausible ADC quantization
(1-count, 4-count, 8-count, 16-count).
3. ``00 NN`` marker = "skip N sample-sets with zero deltas". Sums
of NN/4 across markers do not consistently match the 80
sample-sets-per-segment count.
The codec is more elaborate than uniform 4-bit deltas. A hybrid
variable-bit-width scheme (4-bit deltas in ``10 NN``, 8-bit deltas
or absolutes in ``20 NN``, segment-header anchors after each
``40 02``) is the most plausible remaining hypothesis.
- **The role of bytes [4:9] of the preamble.** Byte 4 == Tran[0]
truth value (in 16-count units) for events a/b/d, but doesn't
fit consistently for event-c. Bytes [5:9] don't match a simple
per-channel encoding.
- **Walker correctness past offset ~427 in event-b.** The walker
bails out partway through event-b — there is at least one block
whose length doesn't fit the lengths confirmed for the other
three events. Likely a ``20 NN`` with NN > 0xFC (currently
rejected by the walker), or a different length formula in some
context.
##### Recommended next step
A capture with a known external waveform (calibration tone of known
frequency and amplitude) would unlock the magnitude scaling and
disambiguate which channel a ``20 NN`` block belongs to. Multiple
captures of the same signal at different ``geo_range`` settings
(Normal 10 in/s vs Sensitive 1.25 in/s) would also pin down whether
sample values are scaled at the codec layer or only at the BW
display layer.
##### Reference module
``minimateplus/waveform_codec.py`` implements the verified block
walker (:func:`walk_body`, :func:`split_segments`,
:func:`parse_segment_header`). ``decode_waveform_v2`` is a stub that
returns ``None`` until a verified per-byte sample decoder is wired
up; production code (``minimateplus/client.py``) continues to use
the legacy int16 LE decoder, which produces wrong samples but stable
output shape — keep the ``.h5`` sidecars marked as
"sample-codec unverified" until the byte-to-sample mapping lands.
##### History (do not re-derive)
| Date | Note |
|---|---| |---|---|
| A5[0] | Probe response: STRT record + first waveform chunk | | 2026-05-08 | Block tagging confirmed against the 4-event May 2026 bundle. All bodies parse cleanly through `walk_body` for events a/c/d. Event-b walks partway and stops at offset 427 (open issue). |
| A5[7] | Event-time metadata strings only (no waveform data) | | 2026-05-08 | Earlier "4-channel interleaved s16 LE" claim formally retracted — never validated, produced full-scale ±32K noise on every event because the bytes are encoded, not raw samples. |
| A5[9] | Terminator frame (page_key=0x0000) — ignored | | 2026-04-02 | "Frame 7 metadata", "Frame 9 terminator", and `0x0400`-step chunk-counter claims documented as-was; later proved to be artifacts of an over-reading 5A walk (now superseded by §7.8.5–7.8.7). |
| A5[1..6,8] | Waveform chunks |
**Confirmed from 4-2-26 blast capture (total_samples=9306, pretrig=298, rate=1024 Hz):**
```
Frame Waveform bytes Cumulative Align(mod 8)
A5[0] 933B 933B 0
A5[1] 963B 1896B 5
A5[2] 946B 2842B 0
A5[3] 960B 3802B 2
A5[4] 952B 4754B 2
A5[5] 946B 5700B 2
A5[6] 941B 6641B 4
A5[8] 992B 7633B 1
Total: 7633B → 954 naive sample-sets, 948 alignment-corrected
```
Only 948 of 9306 sample-sets captured (10%) — `stop_after_metadata=True` terminated
download after A5[7] was received.
**Channel identification note:** Channel ordering [Tran, Vert, Long, Mic] = [ch0, ch1, ch2, ch3]
is the Blastware convention. This ordering has not been independently verified end-to-end,
since no decoder yet produces samples that match BW's own rendering of the same event (see
the retraction at the top of §7.6.1). Once the body codec is decoded, the per-channel PPV
values from the 0C record (Tran=0.420, Vert=3.870, Long=0.495 in/s for the 4-2-26 capture)
provide the cross-check that pins down channel order.
> **Historical note:** earlier revisions of this section claimed the 4-2-26 blast had
> "saturated all four channels to ~32000–32617 counts," citing that as evidence the s16 LE
> interpretation was correct. That claim was wrong — the ±32K values were the broken
> decoder's output, not the actual signal amplitude (which the 0C peaks above show was
> nowhere near saturation). Retracted 2026-05-08.
--- ---
+242
View File
@@ -0,0 +1,242 @@
"""
waveform_codec.py — block-walker for the MiniMate Plus waveform body codec.
PARTIAL REVERSE-ENGINEERING 2026-05-08.
Status: STRUCTURAL FRAMING confirmed; per-block sample interpretation OPEN.
This module replaces the int16-LE assumption that produced full-scale ±32K
noise on every event. The body is NOT raw int16 LE: it is a sequence of
tagged variable-length blocks. The block framing is solved here. The
mapping from block bytes to ADC samples is **NOT yet pinned down** — the
work-in-progress decoder ``decode_waveform_v2`` returns ``None`` until
a verified algorithm is wired in.
Until ``decode_waveform_v2`` returns a verified result, callers that need
sample data should keep relying on the legacy decoder in ``client.py``
(known-broken, but at least stable in shape) and not consume this
module's sample output.
Body structure (CONFIRMED 2026-05-08 against decode-re/5-8-26 4-event bundle)
The Blastware waveform-file body lives between bytes [22+21=43] and the
26-byte file footer (``[: -26]``). Layout:
[preamble: 7 or 9 bytes]
[data section: a stream of tagged blocks]
[trailer: per-channel summary blocks]
The preamble starts with the magic ``00 02 00 00``. After that there is
either 3 or 5 bytes of header before the first ``10 NN`` block tag — in
the 4-event bundle, single-shot events have a 7-byte preamble and
continuous events have 9. The exact meaning of bytes [4:9] is open
(empirically: byte [4] for event-a == truth Tran[0]; byte [4] for
event-b == truth Tran[0]; events c/d = 0; treating it as a per-channel
"initial value" partially matches but is inconsistent across events).
Blocks have 2-byte tags and these confirmed lengths:
| Tag (hex) | Block type | Total length |
|-----------|--------------------------------------|-----------------|
| ``10 NN`` | Small-delta data block | NN/2 + 2 bytes |
| ``20 NN`` | Literal data block (looks int8-ish) | NN + 2 bytes |
| ``00 NN`` | 2-byte marker between data blocks | 2 bytes |
| ``30 NN`` | Trailer summary block | NN × 4 bytes |
| ``40 02`` | Segment header | 20 bytes |
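In the 4-event bundle, the bodies of events a, c and d parse as a clean
sequence of these blocks all the way through the trailer (when the walker
is given the right preamble length); no "??" stops occur once the start
offset is correct. Event-b is the exception: the walker currently stops
partway through it, around offset 427 (open issue).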
Segments and the ``40 02`` header
The body is divided into ~16 SEGMENTS, each separated by a ``40 02``
header. Each segment carries ~80 sample-sets (1280-sample event = 16
segments × 80 sample-sets, 3328-sample event = ~42 segments). The 18-byte
``40 02`` payload contains:
bytes 0..3 4-byte channel anchor / state (varies per segment)
bytes 4..7 4-byte field, varies (RMS/peak per channel?)
bytes 8..11 4-byte uint32 LE counter (increments by 1 per segment;
starts at e.g. 0x47 for the first in-data segment)
bytes 12..15 4-byte fixed pattern: 02 00 00 01
bytes 16..17 2-byte segment-relative payload counter
The counter at bytes [8..11] increments cleanly across segments — useful
as a sanity check. The role of bytes [0..3] (anchor candidates) and
[4..7] is not pinned down: simple "channel state at segment boundary"
hypotheses do NOT match truth across all four sample bundles tested.
What's open
The mapping ``block bytes → ADC samples`` is the open question. Tested
hypotheses that did **not** match BW's ASCII export to within the
required ±1 ADC count:
1. ``10 NN`` data = 4-bit signed nibble deltas, channel-interleaved
(TVLM/VTLM/LMTV/all 24 permutations × 2 nibble orders × 2 sign
conventions = 96 combinations tested). All produce values that
diverge from truth after the first ~7 sample-sets.
2. ``20 NN`` data = int8 absolute samples for one channel. Magnitudes
in observed blocks (peak ~±34 in the smoothest event-c block at
offset 351) do not match any channel's PPV at any plausible
ADC-count quantization (1-count, 4-count, 8-count, 16-count).
3. ``00 NN`` marker = "skip N sample-sets". Sums of NN/4 across markers
do not match 80 sample-sets per segment.
4. Concatenating ALL ``10 NN`` payload bytes and reading as a continuous
nibble stream (TVLM round-robin) produces the same 96-combination
problem as (1).
The most promising lead — that ``20 NN`` blocks carry literal int8
sample-sequences for the largest-amplitude channel within a segment —
is consistent with the smooth waveform shape of those payloads, but
the magnitude scaling has not been pinned down. It's possible that
``10 NN`` and ``20 NN`` blocks carry different bit-widths of the same
channel-interleaved delta stream (variable-width like Rice coding)
with 4-bit deltas as default and 8-bit deltas as escape.
Potential next steps for whoever picks this up:
- Capture an event with a KNOWN external waveform (e.g. a calibration
signal of known frequency/amplitude) so the truth is unambiguous and
the magnitude scaling is unambiguous.
- Capture multiple events with the SAME signal but DIFFERENT geo_range
(Normal 10 in/s vs Sensitive 1.25 in/s) to disambiguate scaling.
- Examine sequential ``40 02`` segment headers for a single event — the
4-byte "anchor" should reflect cumulative sample state at the
boundary; matching it to truth at that sample index would unlock
the per-segment delta decode.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import List, Optional, Tuple
@dataclass
class WaveformBlock:
    """A single tagged block lifted out of a Blastware waveform-file body."""

    # Byte offset of the block's tag within the body.
    offset: int
    # First tag byte (0x10 / 0x20 / 0x00 / 0x30 / 0x40).
    tag_hi: int
    # Second tag byte (the NN value).
    tag_lo: int
    # Payload bytes that follow the 2-byte tag.
    data: bytes
    # Full on-wire size of the block, tag bytes included.
    length: int

    @property
    def kind(self) -> str:
        """Return both tag bytes as lowercase hex, e.g. ``"10 04"``."""
        return "{:02x} {:02x}".format(self.tag_hi, self.tag_lo)
def find_data_start(body: bytes) -> int:
    """Return the offset of the first ``10 NN`` tag near the start of *body*.

    Only the first 20 offsets are examined.  A tag qualifies when its first
    byte is ``0x10`` and NN is a non-zero multiple of 4 no greater than
    ``0xFC``.  Returns ``-1`` when no such tag is found.
    """
    scan_limit = min(20, len(body) - 1)
    hits = (
        pos
        for pos in range(scan_limit)
        if body[pos] == 0x10
        and body[pos + 1] % 4 == 0
        and 0 < body[pos + 1] <= 0xFC
    )
    return next(hits, -1)
def walk_body(body: bytes, start: Optional[int] = None) -> List[WaveformBlock]:
    """Parse the tagged-block stream of *body* into :class:`WaveformBlock` items.

    *start* is the offset of the first block; when omitted it is located
    with :func:`find_data_start`.  A negative start (including "not
    found") yields an empty list.

    The walk ends at the first unrecognised tag, at a block that would run
    past the end of *body*, or when fewer than two bytes remain.  Blocks
    are returned in stream order; the offset where the walk stopped is the
    last block's ``offset + length``.
    """
    cursor = find_data_start(body) if start is None else start
    if cursor < 0:
        return []

    end = len(body)
    parsed: List[WaveformBlock] = []
    while cursor + 1 < end:
        tag, nn = body[cursor], body[cursor + 1]
        nn_aligned = nn % 4 == 0
        if tag == 0x40 and nn == 0x02:
            span = 20
        elif tag == 0x00 and nn_aligned:
            span = 2
        elif tag == 0x10 and nn_aligned and 0 < nn <= 0xFC:
            span = nn // 2 + 2
        elif tag == 0x20 and nn_aligned and 0 < nn <= 0xFC:
            span = nn + 2
        elif tag == 0x30 and nn_aligned and 0 < nn <= 0x10:
            span = nn * 4
        else:
            # Unrecognised tag: stop here.
            break
        if cursor + span > end:
            # Truncated block: do not emit a partial payload.
            break
        parsed.append(
            WaveformBlock(
                offset=cursor,
                tag_hi=tag,
                tag_lo=nn,
                data=bytes(body[cursor + 2:cursor + span]),
                length=span,
            )
        )
        cursor += span
    return parsed
def split_segments(blocks: List[WaveformBlock]) -> List[List[WaveformBlock]]:
    """Partition *blocks* into segments, each opened by a ``40 02`` header.

    Blocks that precede the first ``40 02`` header form the first segment
    on their own.  Every ``40 02`` block starts a new segment and is that
    segment's first element.  An empty input gives an empty list.
    """
    grouped: List[List[WaveformBlock]] = []
    for blk in blocks:
        opens_segment = (blk.tag_hi, blk.tag_lo) == (0x40, 0x02)
        if opens_segment or not grouped:
            grouped.append([blk])
        else:
            grouped[-1].append(blk)
    return grouped
def parse_segment_header(block: WaveformBlock) -> Optional[dict]:
    """Split the payload of a ``40 02`` segment header into labelled fields.

    Returns ``None`` when *block* is not tagged ``40 02`` or when its
    payload is shorter than 18 bytes.  Otherwise returns a dict with:

    - ``anchor_bytes``: payload bytes 0..3 (role unconfirmed)
    - ``field2``: payload bytes 4..7 (role unconfirmed)
    - ``counter``: payload bytes 8..11 read as an unsigned little-endian int
    - ``fixed_pattern``: payload bytes 12..15
    - ``tail``: payload bytes 16..17
    """
    if block.tag_hi != 0x40 or block.tag_lo != 0x02:
        return None
    payload = block.data
    if len(payload) < 18:
        return None

    fields = {
        "anchor_bytes": payload[0:4],
        "field2": payload[4:8],
    }
    fields["counter"] = int.from_bytes(payload[8:12], "little", signed=False)
    fields["fixed_pattern"] = payload[12:16]
    fields["tail"] = payload[16:18]
    return fields
def decode_waveform_v2(body: bytes) -> Optional[dict]:
    """Placeholder for the per-channel sample decoder; always returns ``None``.

    Once a verified decoder exists this is meant to return a dict of the
    form ``{"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}``.
    The byte-to-sample mapping is still OPEN, so for now *body* is ignored
    and ``None`` is returned.

    Block-level structure can be inspected with :func:`walk_body` without
    relying on any per-byte interpretation.
    """
    return None
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
File diff suppressed because it is too large Load Diff
+252
View File
@@ -0,0 +1,252 @@
"""
Tests for minimateplus.waveform_codec — Blastware waveform-file body block walker.
These tests lock in the STRUCTURAL framing of the body codec. The byte-to-sample
mapping is open (see waveform_codec module docstring) — until that's nailed down,
:func:`decode_waveform_v2` returns ``None`` and there is no per-sample assertion
to make.
"""
from __future__ import annotations
import os
import pytest
from minimateplus.waveform_codec import (
WaveformBlock,
find_data_start,
parse_segment_header,
split_segments,
walk_body,
decode_waveform_v2,
)
# Root directory of the bundled fixture events, located next to this test
# file.  _fixture_path() joins it with an event name and that event's filename.
FIXTURES = os.path.join(
    os.path.dirname(__file__), "fixtures", "decode-re-5-8-26"
)
def _bw_body(path):
    """Read the file at *path* and return only its body bytes.

    The leading 43 bytes (22-byte header + 21-byte STRT record) and the
    trailing 26-byte footer are dropped.
    """
    with open(path, "rb") as handle:
        raw = handle.read()
    body_start = 43
    footer_len = 26
    return raw[body_start:-footer_len]
# Fixture metadata — bundled BW binaries from a real BE11529 unit, May 8 2026.
# Each is paired with a Blastware TXT export (the ASCII ground truth).
# Keys are event names (also the fixture sub-directory names used by
# _fixture_path); "filename" is the binary inside that directory.
FIXTURES_INFO = {
    "event-a": {
        "filename": "M529LKVQ.6S0",
        "n_samples": 3328,  # 3.0 s rectime + 0.25 s pretrig at 1024 sps
        "rectime": 3.0,
    },
    "event-b": {
        "filename": "M529LK5Q.RG0",
        "n_samples": 2304,  # 2.0 s rectime; 2304 = 2.25 s x 1024 sps (presumably the same 0.25 s pretrig)
        "rectime": 2.0,
    },
    "event-c": {
        "filename": "M529LK44.AB0",
        "n_samples": 1280,  # 1.0 s rectime; 1280 = 1.25 s x 1024 sps (presumably the same 0.25 s pretrig)
        "rectime": 1.0,
    },
    "event-d": {
        "filename": "M529LK2V.470",
        "n_samples": 1280,  # same length as event-c
        "rectime": 1.0,
    },
}
def _fixture_path(event_name):
    """Return the path of the bundled binary for *event_name*.

    Raises ``KeyError`` when *event_name* is not a key of ``FIXTURES_INFO``.
    """
    filename = FIXTURES_INFO[event_name]["filename"]
    return os.path.join(FIXTURES, event_name, filename)
# ── Find data start ──────────────────────────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_find_data_start_locates_first_block(event_name):
    """``find_data_start`` lands on a valid ``10 NN`` tag inside the first 20 bytes."""
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    body = _bw_body(path)
    start = find_data_start(body)
    assert 0 <= start < 20, f"expected start in [0, 20), got {start}"
    tag, nn = body[start], body[start + 1]
    assert tag == 0x10
    assert nn % 4 == 0
    assert 0 < nn <= 0xFC
def test_find_data_start_preamble_lengths():
    """Each of the 4 events has a 7-byte (single-shot) or 9-byte (continuous) preamble."""
    # Empirically: events a, b have a 9-byte preamble; events c, d have 7 bytes.
    expected = {"event-a": 9, "event-b": 9, "event-c": 7, "event-d": 7}
    starts = {}
    for name in FIXTURES_INFO:
        path = _fixture_path(name)
        if not os.path.exists(path):
            pytest.skip(f"fixture missing: {path}")
        starts[name] = find_data_start(_bw_body(path))
    for name, want in expected.items():
        assert starts[name] == want
# ── Block walker ─────────────────────────────────────────────────────────────
def test_walk_body_empty_returns_empty():
    """An empty body produces no blocks."""
    empty_body = b""
    assert walk_body(empty_body) == []
def test_walk_body_invalid_start_returns_empty():
    """A body whose forced start offset holds no recognized tag yields no blocks."""
    garbage = b"\xff\xff\xff\xff"
    assert walk_body(garbage, start=0) == []
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_produces_blocks(event_name):
    """Every fixture walks to a non-empty block list made only of known tags."""
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    blocks = walk_body(_bw_body(path))
    assert len(blocks) > 0
    # The five tag families the walker knows about.
    known_tags = (0x10, 0x20, 0x00, 0x30, 0x40)
    for b in blocks:
        assert b.tag_hi in known_tags, (
            f"unknown tag {b.tag_hi:#04x} at offset {b.offset}"
        )
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_block_lengths_consistent(event_name):
    """A block's ``length`` equals its 2-byte tag plus its payload size."""
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    tag_size = 2
    for b in walk_body(_bw_body(path)):
        assert tag_size + len(b.data) == b.length, (
            f"block at {b.offset} length mismatch: tag(2) + data({len(b.data)}) != length({b.length})"
        )
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_walk_body_blocks_contiguous(event_name):
    """Consecutive blocks abut exactly: no gaps and no overlaps."""
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    blocks = walk_body(_bw_body(path))
    for i, (prev, cur) in enumerate(zip(blocks, blocks[1:]), start=1):
        expected_offset = prev.offset + prev.length
        assert cur.offset == expected_offset, (
            f"gap/overlap between block {i-1} (off={prev.offset} len={prev.length}) "
            f"and block {i} (off={cur.offset})"
        )
# ── Segment splitting ────────────────────────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_split_segments_yields_at_least_one(event_name):
    """Splitting a walked fixture body gives at least one segment."""
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    segments = split_segments(walk_body(_bw_body(path)))
    assert len(segments) > 0
def test_split_segments_segment_count_at_least_one_per_event():
    """The walker should produce at least one ``40 02`` segment header per event.

    Note: the walker currently bails out partway through event-b (still an
    open issue — the body codec uses block lengths the walker doesn't
    handle correctly past offset ~427). The other 3 events walk farther
    and have many segment headers.

    Missing fixtures are passed over individually; the test is skipped
    (rather than passing vacuously) when none of them is present.
    """
    checked = 0
    for name in FIXTURES_INFO:
        path = _fixture_path(name)
        if not os.path.exists(path):
            continue
        checked += 1
        blocks = walk_body(_bw_body(path))
        # Match the full ``40 02`` tag, as the other header checks do.
        n_40 = sum(1 for b in blocks if b.tag_hi == 0x40 and b.tag_lo == 0x02)
        assert n_40 >= 1, f"{name}: no 40 02 segment header found"
    if not checked:
        pytest.skip("no fixtures available")
# ── Segment header parsing ───────────────────────────────────────────────────
def test_parse_segment_header_returns_none_for_non_40():
    """A block that is not tagged ``40 02`` is rejected with ``None``."""
    non_header = WaveformBlock(
        offset=0, tag_hi=0x10, tag_lo=0x04, data=b"\x00\x00", length=4
    )
    result = parse_segment_header(non_header)
    assert result is None
def test_parse_segment_header_decodes_fields():
    """Field offsets are checked against a known ``40 02`` block."""
    # First segment header from event-c at body offset 235:
    #   40 02  00 00 00 00  0a 4b 01 1e  47 00 00 00  02 00 00 01  00 01
    # bytes.fromhex ignores the spaces between the groups.
    payload = bytes.fromhex("00000000 0a4b011e 47000000 02000001 0001")
    header = WaveformBlock(
        offset=235, tag_hi=0x40, tag_lo=0x02, data=payload, length=20
    )
    decoded = parse_segment_header(header)
    assert decoded is not None
    assert decoded["counter"] == 0x47  # uint32 LE
    assert decoded["fixed_pattern"] == b"\x02\x00\x00\x01"
    assert decoded["anchor_bytes"] == b"\x00\x00\x00\x00"
def test_segment_counter_increments():
    """The counter at bytes [8:12] of each 40 02 payload never goes backwards.

    Only the first eight headers of event-c are compared, and at least
    five headers are required for the check to be meaningful.
    """
    path = _fixture_path("event-c")
    if not os.path.exists(path):
        pytest.skip("fixture missing")
    body = _bw_body(path)
    blocks = walk_body(body)
    headers = [b for b in blocks if b.tag_hi == 0x40 and b.tag_lo == 0x02]
    counters = [parse_segment_header(b)["counter"] for b in headers]
    assert len(counters) >= 5, "expect at least 5 segments to verify increments"
    # The BW counter is global, incrementing across the whole flash buffer;
    # some events may share a counter value with the previous event's tail
    # block, so the check is deliberately non-strict (>=) rather than +1.
    for prev, cur in zip(counters[:8], counters[1:8]):
        assert cur >= prev, f"counter went backwards: {prev} -> {cur}"
# ── decode_waveform_v2: currently a stub ─────────────────────────────────────
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO))
def test_decode_waveform_v2_returns_none_until_verified(event_name):
    """
    Pin ``decode_waveform_v2`` to its current stub behaviour.

    No verified per-byte sample decoder is wired up yet, so the function
    must return ``None``, which tells callers to stay on the legacy
    decoder. When a verified decoder lands, flip this assertion and add
    ground-truth tests against the bundled TXT exports.
    """
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    decoded = decode_waveform_v2(_bw_body(path))
    assert decoded is None