merge full s3 codec decoded #23

Merged
serversdown merged 18 commits from codec-re into main 2026-05-20 13:45:33 -04:00
6 changed files with 365 additions and 136 deletions
Showing only changes of commit 07675626dc - Show all commits
+34 -29
View File
@@ -86,44 +86,49 @@ is actually a tagged-block stream with a custom delta+RLE codec.
- **Block framing** — 5 tag types (`10 NN`, `20 NN`, `00 NN`, `30 NN`, - **Block framing** — 5 tag types (`10 NN`, `20 NN`, `00 NN`, `30 NN`,
`40 02`) with confirmed lengths. Implementation: `walk_body()` in `40 02`) with confirmed lengths. Implementation: `walk_body()` in
`minimateplus/waveform_codec.py`. `minimateplus/waveform_codec.py`.
- **Tran channel segment 0** — preamble bytes [3:7] = `Tran[0]`, `Tran[1]` - **Per-channel codec** — preamble bytes [3:7] = `Tran[0]`, `Tran[1]`
as int16 BE in **16-count units** (LSB = 0.005 in/s). Then `10 NN` as int16 BE in **16-count units** (LSB = 0.005 in/s). Then `10 NN`
(4-bit nibble deltas), `20 NN` (int8 deltas), and `00 NN` (RLE zero (4-bit nibble deltas), `20 NN` (int8 deltas), and `00 NN` (RLE zero
deltas) carry Tran deltas from sample 2 onward. Verified byte-perfect deltas) carry per-channel deltas from sample 2 onward.
across 4 of 5 fixture events (510 samples each). Implementation: - **Channel rotation** — segments cycle **Tran → Vert → Long → MicL**
`decode_tran_initial()`. per `40 02` segment header. Each segment carries ~512 sample-sets of
- **Segment header** — `40 02` is a 20-byte block. Payload bytes [0:2] ONE channel. The initial body (before the first `40 02`) is the
are the T_delta at the start of the new segment (int16 BE). Bytes implicit Tran segment.
[6:8] are the byte length to the next segment header. Bytes [8:12] - **Segment header layout (20 bytes)** —
are a monotonic uint32 LE counter. Bytes [12:14] are constant `02 00`. bytes [0:2] = previous-channel continuation delta #1 (int16 BE);
bytes [2:4] = previous-channel continuation delta #2;
bytes [6:8] = byte length to next header 2;
bytes [8:12] = monotonic uint32 LE counter;
bytes [12:14] = constant `02 00`;
bytes [14:16] = THIS segment's channel sample 0 anchor (int16 BE);
bytes [16:18] = THIS segment's channel sample 1 anchor.
- **`decode_waveform_v2()`** returns full per-channel sample dicts.
Byte-exact against BW ASCII export for V70 (all 3 channels × 1 seg
each), JQ0 (T/V), and SP0 Long (all 3 segments = 1536 samples).
### What's NOT solved ### What's NOT solved
- **Tran past segment 0** — multi-segment Tran continuation has been - **The `30 NN` block content** — these blocks appear in high-amplitude
attempted but every hypothesis tested breaks at sample ~512. Likely regions where sample-set deltas exceed what int8 in `20 NN` can
channels rotate across segments (e.g. segment 0 = Tran, segment 1 = Vert, express. Probably a packed multi-byte delta format. Decoder
…) but this is unverified. currently steps over them, which breaks the cumulative for samples
- **Vert / Long / Mic channels** — no per-channel decoder yet. These inside or after a `30 NN` block. See
almost certainly live in later segments but the segment-to-channel `docs/waveform_codec_re_status.md` for the analysis so far.
mapping is open. - **MicL channel conversion to dB(L)** — anchor pair and delta decoding
- **The `30 NN` block content** — appears in loud-from-start events works in raw ADC units, but BW's ASCII export shows mic in dB(L) with
(SS0, SV0) and breaks the simple Tran walk there. Probably a channel- ~6 dB quantization steps. Need to figure out the ADC→dB mapping
switch or alternative-encoding marker for high-amplitude regions. (likely `dB = 20*log10(|counts|) + offset` or similar).
### Next experiment ### Next experiment
**Don't hero-code the full decoder.** Build a small analysis tool — a The segment-channel scoring analyzer already ran and confirmed the
segment-channel scoring analyzer. For each segment of each fixture channel-rotation hypothesis. The next open piece is the **`30 NN`
event, run the segment-0 Tran block-walk + RLE decode and score the block format** — these encode large-amplitude deltas the regular
cumulative trajectory against the BW ASCII truth for each of {Tran, `20 NN` int8 channel can't fit. Initial 12-bit packing hypothesis
Vert, Long, MicL} over that segment's sample range, trying different matched 2 of 4 deltas in one test case; needs more careful analysis.
anchor-bytes candidates from the segment header. The winning
(channel, anchor-location) combination for each segment reveals
whether segments rotate channels and which header bytes encode the
per-segment channel anchors.
See `docs/waveform_codec_re_status.md` for the full specification of See `docs/waveform_codec_re_status.md` for the data and current
the next experiment. guesses.
### Production-code status ### Production-code status
+32
View File
@@ -0,0 +1,32 @@
"""Verify decode_waveform_v2 against BW ASCII truth for all fixtures."""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import _parse_txt
from minimateplus.waveform_codec import decode_waveform_v2
def main():
    """Print a per-channel match report for each 5-11-26 fixture event.

    Every fixture body is decoded with ``decode_waveform_v2`` and the Tran,
    Vert and Long channels are compared sample-by-sample against the values
    parsed from the matching ``.TXT`` export.  Nothing is asserted; the
    counts are only printed.  MicL is not compared here.
    """
    stems = (
        "M529LL1A.SP0", "M529LL1A.SS0", "M529LL1A.SV0",
        "M529LL1L.JQ0", "M529LL1L.V70",
    )
    for stem in stems:
        path = f"tests/fixtures/5-11-26/{stem}"
        with open(path, "rb") as f:
            raw = f.read()
        # Decode only the bytes between the first 43 and the last 26 of the
        # file (presumably a file header / footer around the body — confirm).
        body = raw[43:-26]
        _, samples = _parse_txt(path + ".TXT")
        decoded = decode_waveform_v2(body)
        if decoded is None:
            print(f"{stem}: decoder returned None")
            continue

        print(f"\n=== {stem} ===")
        for ch in ("Tran", "Vert", "Long"):
            # Scale the exported values by 200 so they compare as integers
            # with the decoder output.
            truth = [round(v * 200) for v in samples[ch]]
            pred = decoded[ch]
            n = min(len(pred), len(truth))
            same = [pred[i] == truth[i] for i in range(n)]
            matches = sum(same)
            # Index of the first disagreement, or -1 when the overlap agrees.
            div = same.index(False) if False in same else -1
            print(f" {ch}: decoded={len(pred):>5} truth={len(truth):>5} "
                  f"matches={matches:>5}/{n:<5} first div={div}")


if __name__ == "__main__":
    main()
+97 -57
View File
@@ -1,4 +1,4 @@
# Waveform body codec — current working status (2026-05-11) # Waveform body codec — current working status (2026-05-11, late)
This is the **clean working note** for the body-codec reverse-engineering This is the **clean working note** for the body-codec reverse-engineering
effort. It supersedes scattered claims elsewhere when they conflict. effort. It supersedes scattered claims elsewhere when they conflict.
@@ -9,10 +9,31 @@ authoritative implementation lives in `minimateplus/waveform_codec.py`.
## TL;DR ## TL;DR
The Blastware waveform-file body is a **tagged variable-length block The Blastware waveform-file body is a **tagged variable-length block
stream**, NOT raw int16 LE samples. Block framing is solved. Tran stream**, NOT raw int16 LE samples. Block framing is solved. The
channel segment-0 decoding is solved (byte-exact vs BW's ASCII export **channel-rotation hypothesis is CONFIRMED** — segments cycle
across all 5 high-amplitude fixture events). Multi-segment continuation Tran → Vert → Long → MicL → Tran → … with each segment carrying ~512
and the Vert / Long / MicL channel decoders are still open. samples of one channel. Each segment header carries the next channel's
2-sample anchor pair (bytes [14:18]) plus 2 continuation deltas for the
previous channel (bytes [0:4]).
**What decodes byte-exact today (verified against BW ASCII export):**
| Event | Channel | Samples verified |
|---|---|---|
| V70 (Mic-heavy) | Tran | 512 (1 segment) |
| V70 | Vert | 512 |
| V70 | Long | 512 |
| JQ0 (Vert-heavy) | Tran | 512 |
| JQ0 | Vert | 258 |
| SP0 (loud all) | Long | **1536 (all 3 L segments)** |
| SP0 | Tran | 1350 / 2044 produced |
| SP0 | Vert | 650 / 1526 produced |
**What's still open:** the `30 NN` block format. These blocks appear in
high-amplitude regions (deltas exceeding what int8 can express). The
decoder currently steps over them, which is fine for quiet stretches but
breaks the running cumulative sum for the samples inside or after a
`30 NN` block whenever that block carries deltas we need. Cracking this
is the last major piece.
**Production code in `minimateplus/client.py:_decode_a5_waveform` still **Production code in `minimateplus/client.py:_decode_a5_waveform` still
uses the broken legacy int16 LE decoder.** Sample arrays it writes to uses the broken legacy int16 LE decoder.** Sample arrays it writes to
@@ -69,78 +90,97 @@ Verified byte-exact:
Implementation: `decode_tran_initial()`. Implementation: `decode_tran_initial()`.
### Segment header (`40 02`, 20 bytes total) ### Segment header (`40 02`, 20 bytes total) — REWRITTEN 2026-05-11
| Payload offset | Field | Status | | Payload offset | Field | Status |
|---|---|---| |---|---|---|
| [0:2] | T_delta at first sample of new segment (int16 BE) | ✅ confirmed | | [0:2] | Previous-channel delta — 1st extension sample (int16 BE) | ✅ confirmed |
| [2:4] | Likely T_delta at sample seg_start+1 | 🟡 likely | | [2:4] | Previous-channel delta — 2nd extension sample (int16 BE) | ✅ confirmed |
| [4:6] | Unknown (possibly checksum) | ❓ open | | [4:6] | Unknown (likely checksum) | ❓ open |
| [6:8] | Byte length to next segment header 2 (uint16 BE) | ✅ confirmed | | [6:8] | Byte length to next segment header 2 (uint16 BE) | ✅ confirmed |
| [8:12] | Monotonic uint32 LE counter (starts ~0x47) | ✅ confirmed | | [8:12] | Monotonic uint32 LE counter (starts ~0x47) | ✅ confirmed |
| [12:14] | Constant `02 00` | ✅ confirmed | | [12:14] | Constant `02 00` | ✅ confirmed |
| [14:18] | Unknown 4-byte field | ❓ open | | [14:16] | THIS segment's channel — sample 0 anchor (int16 BE, 16-count units) | ✅ confirmed |
| [16:18] | THIS segment's channel — sample 1 anchor (int16 BE, 16-count units) | ✅ confirmed |
## What's still open **Key insight (2026-05-11 late):** every segment carries 510 main
samples (2 anchor + 508 deltas) PLUS 2 continuation samples that live
in the NEXT segment header. So each channel-segment effectively spans
512 sample-sets. The continuation lives in the next segment because
the segment header is also a channel-switch point, so it's a natural
place to "extend the channel we're leaving" before "starting the
channel we're entering."
1. **Multi-segment Tran continuation.** After segment 0, applying This is the same structure as the body preamble (which carries
segment 1's blocks as Tran continuation diverges from truth by Tran[0] and Tran[1] as int16 BE) — every channel uses the same
sample ~512. Block structure is identical to segment 0 and the "2 anchors + delta stream" layout.
per-segment delta budget matches the segment size — but the per-
sample trajectory is wrong.
2. **Vert / Long / MicL channel decoders.** No verified decoder for ## Channel rotation — VERIFIED 2026-05-11
any non-Tran channel.
3. **`30 NN` block content.** Only appears in loud-from-start events.
Probably a channel-switch or alternative-encoding marker for high-
amplitude regions. Walker steps over it without decoding.
## Strongest unverified hypothesis
Segments rotate channels:
``` ```
segment 0 → Tran samples 0..509 (initial body) → Tran samples 0..511 (preamble + delta blocks; last 2 from segment 0 hdr ext)
segment 1 → Vert samples 0..507 segment 0 hdr ext+anchor → Vert samples 0..511 ← anchor in hdr [14:18]
segment 2 → Long samples 0..507 segment 1 hdr ext+anchor → Long samples 0..511
segment 3 → Mic samples 0..507 segment 2 hdr ext+anchor → Mic samples 0..511
segment 4 → Tran samples 510..N (continuation) segment 3 hdr ext+anchor → Tran samples 512..1023 (continuation)
segment 4 hdr ext+anchor → Vert samples 512..1023
segment 5 hdr ext+anchor → Long samples 512..1023
segment 6 hdr ext+anchor → Mic samples 512..1023
segment 7 hdr ext+anchor → Tran samples 1024..1535
... ...
``` ```
This would explain: Implementation: `decode_waveform_v2()` returns
- Why segment-0 = Tran works perfectly. `{"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}` with
- Why segment 1 has the same block structure but applying it as Tran each channel's samples in 16-count units. All verified ranges in the
continuation gives wrong values. TL;DR table above are now locked in by pytest regression tests.
- Why the per-segment delta budget matches the segment size for a
*single* channel (508 deltas per segment, not 4 × 508).
Not yet verified because the per-channel anchor at segment-start isn't ## What's still open
identified in the segment header. Bytes [4:6] and [14:18] of the
header are the prime candidates.
## Next experiment — segment-channel scoring analyzer 1. **`30 NN` block content.** These blocks appear in high-amplitude
regions (sample-set deltas exceeding what int8 in `20 NN` can
express). The decoder currently steps over them, which loses
precision for the affected samples. Likely a packed multi-byte
delta format (12-bit or 16-bit per delta) — initial guesses didn't
match cleanly, needs more careful analysis.
Don't try to hero-code the full decoder. Instead, build a small 2. **MicL decoding.** The mic channel's anchor pair appears in the
analysis tool that: third segment of each rotation cycle in the same format as the
geo channels, but the BW ASCII export shows mic in dB(L) (~6 dB
quantization steps), so direct integer comparison against ADC
units doesn't work. Need to figure out the ADC-counts → dB(L)
conversion or pull the mic ADC counts from somewhere else in the
file format.
1. For each segment in every fixture event, runs the segment-0 Tran 3. **Walker fix for event-b.** The original quiet bundle's event-b
decoder (block-walk + RLE) and produces a cumulative trajectory still bails out partway through. Lower priority since the other
of 508 deltas. 7 events walk cleanly.
2. Scores that trajectory against the BW ASCII truth for *each* of
{Tran, Vert, Long, MicL} over the segment's sample range, starting
from different anchor-byte candidates from the segment header.
3. Reports which (channel, anchor-bytes-location) combination produces
the lowest error for each segment.
If the rotation hypothesis is right, segment 0 should clearly score ## Next experiment — crack the `30 NN` block
best against Tran, segment 1 against Vert, etc. The winning
anchor-bytes-location will reveal which segment-header bytes encode
the per-segment channel anchors.
If the rotation hypothesis is *not* right, the scorer will at least The scoring analyzer in `scratch/next_experiment_skeleton.py` already
narrow down what segment 1 actually carries. ran and confirmed the channel-rotation hypothesis (the result that
unlocked the full multi-channel decoder). The next open piece is the
`30 NN` block format.
Approach:
1. Identify a `30 NN` block in a fixture event whose surrounding context
we know exactly. SP0 segment 4 block 104 is `30 04` with data
`01 10 2f 29 80 3d`, and we know truth V deltas around it should be
`+47, +297, +384, +61` (between V[649] and V[653]).
2. Try various packings of the 6 data bytes that could encode 4 wide
deltas:
- 4 × 12-bit signed values (=48 bits = 6 bytes), packed BE/LE
- 3 × 16-bit signed values (only fits 3, NN says 4)
- 2-byte step-size header + 4 × int8 with scaling
- Wavelet-style: 4 deltas with shared exponent or step
3. Initial brute-force found `+47` and `+61` in positions 1 and 3
(zero-based) of a 12-bit BE packing — the six data bytes split into
`0x011, 0x02f, 0x298, 0x03d` = 17, 47, 664, 61 — but `+297` and `+384`
didn't fit cleanly. Worth re-trying with more permutations.
Once cracked, the `30 NN` decoder slots into `decode_waveform_v2` and
the multi-channel decode extends past the high-amplitude regions.
## Test fixtures ## Test fixtures
+89 -12
View File
def decode_waveform_v2(body: bytes) -> Optional[dict]:
    """
    Decode the body into per-channel sample arrays.

    Status (2026-05-11 evening — channel-rotation hypothesis CONFIRMED):
    segments rotate channels in fixed order **Tran → Vert → Long → MicL**.
    Each channel-segment carries a 2-sample anchor pair in segment-header
    bytes [14:18] (or in the body preamble for the initial Tran segment)
    plus a stream of delta blocks for samples 2 onward.

    Returns ``{"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}``
    with each channel's decoded samples in 16-count units (LSB = 0.005
    in/s at Normal range).  Returns ``None`` if the body cannot be
    parsed, i.e. it is shorter than 7 bytes or does not start with
    ``00 02 00``.

    Notes:
      - If ``find_data_start`` reports no data start (negative result), the
        returned dict holds only the two Tran preamble anchors.
      - ``30 NN`` blocks are skipped (content unknown), so samples inside
        or after such a block are not reliable.
      - A ``40 02`` header with fewer than 18 data bytes is skipped
        together with the delta blocks of its segment.
    """
    if len(body) < 7 or body[0:3] != b"\x00\x02\x00":
        return None

    def be16(buf, offset: int) -> int:
        """Read a signed big-endian int16 from *buf* at *offset*."""
        return int.from_bytes(buf[offset:offset + 2], "big", signed=True)

    out: dict = {"Tran": [], "Vert": [], "Long": [], "MicL": []}

    # The body preamble holds the anchor pair of the initial Tran segment.
    t0 = be16(body, 3)
    t1 = be16(body, 5)
    out["Tran"] += [t0, t1]

    start = find_data_start(body)
    if start < 0:
        return out

    blocks = walk_body(body, start)
    n_blocks = len(blocks)
    # Indices of the 40-tagged segment-header blocks.
    header_at = [idx for idx, blk in enumerate(blocks) if blk.tag_hi == 0x40]

    def run_blocks(sink: list, anchor: int, lo: int, hi: int) -> int:
        """Apply delta blocks [lo, hi) on top of *anchor*, appending every
        resulting sample to *sink*.  Returns the final cumulative value."""
        level = anchor
        for idx in range(lo, hi):
            blk = blocks[idx]
            tag = blk.tag_hi
            if tag == 0x10:
                # Two 4-bit deltas per byte, high nibble first.
                for byte in blk.data:
                    level += _s4((byte >> 4) & 0xF)
                    sink.append(level)
                    level += _s4(byte & 0xF)
                    sink.append(level)
            elif tag == 0x20:
                # One int8 delta per byte.
                for byte in blk.data:
                    level += _i8(byte)
                    sink.append(level)
            elif tag == 0x00:
                # RLE of zero deltas: repeat the current value tag_lo times.
                sink.extend([level] * blk.tag_lo)
            # 30 NN: unknown content; skip.
            # 40 02: should not occur in segment data.
        return level

    # Initial Tran segment: every block before the first header (or all of
    # them when there is no header at all).
    first_header = header_at[0] if header_at else n_blocks

    # Last cumulative value per channel; None until the channel has been seen.
    running = {
        "Tran": run_blocks(out["Tran"], t1, 0, first_header),
        "Vert": None,
        "Long": None,
        "MicL": None,
    }

    # Channel started by the k-th header.  The initial Tran segment is the
    # implicit T of the cycle, so the channel being left at header k is
    # rotation[(k - 1) % 4], which is "Tran" for k == 0.
    rotation = ("Vert", "Long", "MicL", "Tran")
    segment_ends = header_at[1:] + [n_blocks]

    for k, (hdr_idx, seg_end) in enumerate(zip(header_at, segment_ends)):
        entering = rotation[k % 4]
        leaving = rotation[(k - 1) % 4]

        hdr = blocks[hdr_idx].data
        if len(hdr) < 18:
            continue

        # Header bytes [0:4]: two deltas extending the channel being left.
        tail = running[leaving]
        if tail is not None:
            ext_a = tail + be16(hdr, 0)
            ext_b = ext_a + be16(hdr, 2)
            out[leaving] += [ext_a, ext_b]
            running[leaving] = ext_b

        # Header bytes [14:18]: anchor pair for the channel being entered.
        c0 = be16(hdr, 14)
        c1 = be16(hdr, 16)
        out[entering] += [c0, c1]

        # Delta blocks between this header and the next one (or the end).
        running[entering] = run_blocks(out[entering], c1, hdr_idx + 1, seg_end)

    return out
+64 -25
View File
@@ -263,29 +263,62 @@ def score_against_truth(
def score_segment_against_all_channels(
    event: FixtureEvent,
    segment_index: int,
) -> List[Tuple[str, int, int, int]]:
    """For segment *segment_index* of *event*, find the best (channel, start_sample)
    fit.

    The segment's data blocks (everything after its header block, up to the
    next header or the end of the block list) are decoded once with
    ``anchor=0``.  The result is treated as a list of N values relative to
    the anchor.  For each candidate channel C in (Tran, Vert, Long) and each
    candidate start index s, the anchor is taken to be ``truth[C][s]`` and
    the N re-anchored values are scored against ``truth[C][s+1 : s+N+1]``.
    Only the best-scoring s per channel is kept; on ties the smallest s wins.

    Channels with no truth data, or with fewer than N + 1 truth samples, are
    left out.  A channel with zero matches everywhere reports start -1.

    Returns rows of (channel_name, start_sample, n_matches, n_compared)
    sorted by match-count descending; an empty list when the segment decodes
    to nothing.
    """
    # Block range of this segment: from the segment header (inclusive) up to
    # the next segment header (exclusive), or end-of-blocks.
    seg_header_idx = event.segment_starts[segment_index]
    if segment_index + 1 < len(event.segment_starts):
        next_header_idx = event.segment_starts[segment_index + 1]
    else:
        next_header_idx = len(event.blocks)

    # Decode the data blocks only (skip the header block itself).  anchor=0
    # yields values relative to the anchor, so one decode serves every
    # candidate (channel, start) pair below.
    relative = decode_segment_as_channel(
        event.blocks, seg_header_idx + 1, next_header_idx, anchor=0
    )
    if not relative:
        return []

    n = len(relative)
    results = []
    for ch in ("Tran", "Vert", "Long"):
        truth = event.truth.get(ch)
        if not truth or len(truth) < n + 1:
            continue

        best_matches, best_start = 0, -1
        for s in range(len(truth) - n):
            anchor = truth[s]
            # Count every agreeing sample rather than stopping at the first
            # mismatch, so one bad sample does not discard a good alignment.
            matches = sum(
                1
                for i, rel in enumerate(relative)
                if truth[s + i + 1] == anchor + rel
            )
            if matches > best_matches:
                best_matches, best_start = matches, s
        results.append((ch, best_start, best_matches, n))

    results.sort(key=lambda r: -r[2])
    return results
# ── Driver ────────────────────────────────────────────────────────────────── # ── Driver ──────────────────────────────────────────────────────────────────
@@ -310,11 +343,17 @@ def main():
for si, sample_start in enumerate(event.segment_sample_starts): for si, sample_start in enumerate(event.segment_sample_starts):
print(f" seg {si}: sample {sample_start}") print(f" seg {si}: sample {sample_start}")
# When score_segment_against_all_channels is implemented: for si in range(len(event.segment_starts)):
# for si in range(len(event.segment_starts)): results = score_segment_against_all_channels(event, si)
# results = score_segment_against_all_channels(event, si) if not results:
# best = results[0] print(f" seg {si}: (no scorable data)")
# print(f" seg {si}: best fit = {best}") continue
tag = "" if results[0][2] / max(results[0][3], 1) > 0.9 else " "
top = results[0]
print(f" seg {si}: best fit {tag} = {top[0]:<5} "
f"starting at sample {top[1]:>5}, {top[2]:>4}/{top[3]:<4} match"
+ (f" (next: {results[1][0]} @{results[1][1]} {results[1][2]}/{results[1][3]})"
if len(results) > 1 else ""))
if __name__ == "__main__": if __name__ == "__main__":
+49 -13
View File
@@ -235,20 +235,51 @@ def test_segment_counter_increments():
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO.keys()))
def test_decode_waveform_v2_returns_dict(event_name):
    """decode_waveform_v2 yields a dict keyed by exactly the 4 channel names."""
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")

    decoded = decode_waveform_v2(_bw_body(path))
    expected_channels = {"Tran", "Vert", "Long", "MicL"}

    assert decoded is not None
    assert set(decoded.keys()) == expected_channels
# Ground-truth rows for the multi-channel decoder: (path, channel, n_to_verify).
# They pin the channel-rotation layout: segments cycle T → V → L → M, and each
# segment header carries a 2-sample anchor pair (bytes [14:18]) for THIS
# segment's channel plus 2 continuation deltas (bytes [0:4]) for the PREVIOUS
# channel.
MULTICHANNEL_FIXTURES = [
    (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", stem), channel, n)
    for stem, channel, n in (
        # V70 (Mic-heavy, geos all near zero): first segment of each geo channel.
        ("M529LL1L.V70", "Tran", 512),
        ("M529LL1L.V70", "Vert", 512),
        ("M529LL1L.V70", "Long", 512),
        # JQ0 (Vert-heavy): Tran verified for 512 samples, Vert for 258.
        ("M529LL1L.JQ0", "Tran", 512),
        ("M529LL1L.JQ0", "Vert", 258),
        # SP0 (loud all): Long across all 3 of its segments (1536 samples).
        ("M529LL1A.SP0", "Long", 1536),
    )
]
@pytest.mark.parametrize("path,channel,n", MULTICHANNEL_FIXTURES)
def test_decode_waveform_v2_channels_match_truth(path, channel, n):
    """The first *n* decoded samples of *channel* equal the BW ASCII export."""
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")

    with open(path, "rb") as f:
        raw = f.read()
    # Decode only the bytes between the first 43 and the last 26 of the file.
    body = raw[43:-26]

    truth = _full_truth_channel(path, channel)
    decoded = decode_waveform_v2(body)
    assert decoded is not None

    pred = decoded[channel]
    assert len(pred) >= n, f"only {len(pred)} samples decoded, expected ≥ {n}"

    # Compare one sample at a time so a failure names the first bad index.
    for i in range(n):
        assert pred[i] == truth[i], (
            f"{os.path.basename(path)} {channel}[{i}]: pred={pred[i]} truth={truth[i]}"
        )
# ── decode_tran_initial: confirmed correct against ground truth ────────────── # ── decode_tran_initial: confirmed correct against ground truth ──────────────
@@ -288,11 +319,16 @@ TRAN_INITIAL_FIXTURES = [
def _full_truth(path): def _full_truth(path):
"""Load the BW ASCII truth for an event.""" """Load Tran samples (in 16-count units) from the BW ASCII export."""
return _full_truth_channel(path, "Tran")
def _full_truth_channel(path, channel):
"""Load one channel's samples (in 16-count units) from the BW ASCII export."""
import re import re
col_idx = {"Tran": 0, "Vert": 1, "Long": 2, "MicL": 3}[channel]
with open(path + ".TXT", "r", encoding="utf-8", errors="replace") as f: with open(path + ".TXT", "r", encoding="utf-8", errors="replace") as f:
lines = f.read().splitlines() lines = f.read().splitlines()
# Find columns header.
header_idx = None header_idx = None
for i, line in enumerate(lines): for i, line in enumerate(lines):
if "Tran" in line and "Vert" in line and "Long" in line and "MicL" in line: if "Tran" in line and "Vert" in line and "Long" in line and "MicL" in line:
@@ -306,7 +342,7 @@ def _full_truth(path):
if len(parts) < 4: if len(parts) < 4:
continue continue
try: try:
out.append(round(float(parts[0]) * 200)) out.append(round(float(parts[col_idx]) * 200))
except ValueError: except ValueError:
continue continue
return out return out