codec-re: channel rotation CONFIRMED — full multi-channel decoder works

The segment-channel scoring analyzer (from scratch/next_experiment_skeleton.py)
ran and immediately confirmed the rotation hypothesis:

  SP0 seg 0: best fit Vert  508/508  ✓
  SP0 seg 1: best fit Long  508/508  ✓
  SP0 seg 3: best fit Tran  508/508  ✓  (Tran continuation)
  SP0 seg 5: best fit Long  508/508  ✓
  SP0 seg 9: best fit Long  508/508  ✓
  V70 seg 0: best fit Vert  508/508  ✓
  V70 seg 1: best fit Long  508/508  ✓

Channels rotate Tran → Vert → Long → MicL per 40 02 segment header.

Also discovered that the segment header does DOUBLE duty: bytes [14:18]
anchor the NEW segment's channel (2 samples as int16 BE in 16-count units),
AND bytes [0:4] extend the PREVIOUS channel by 2 more samples (2 deltas as
int16 BE).  This is the same "2 anchors + delta stream" structure as the
body preamble for Tran.

decode_waveform_v2 now returns full per-channel sample dicts.
Byte-exact verified ranges:
  V70: Tran 512, Vert 512, Long 512   (all first segments)
  JQ0: Tran 512, Vert 258
  SP0: Long 1536 (all 3 L segments)

Still open: the 30 NN block format (high-amplitude packed deltas) — which
appears mid-segment when single-byte deltas can't carry the magnitude.

6 new tests bring the count to 46.  All passing.
This commit is contained in:
Claude
2026-05-12 03:57:38 +00:00
committed by serversdown
parent ae0e17b5dc
commit 07675626dc
6 changed files with 365 additions and 136 deletions
+49 -13
View File
@@ -235,20 +235,51 @@ def test_segment_counter_increments():
@pytest.mark.parametrize("event_name", list(FIXTURES_INFO.keys()))
def test_decode_waveform_v2_returns_none_until_verified(event_name):
"""
The full per-channel decoder is not yet wired up.
This test ensures decode_waveform_v2 returns ``None`` so callers know
to keep using the legacy decoder. When a verified decoder lands,
flip this assertion and add ground-truth tests against the bundled
TXT exports.
"""
def test_decode_waveform_v2_returns_dict(event_name):
"""decode_waveform_v2 returns a dict with all 4 channels (verified 2026-05-11)."""
path = _fixture_path(event_name)
if not os.path.exists(path):
pytest.skip(f"fixture missing: {path}")
body = _bw_body(path)
assert decode_waveform_v2(body) is None
result = decode_waveform_v2(body)
assert result is not None
assert set(result.keys()) == {"Tran", "Vert", "Long", "MicL"}
# Ground-truth cases for the multi-channel decoder, one tuple per case:
# (path, channel, n_to_verify).
# Together they pin down the channel-rotation hypothesis: successive segments
# cycle T → V → L → M, and every segment header holds a 2-sample anchor pair
# (bytes [14:18]) for its OWN channel along with 2 continuation deltas
# (bytes [0:4]) for the PREVIOUS channel.
MULTICHANNEL_FIXTURES = [
    (
        os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", filename),
        channel,
        n_to_verify,
    )
    for filename, channel, n_to_verify in (
        # V70 (Mic-heavy, geos all near zero): exact through the first
        # segment of every geo channel.
        ("M529LL1L.V70", "Tran", 512),
        ("M529LL1L.V70", "Vert", 512),
        ("M529LL1L.V70", "Long", 512),
        # JQ0 (Vert-heavy): Tran is byte-exact for 512 samples, Vert for the
        # first 258.
        ("M529LL1L.JQ0", "Tran", 512),
        ("M529LL1L.JQ0", "Vert", 258),
        # SP0 (loud on all channels): all 3 Long segments are byte-exact
        # (1536 samples).
        ("M529LL1A.SP0", "Long", 1536),
    )
]
@pytest.mark.parametrize("path,channel,n", MULTICHANNEL_FIXTURES)
def test_decode_waveform_v2_channels_match_truth(path, channel, n):
    """Decoded channels match the BW ASCII export for the verified ranges.

    Args:
        path: Event file to decode; the truth export is read from
            ``path + ".TXT"`` by ``_full_truth_channel``.
        channel: Key of the channel to compare in the decoder's result.
        n: Number of leading samples that must match exactly.

    The test is skipped when the event fixture is not present on disk.
    """
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    with open(path, "rb") as f:
        # Drop 43 leading and 26 trailing bytes so only the waveform body is
        # decoded.  NOTE(review): presumably the same framing that
        # `_bw_body` strips — confirm and reuse the helper if so.
        body = f.read()[43:-26]
    truth = _full_truth_channel(path, channel)
    decoded = decode_waveform_v2(body)
    assert decoded is not None
    pred = decoded[channel]
    assert len(pred) >= n, f"only {len(pred)} samples decoded, expected ≥ {n}"
    # Guard the truth side too: without this a short or partially parsed TXT
    # export surfaces as a bare IndexError in the loop below.
    assert len(truth) >= n, (
        f"{os.path.basename(path)} {channel}: truth export has only "
        f"{len(truth)} samples, need {n}"
    )
    for i in range(n):
        assert pred[i] == truth[i], (
            f"{os.path.basename(path)} {channel}[{i}]: pred={pred[i]} truth={truth[i]}"
        )
# ── decode_tran_initial: confirmed correct against ground truth ──────────────
@@ -288,11 +319,16 @@ TRAN_INITIAL_FIXTURES = [
def _full_truth(path):
"""Load the BW ASCII truth for an event."""
"""Load Tran samples (in 16-count units) from the BW ASCII export."""
return _full_truth_channel(path, "Tran")
def _full_truth_channel(path, channel):
"""Load one channel's samples (in 16-count units) from the BW ASCII export."""
import re
col_idx = {"Tran": 0, "Vert": 1, "Long": 2, "MicL": 3}[channel]
with open(path + ".TXT", "r", encoding="utf-8", errors="replace") as f:
lines = f.read().splitlines()
# Find columns header.
header_idx = None
for i, line in enumerate(lines):
if "Tran" in line and "Vert" in line and "Long" in line and "MicL" in line:
@@ -306,7 +342,7 @@ def _full_truth(path):
if len(parts) < 4:
continue
try:
out.append(round(float(parts[0]) * 200))
out.append(round(float(parts[col_idx]) * 200))
except ValueError:
continue
return out