codec-re: channel rotation CONFIRMED — full multi-channel decoder works

The segment-channel scoring analyzer (from scratch/next_experiment_skeleton.py)
ran and immediately confirmed the rotation hypothesis:

    SP0 seg 0: best fit Vert 508/508 ✓
    SP0 seg 1: best fit Long 508/508 ✓
    SP0 seg 3: best fit Tran 508/508 ✓ (Tran continuation)
    SP0 seg 5: best fit Long 508/508 ✓
    SP0 seg 9: best fit Long 508/508 ✓
    V70 seg 0: best fit Vert 508/508 ✓
    V70 seg 1: best fit Long 508/508 ✓

Channels rotate Tran → Vert → Long → MicL per 40 02 segment header.

Also discovered that the segment header does DOUBLE duty:

  - bytes [14:18] anchor the NEW segment's channel (2 samples as int16 BE in 16-count units), AND
  - bytes [0:4] extend the PREVIOUS channel by 2 more samples (2 deltas as int16 BE).

This is the same "2 anchors + delta stream" structure as the body preamble for Tran.

decode_waveform_v2 now returns a dict mapping each channel name to its decoded samples. Byte-exact verified ranges:

    V70: Tran 512, Vert 512, Long 512 (all first segments)
    JQ0: Tran 512, Vert 258
    SP0: Long 1536 (all 3 L segments)

Still open: the 30 NN block format (high-amplitude packed deltas) — appears mid-segment when single-byte deltas can't carry the magnitude.

6 new tests bring the count to 46. All passing.
2026-05-12 03:57:38 +00:00
parent ae0e17b5dc
commit 07675626dc
6 changed files with 365 additions and 136 deletions
@@ -235,20 +235,51 @@ def test_segment_counter_increments():


@pytest.mark.parametrize("event_name", list(FIXTURES_INFO.keys()))
-def test_decode_waveform_v2_returns_none_until_verified(event_name):
-    """
-    The full per-channel decoder is not yet wired up.
-
-    This test ensures decode_waveform_v2 returns ``None`` so callers know
-    to keep using the legacy decoder.  When a verified decoder lands,
-    flip this assertion and add ground-truth tests against the bundled
-    TXT exports.
-    """
+def test_decode_waveform_v2_returns_dict(event_name):
+    """decode_waveform_v2 returns a dict with all 4 channels (verified 2026-05-11)."""
    path = _fixture_path(event_name)
    if not os.path.exists(path):
        pytest.skip(f"fixture missing: {path}")
    body = _bw_body(path)
-    assert decode_waveform_v2(body) is None
+    result = decode_waveform_v2(body)
+    assert result is not None
+    assert set(result.keys()) == {"Tran", "Vert", "Long", "MicL"}
+
+
+# Multi-channel ground-truth fixtures.  Each row: (path, channel, n_to_verify).
+# These lock in the channel-rotation hypothesis: segments cycle T → V → L → M,
+# with each segment header carrying a 2-sample anchor pair (bytes [14:18])
+# for THIS segment's channel plus 2 continuation deltas (bytes [0:4]) for
+# the PREVIOUS channel.
+MULTICHANNEL_FIXTURES = [
+    # V70 (Mic-heavy, geos all near zero): byte-exact through the first segment (512 samples) of Tran, Vert and Long.
+    (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.V70"), "Tran", 512),
+    (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.V70"), "Vert", 512),
+    (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.V70"), "Long", 512),
+    # JQ0 (Vert-heavy): Tran decodes byte-exact for the first 512 samples, Vert for the first 258.
+    (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.JQ0"), "Tran", 512),
+    (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.JQ0"), "Vert", 258),
+    # SP0 (loud all): Long all 3 segments byte-exact (1536 samples).
+    (os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1A.SP0"), "Long", 1536),
+]
+
+
+@pytest.mark.parametrize("path,channel,n", MULTICHANNEL_FIXTURES)
+def test_decode_waveform_v2_channels_match_truth(path, channel, n):
+    """Decoded channels match the BW ASCII export byte-exact for the verified ranges."""
+    if not os.path.exists(path):
+        pytest.skip(f"fixture missing: {path}")
+    with open(path, "rb") as f:
+        body = f.read()[43:-26]
+    truth = _full_truth_channel(path, channel)
+    decoded = decode_waveform_v2(body)
+    assert decoded is not None
+    pred = decoded[channel]
+    assert len(pred) >= n, f"only {len(pred)} samples decoded, expected ≥ {n}"
+    for i in range(n):
+        assert pred[i] == truth[i], (
+            f"{os.path.basename(path)} {channel}[{i}]: pred={pred[i]} truth={truth[i]}"
+        )


 # ── decode_tran_initial: confirmed correct against ground truth ──────────────
@@ -288,11 +319,16 @@ TRAN_INITIAL_FIXTURES = [


 def _full_truth(path):
-    """Load the BW ASCII truth for an event."""
+    """Load Tran samples (in 16-count units) from the BW ASCII export."""
+    return _full_truth_channel(path, "Tran")
+
+
+def _full_truth_channel(path, channel):
+    """Load one channel's samples (in 16-count units) from the BW ASCII export."""
    import re
+    col_idx = {"Tran": 0, "Vert": 1, "Long": 2, "MicL": 3}[channel]
    with open(path + ".TXT", "r", encoding="utf-8", errors="replace") as f:
        lines = f.read().splitlines()
-    # Find columns header.
    header_idx = None
    for i, line in enumerate(lines):
        if "Tran" in line and "Vert" in line and "Long" in line and "MicL" in line:
@@ -306,7 +342,7 @@ def _full_truth(path):
        if len(parts) < 4:
            continue
        try:
-            out.append(round(float(parts[0]) * 200))
+            out.append(round(float(parts[col_idx]) * 200))
        except ValueError:
            continue
    return out