codec-re: 00 NN is RLE; full Tran segment-0 decode (4 of 5 events)

User uploaded a Vert-heavy event (JQ0) and a Mic-heavy event (V70). Those two were exactly what was needed to crack the next piece:

- 00 NN block = run-length-encoded zero deltas in the current channel. Append NN copies of the current cumulative value (no change).
- find_data_start now recognizes 00 NN as a valid first tag (some events begin with a leading 00 NN RLE block).
- decode_tran_initial now decodes the FULL segment 0 (not just the first data block).

Results across 5 fixture events:

- M529LL1A.SP0 (loud-all-channels) : 510 / 510 ✓
- M529LL1L.JQ0 (Vert-heavy) : 510 / 510 ✓
- M529LL1L.V70 (Mic-heavy) : 510 / 510 ✓
- M529LL1A.SV0 (loud-from-start) : 58 / 58 ✓
- M529LL1A.SS0 (loud-from-start) : 42 / 502 (stops at first 30 04)

The 30 04 block (only seen in loud-from-start events) hasn't been decoded yet — likely a channel-switch marker for the high-amplitude regime.

Also discovered: segment header (40 02) payload bytes [0:2] = T_delta at first sample of new segment, [6:8] = byte length to next segment. Multi-segment Tran decoding still diverges after sample 512 because the per-segment channel ordering after the header is unknown.

Tests: 40 pass (up from 36).

Files:

- minimateplus/waveform_codec.py: find_data_start fix, RLE handling, full segment-0 decode in decode_tran_initial
- tests/test_waveform_codec.py: synthetic RLE test, full segment 0 tests for JQ0 and V70
- tests/fixtures/5-11-26/: M529LL1L.JQ0, M529LL1L.V70 + TXT exports
- docs/instantel_protocol_reference.md §7.6.1: RLE + segment-header docs
2026-05-11 22:29:07 +00:00
parent 6ac126e05c
commit a0c9a482c7
10 changed files with 7195 additions and 62 deletions
@@ -78,25 +78,23 @@ def test_find_data_start_locates_first_block(event_name):
    body = _bw_body(path)
    start = find_data_start(body)
    assert 0 <= start < 20, f"expected start in [0, 20), got {start}"
-    assert body[start] == 0x10
-    assert body[start + 1] % 4 == 0
-    assert 0 < body[start + 1] <= 0xFC
+    assert body[start] in (0x00, 0x10, 0x20, 0x30, 0x40), (
+        f"first tag byte 0x{body[start]:02x} not a recognized block type"
+    )
+    assert body[start + 1] % 4 == 0 or (body[start] == 0x40 and body[start + 1] == 0x02)


-def test_find_data_start_preamble_lengths():
-    """All 4 events have either a 7-byte (single-shot) or 9-byte (continuous) preamble."""
-    starts = {}
+def test_find_data_start_canonical_offset_7():
+    """All events have a 7-byte preamble (3-byte magic + 4-byte Tran anchors)."""
    for name in FIXTURES_INFO:
        path = _fixture_path(name)
        if not os.path.exists(path):
            pytest.skip(f"fixture missing: {path}")
        body = _bw_body(path)
-        starts[name] = find_data_start(body)
-    # Empirically: events a, b have 9-byte preamble; events c, d have 7-byte.
-    assert starts["event-a"] == 9
-    assert starts["event-b"] == 9
-    assert starts["event-c"] == 7
-    assert starts["event-d"] == 7
+        # Sanity: magic
+        assert body[0:3] == b"\x00\x02\x00", f"{name}: bad magic"
+        # First tag at offset 7
+        assert find_data_start(body) == 7, f"{name}: expected start=7"


 # ── Block walker ─────────────────────────────────────────────────────────────
@@ -274,9 +272,46 @@ TRAN_INITIAL_FIXTURES = [
        [-745, -762, -771, -774, -779, -794, -808, -811, -811, -819],
        46,
    ),
+    # Vert-heavy event (T near zero) — segment 0 = 510 samples, all decode correctly.
+    (
+        os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.JQ0"),
+        [0] * 4 + [-1, 0, 0, -1, -1, 0],
+        38,
+    ),
+    # Mic-heavy event (geos all near zero) — segment 0 = 482 samples.
+    (
+        os.path.join(os.path.dirname(__file__), "fixtures", "5-11-26", "M529LL1L.V70"),
+        [0] * 10,
+        6,
+    ),
 ]


+def _full_truth(path):
+    """Load the BW ASCII truth for an event."""
+    import re
+    with open(path + ".TXT", "r", encoding="utf-8", errors="replace") as f:
+        lines = f.read().splitlines()
+    # Find columns header.
+    header_idx = None
+    for i, line in enumerate(lines):
+        if "Tran" in line and "Vert" in line and "Long" in line and "MicL" in line:
+            header_idx = i
+            break
+    if header_idx is None:
+        return None
+    out = []
+    for line in lines[header_idx + 1:]:
+        parts = re.split(r"\s+", line.strip())
+        if len(parts) < 4:
+            continue
+        try:
+            out.append(round(float(parts[0]) * 200))
+        except ValueError:
+            continue
+    return out
+
+
@pytest.mark.parametrize("path,expected,n_required", TRAN_INITIAL_FIXTURES)
 def test_decode_tran_initial_matches_ground_truth(path, expected, n_required):
    """The Tran initial decoder produces values matching the BW ASCII export exactly."""
@@ -312,3 +347,32 @@ def test_decode_tran_initial_synthetic_body():
    decoded = decode_tran_initial(body)
    # T[0]=10, T[1]=20, then deltas (+1, -1, +2, -2) from T[1]=20
    assert decoded == [10, 20, 21, 20, 22, 20]
+
+
+def test_decode_tran_initial_with_rle():
+    """A synthetic body with 00 NN RLE block runs the current Tran value forward."""
+    # T[0]=5, T[1]=5, then 00 08 RLE block = 8 zero deltas → T[2..9] = 5
+    body = b"\x00\x02\x00\x00\x05\x00\x05" + b"\x00\x08"
+    decoded = decode_tran_initial(body)
+    assert decoded == [5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
+
+
+def test_decode_tran_initial_full_segment_silent_events():
+    """For events with near-silent Tran, segment 0 (~482-510 samples) decodes fully."""
+    for path, _, _ in TRAN_INITIAL_FIXTURES[3:]:  # JQ0 (Vert-heavy) and V70 (Mic-heavy)
+        if not os.path.exists(path):
+            pytest.skip(f"fixture missing: {path}")
+        with open(path, "rb") as f:
+            body = f.read()[43:-26]
+        truth = _full_truth(path)
+        decoded = decode_tran_initial(body)
+        assert decoded is not None
+        # The decoder should produce a clean run of samples; check ALL of them
+        # match truth (segment 0 is fully solved for events where T is near zero).
+        n = len(decoded)
+        for i in range(n):
+            assert decoded[i] == truth[i], (
+                f"{os.path.basename(path)}: sample {i}: decoded={decoded[i]} truth={truth[i]}"
+            )
+        # And we should have decoded at least 400 samples (= segment 0 worth).
+        assert n >= 400, f"only {n} samples decoded for {path}"