07675626dc
The segment-channel scoring analyzer (from scratch/next_experiment_skeleton.py) ran and immediately confirmed the rotation hypothesis: SP0 seg 0: best fit Vert 508/508 ✓ SP0 seg 1: best fit Long 508/508 ✓ SP0 seg 3: best fit Tran 508/508 ✓ (Tran continuation) SP0 seg 5: best fit Long 508/508 ✓ SP0 seg 9: best fit Long 508/508 ✓ V70 seg 0: best fit Vert 508/508 ✓ V70 seg 1: best fit Long 508/508 ✓ Channels rotate Tran → Vert → Long → MicL per 40 02 segment header. Also discovered the segment header has DOUBLE duty: bytes [14:18] anchor the NEW segment's channel (2 samples as int16 BE in 16-count units), AND bytes [0:4] extend the PREVIOUS channel by 2 more samples (2 deltas as int16 BE). This is the same "2 anchors + delta stream" structure as the body preamble for Tran. decode_waveform_v2 now returns full per-channel sample dicts. Byte-exact verified ranges: V70: Tran 512, Vert 512, Long 512 (all first segments) JQ0: Tran 512, Vert 258 SP0: Long 1536 (all 3 L segments) Still open: the 30 NN block format (high-amplitude packed deltas) — appears mid-segment when single-byte deltas can't carry the magnitude. 6 new tests bring the count to 46. All passing.
361 lines
15 KiB
Python
361 lines
15 KiB
Python
"""
|
||
scratch/next_experiment_skeleton.py — segment-channel scoring analyzer.

This is the suggested NEXT EXPERIMENT for cracking the waveform body codec.
The goal is to figure out what segments 1+ contain, since segment 0 = Tran
is solved but multi-segment continuation diverges from truth at sample ~512.

────────────────────────────────────────────────────────────────────────────
The hypothesis to test
────────────────────────────────────────────────────────────────────────────

Segments rotate through channels:

    segment 0 → Tran samples 0..509
    segment 1 → Vert samples 0..507
    segment 2 → Long samples 0..507
    segment 3 → MicL samples 0..507
    segment 4 → Tran samples 510..N (continuation)
    ...

This would explain why segment 0 works perfectly (it's pure Tran) and why
applying segment 1's blocks as Tran continuation gives wrong values
(it's actually Vert).

────────────────────────────────────────────────────────────────────────────
What the analyzer should do
────────────────────────────────────────────────────────────────────────────

For each segment in each fixture event:

1. Run the segment-0 block-walker + RLE decode (the same algorithm that
   ``decode_tran_initial`` uses) over the segment's blocks. Start from
   some anchor value and produce a cumulative trajectory of length =
   number-of-deltas-in-segment.

2. For each candidate channel C ∈ {Tran, Vert, Long, MicL}:
     For each candidate anchor location in the segment-header payload
     (try [0:2], [2:4], [4:6], [14:16], [16:18] as int16 BE):
       Compare the decoded trajectory against truth[C] starting from
       the segment's first sample index.
       Score = number of matches (or sum of squared errors).

3. Report the best (channel, anchor-location) combination per segment.

If the rotation hypothesis is correct, you'll see:

    segment 0 → best score for (Tran, preamble bytes [3:5]) ✓ already known
    segment 1 → best score for (Vert, <some-header-byte>)
    segment 2 → best score for (Long, <some-header-byte>)
    segment 3 → best score for (MicL, <some-header-byte>)
    segment 4 → best score for (Tran, continuing from segment 0's end)

If the rotation hypothesis is NOT correct, the scorer will at least narrow
down what segment 1 actually carries. Maybe channels interleave at finer
granularity, or maybe segments alternate by something other than channel.

────────────────────────────────────────────────────────────────────────────
Why this is a scoring analyzer, not a hand-written decoder
────────────────────────────────────────────────────────────────────────────

Direct hand-coding ("assume segment 1 is Vert with anchor at byte X") gets
stuck when the assumption is wrong because the failure mode is silent —
you get plausible-looking-but-wrong samples and have to manually diff
against truth to debug.

The scorer is brute-force but cheap: every fixture event × every segment ×
4 channels × 5 anchor-byte candidates is only ~hundreds of comparisons.
The winning combination jumps out by score.

────────────────────────────────────────────────────────────────────────────
Skeleton
────────────────────────────────────────────────────────────────────────────
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import re
|
||
import sys
|
||
from dataclasses import dataclass
|
||
from typing import List, Optional, Tuple
|
||
|
||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||
|
||
from minimateplus.waveform_codec import walk_body, find_data_start, WaveformBlock
|
||
|
||
|
||
# ── Reusable pieces ──────────────────────────────────────────────────────────
|
||
|
||
|
||
# Channel names, in the order this script pairs them with the first four
# numeric columns of a BW ASCII export row.
CHANNELS = ("Tran", "Vert", "Long", "MicL")

# Reciprocal of the 0.005 in/s-per-LSB step (1 in/s / 0.005 in/s/LSB = 200).
# Multiply BW-export floats by this to get 16-count units, the body's native
# quantization.
LSB_INV = 200
|
||
|
||
|
||
@dataclass
class FixtureEvent:
    """A single fixture event together with its truth data and block stream.

    Attributes:
        name: Fixture file name, e.g. "M529LL1A.SP0".
        bin_path: Path of the binary event file.
        txt_path: Path of the companion ASCII export.
        body: Waveform body bytes sliced out of the binary file.
        truth: {channel: list of int16-quantized samples}.
        blocks: Parsed block stream for ``body``.
        segment_starts: Block indices of each 40 02 segment header.
        segment_sample_starts: For each segment, the truth sample index it
            starts at.
    """

    name: str
    bin_path: str
    txt_path: str
    body: bytes
    truth: dict
    blocks: List[WaveformBlock]
    segment_starts: List[int]
    segment_sample_starts: List[int]
|
||
|
||
|
||
def s4(n: int) -> int:
    """Reinterpret a 4-bit nibble (0..15) as a signed value (-8..7)."""
    if n >= 8:
        return n - 16
    return n
|
||
|
||
|
||
def i8(b: int) -> int:
    """Reinterpret an unsigned byte (0..255) as a signed int8 (-128..127)."""
    if b >= 128:
        return b - 256
    return b
|
||
|
||
|
||
def load_fixture(name: str) -> FixtureEvent:
    """Locate fixture *name*, read it, and return it as a ``FixtureEvent``.

    The binary is looked up under ``tests/fixtures`` next to this script's
    parent directory: two fixed locations are probed first, then the whole
    tree is walked. The truth export is expected at ``<binary path>.TXT``.

    Raises:
        FileNotFoundError: if no file called *name* exists under the fixture
            tree (opening a missing companion ``.TXT`` raises it as well).
    """
    fixtures_root = os.path.join(os.path.dirname(__file__), "..", "tests", "fixtures")

    # Fixed locations are probed first, in this order.
    bin_path: Optional[str] = None
    for known_location in (
        os.path.join(fixtures_root, "5-11-26", name),
        os.path.join(fixtures_root, "decode-re-5-8-26", "event-a", name),  # not used directly
    ):
        if os.path.exists(known_location):
            bin_path = known_location
            break

    if bin_path is None:
        # Fall back to a directory walk (the 5-8 fixtures live in subdirs);
        # the first directory containing the file wins.
        for dirpath, _subdirs, filenames in os.walk(fixtures_root):
            if name in filenames:
                bin_path = os.path.join(dirpath, name)
                break

    if bin_path is None:
        raise FileNotFoundError(name)

    txt_path = bin_path + ".TXT"
    with open(bin_path, "rb") as handle:
        raw = handle.read()

    # Drop the first 43 and last 26 bytes of the file.
    # NOTE(review): presumably a fixed file header/trailer — confirm.
    body = raw[43:-26]
    truth = _parse_txt(txt_path)
    blocks = walk_body(body, find_data_start(body))

    # Every block whose high tag byte is 0x40 is a segment header.
    header_block_indices = [
        position for position, block in enumerate(blocks) if block.tag_hi == 0x40
    ]

    # These starts assume every block continues one single channel. Under the
    # rotation hypothesis segment 1 would instead start at sample 0 of a
    # *different* channel, so the analyzer should try both interpretations.
    sample_starts = _compute_segment_sample_starts(blocks, header_block_indices)

    return FixtureEvent(
        name=name,
        bin_path=bin_path,
        txt_path=txt_path,
        body=body,
        truth=truth,
        blocks=blocks,
        segment_starts=header_block_indices,
        segment_sample_starts=sample_starts,
    )
|
||
|
||
|
||
def _parse_txt(path: str) -> dict:
    """Read a BW ASCII TXT export and return ``{channel: [samples]}``.

    The header is the first line that contains every name in ``CHANNELS``.
    Each later line with at least four whitespace-separated fields whose
    first four all parse as floats contributes one sample per channel, paired
    with ``CHANNELS`` in order. Other lines are skipped. If no header line is
    found, every channel maps to an empty list.

    Geo channels are scaled by ``LSB_INV`` and rounded to integers; MicL is
    kept as the raw float (it is in dB(L) and does not quantize the same way,
    so the scorer should treat it specially).
    """
    with open(path, "r", encoding="utf-8", errors="replace") as handle:
        rows = handle.read().splitlines()

    samples = {channel: [] for channel in CHANNELS}

    header_row = None
    for row_no, text in enumerate(rows):
        if all(channel in text for channel in CHANNELS):
            header_row = row_no
            break
    if header_row is None:
        return samples

    for text in rows[header_row + 1:]:
        fields = re.split(r"\s+", text.strip())
        if len(fields) < 4:
            continue
        try:
            values = [float(field) for field in fields[:4]]
        except ValueError:
            # Non-numeric row (e.g. trailing notes): ignore it.
            continue
        for channel, value in zip(CHANNELS, values):
            if channel == "MicL":
                samples[channel].append(value)
            else:
                samples[channel].append(round(value * LSB_INV))
    return samples
|
||
|
||
|
||
def _compute_segment_sample_starts(
    blocks: List[WaveformBlock], seg_idx: List[int]
) -> List[int]:
    """Return the running sample count at each segment header.

    The count treats all blocks as one continuous (Tran) stream, which makes
    it one candidate for "segment 1 continues Tran" tests. The scorer should
    ALSO try "segment 1 starts at sample 0 of a new channel", as the rotation
    hypothesis predicts.

    Args:
        blocks: Parsed block stream; each block exposes ``tag_hi`` and
            ``tag_lo``.
        seg_idx: Indices into *blocks* of the segment-header blocks.

    Returns:
        One entry per index in *seg_idx* that is a valid position in
        *blocks*, in block order: the cumulative count reached just before
        that block.

    NOTE(review): every counted block adds ``tag_lo`` samples. Confirm that
    ``tag_lo`` is the sample count (not the byte count) for 10 NN blocks,
    whose data bytes each carry two nibble deltas.
    """
    # Set membership keeps the per-block header test O(1) instead of
    # rescanning the index list for every block.
    header_positions = set(seg_idx)

    starts: List[int] = []
    cum = 2  # T[0] + T[1] from preamble
    for position, block in enumerate(blocks):
        if position in header_positions:
            starts.append(cum)
        # 00/10/20 blocks carry samples; 30 NN and 40 02 don't contribute
        # samples (for this hypothesis).
        if block.tag_hi in (0x00, 0x10, 0x20):
            cum += block.tag_lo
    return starts
|
||
|
||
|
||
# ── The core algorithm: decode a segment's blocks as deltas ─────────────────
|
||
|
||
|
||
def decode_segment_as_channel(
    blocks: List[WaveformBlock],
    seg_start_block_idx: int,
    seg_end_block_idx: int,
    anchor: int,
) -> List[int]:
    """Decode ``blocks[seg_start_block_idx:seg_end_block_idx]`` as one delta stream.

    The segment-0 codec rules are applied, starting from *anchor*:

    * ``10 NN`` — each data byte holds two signed 4-bit deltas, high nibble
      first;
    * ``20 NN`` — each data byte is one signed 8-bit delta;
    * ``00 NN`` — the current value is repeated ``tag_lo`` times;
    * anything else (``30 NN`` content is unknown, ``40 02`` is a segment
      header) is skipped.

    Returns the running sample values, one per delta. The anchor itself is NOT
    included — the first returned value is anchor + first_delta.
    """
    trajectory: List[int] = []
    level = anchor
    for idx in range(seg_start_block_idx, seg_end_block_idx):
        block = blocks[idx]
        kind = block.tag_hi

        if kind == 0x00:
            # Run of unchanged samples.
            trajectory.extend([level] * block.tag_lo)
            continue

        if kind == 0x10:
            # Two's-complement nibbles: 8..15 map to -8..-1.
            steps = [
                nibble - 16 if nibble >= 8 else nibble
                for packed in block.data
                for nibble in ((packed >> 4) & 0xF, packed & 0xF)
            ]
        elif kind == 0x20:
            # Two's-complement bytes: 128..255 map to -128..-1.
            steps = [raw - 256 if raw >= 128 else raw for raw in block.data]
        else:
            continue

        for step in steps:
            level += step
            trajectory.append(level)
    return trajectory
|
||
|
||
|
||
def score_against_truth(
    decoded: List[int],
    truth: List[int],
    truth_start: int,
) -> Tuple[int, int]:
    """Count positions where *decoded* equals *truth* from *truth_start* on.

    ``decoded[i]`` is compared with ``truth[truth_start + i]`` over the part
    of *decoded* that fits inside *truth*.

    Returns ``(n_matches, n_compared)``; ``(0, 0)`` when nothing overlaps.
    """
    overlap = min(len(decoded), len(truth) - truth_start)
    if overlap <= 0:
        return (0, 0)

    hits = 0
    for offset in range(overlap):
        if decoded[offset] == truth[truth_start + offset]:
            hits += 1
    return (hits, overlap)
|
||
|
||
|
||
# ── TODO for the next pass ──────────────────────────────────────────────────
|
||
|
||
|
||
def score_segment_against_all_channels(
    event: FixtureEvent,
    segment_index: int,
) -> List[Tuple[str, int, int, int]]:
    """Find, per channel, where segment *segment_index* best fits the truth.

    The segment's data blocks (everything after its header block, up to the
    next segment header or the end of the block stream) are decoded once with
    anchor 0, giving cumulative deltas ``d[0..n-1]``.

    For each geo channel C in (Tran, Vert, Long) and each start index ``s``,
    ``truth[C][s]`` is taken as the anchor and the score is the number of
    ``i`` with ``truth[C][s + 1 + i] == truth[C][s] + d[i]``. The best-scoring
    ``s`` is kept per channel, the earliest one on ties. MicL is not scored.

    Args:
        event: Fixture whose ``blocks``, ``segment_starts`` and ``truth`` are
            read.
        segment_index: Index into ``event.segment_starts``.

    Returns:
        Rows of ``(channel_name, start_sample, n_matches, n_compared)`` sorted
        by match count descending. ``start_sample`` is -1 when no position
        matched at all. Channels with fewer than ``n + 1`` truth samples are
        omitted. The list is empty when the segment decodes to no samples.
    """
    # Block range of this segment: from the segment header (inclusive) up to
    # the next segment header (exclusive), or end-of-blocks.
    seg_header_idx = event.segment_starts[segment_index]
    if segment_index + 1 < len(event.segment_starts):
        next_header_idx = event.segment_starts[segment_index + 1]
    else:
        next_header_idx = len(event.blocks)

    # Skip the header block itself. Decoding from anchor=0 yields cumulative
    # deltas that can be re-anchored at any truth sample below.
    deltas_trajectory = decode_segment_as_channel(
        event.blocks, seg_header_idx + 1, next_header_idx, anchor=0
    )
    if not deltas_trajectory:
        return []

    n = len(deltas_trajectory)
    results = []

    for ch in ("Tran", "Vert", "Long"):
        truth = event.truth.get(ch)
        if not truth or len(truth) < n + 1:
            continue

        best_matches, best_start = 0, -1
        for s in range(len(truth) - n):
            anchor = truth[s]
            # Total matches (rather than stopping at the first mismatch)
            # gives a more robust score.
            matches = sum(
                1
                for i, relative in enumerate(deltas_trajectory)
                if truth[s + i + 1] == anchor + relative
            )
            if matches > best_matches:
                best_matches, best_start = matches, s
        results.append((ch, best_start, best_matches, n))

    results.sort(key=lambda row: -row[2])
    return results
|
||
|
||
|
||
# ── Driver ──────────────────────────────────────────────────────────────────
|
||
|
||
|
||
def main():
    """Score every segment of each loud-bundle fixture and print the best fit."""
    fixture_names = (
        "M529LL1A.SP0",
        "M529LL1A.SS0",
        "M529LL1A.SV0",
        "M529LL1L.JQ0",
        "M529LL1L.V70",
    )
    for name in fixture_names:
        try:
            event = load_fixture(name)
        except FileNotFoundError:
            print(f"{name}: fixture not found")
            continue

        segment_count = len(event.segment_starts)

        print(f"\n=== {name} ===")
        print(f" body bytes: {len(event.body)}")
        print(f" blocks: {len(event.blocks)}")
        print(f" segments: {segment_count}")
        print(f" segment sample-starts (if all blocks are 1 channel):")
        for si, sample_start in enumerate(event.segment_sample_starts):
            print(f" seg {si}: sample {sample_start}")

        for si in range(segment_count):
            ranked = score_segment_against_all_channels(event, si)
            if not ranked:
                print(f" seg {si}: (no scorable data)")
                continue

            top = ranked[0]
            # Tick the segment when more than 90% of compared samples match.
            tag = "✓" if top[2] / max(top[3], 1) > 0.9 else " "

            if len(ranked) > 1:
                second = ranked[1]
                runner_up = f" (next: {second[0]} @{second[1]} {second[2]}/{second[3]})"
            else:
                runner_up = ""

            print(
                f" seg {si}: best fit {tag} = {top[0]:<5} "
                f"starting at sample {top[1]:>5}, {top[2]:>4}/{top[3]:<4} match"
                + runner_up
            )


if __name__ == "__main__":
    main()
|