series 4 codec work, initial decode success

2026-05-29 06:33:06 +00:00
parent 1bccc44b88
commit 9b71ead44b
20 changed files with 1578 additions and 76 deletions
@@ -0,0 +1,65 @@
+"""Run read_idf_file across the corpus and report per-channel accuracy vs sidecars."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from micromate.idf_file import read_idf_file
+from analysis_idf.recon import load_sidecar_samples
+
+
+def sidecar_path(idfw: Path) -> Path:
+    """Return ``<parent of idfw>/TXT/<idfw file name>.txt``."""
+    txt_name = idfw.name + ".txt"
+    return idfw.parent.joinpath("TXT", txt_name)
+
+
+def main():
+    """Decode every IDFW fixture and summarise exact-match rates per channel.
+
+    A file counts as skipped when read_idf_file raises, when its TXT sidecar
+    is missing, or when load_sidecar_samples raises.  For the remaining
+    files the Tran/Vert/Long sidecar values are divided by 0.0003, rounded
+    to integers and compared index-by-index with the decoded samples.
+    """
+    geo_lsb = 0.0003
+    channels = ("Tran", "Vert", "Long")
+    corpus = REPO / "tests/fixtures/THORDATA_example"
+    candidates = sorted(
+        p for p in corpus.rglob("*.IDFW") if not str(p).endswith(".CDB")
+    )
+
+    processed = 0
+    skipped = 0
+    overall = {name: [] for name in channels}
+
+    for idfw in candidates:
+        try:
+            decoded = read_idf_file(idfw)
+        except Exception:
+            skipped += 1
+            continue
+        txt = sidecar_path(idfw)
+        if not txt.exists():
+            skipped += 1
+            continue
+        try:
+            sidecar = load_sidecar_samples(txt)
+        except Exception:
+            skipped += 1
+            continue
+
+        for name in channels:
+            expected = [int(round(v / geo_lsb)) for v in sidecar[name]]
+            actual = decoded.samples.get(name, [])
+            compared = min(len(expected), len(actual))
+            if compared == 0:
+                # Nothing overlaps, so this channel adds no data point.
+                continue
+            hits = sum(1 for k in range(compared) if expected[k] == actual[k])
+            overall[name].append(100.0 * hits / compared)
+        processed += 1
+
+    print(f"Processed {processed} files (skipped {skipped})")
+    print("Per-channel exact-match % (mean / min / max):")
+    for name, rates in overall.items():
+        if not rates:
+            continue
+        mean = sum(rates) / len(rates)
+        print(f"  {name}: mean={mean:.2f}%  min={min(rates):.2f}%  max={max(rates):.2f}%  n={len(rates)}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,49 @@
+"""Find where decoded-vs-sidecar diverges for each channel."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from minimateplus.waveform_codec import decode_waveform_v2
+from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
+
+
+def main():
+    """Report where the decoded waveform first departs from the sidecar.
+
+    For each of Tran/Vert/Long this prints the first mismatching index with
+    a few samples of context, then the number of mismatches and the longest
+    run of consecutive exact matches.  Only the indices present in both
+    sequences are compared, so a length difference between the decode and
+    the sidecar cannot raise IndexError.
+    """
+    buf = TARGET.read_bytes()
+    sc = load_sidecar_samples(TXT)
+    decoded = decode_waveform_v2(buf[0x0f1f:])
+    GEO_LSB = 0.0003
+
+    for ch in ("Tran", "Vert", "Long"):
+        sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
+        dec = decoded[ch]
+        # Compare only the overlapping prefix of the two sequences.
+        n = min(len(sc_counts), len(dec))
+        if len(sc_counts) != len(dec):
+            print(f"{ch}: length mismatch sidecar={len(sc_counts)} decoded={len(dec)}; comparing first {n}")
+        first_diff = next((i for i in range(n) if dec[i] != sc_counts[i]), None)
+        if first_diff is None:
+            print(f"{ch}: NO MISMATCHES")
+            continue
+        print(f"{ch}: first diff at idx {first_diff}")
+        # Context window: 3 samples before the first diff through 7 after it.
+        for i in range(max(0, first_diff - 3), min(n, first_diff + 8)):
+            mark = "  " if dec[i] == sc_counts[i] else "**"
+            print(f"  {mark} idx {i:4d}: sc={sc_counts[i]:6d}  dec={dec[i]:6d}  diff={dec[i]-sc_counts[i]:+d}")
+        # Count mismatches and track the longest streak of exact matches.
+        cum_match_run = 0
+        max_match_run = 0
+        diff_count = 0
+        for i in range(n):
+            if dec[i] == sc_counts[i]:
+                cum_match_run += 1
+                max_match_run = max(max_match_run, cum_match_run)
+            else:
+                cum_match_run = 0
+                diff_count += 1
+        print(f"  total mismatches: {diff_count}/{n}, longest run of matches: {max_match_run}")
+        print()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,48 @@
+"""End-to-end IDFH ingest verification."""
+from __future__ import annotations
+import sys
+import tempfile
+import json
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from sfm.waveform_store import WaveformStore
+
+
+def main():
+    """Import one IDFH fixture with its TXT report into a temporary store and print the outcome."""
+    source = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM13981/UM13981_20220805075441.IDFH"
+    report = source.parent / "TXT" / f"{source.name}.txt"
+
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp_root = Path(tmp)
+        event, record = WaveformStore(tmp_root).save_imported_idf(
+            source.read_bytes(),
+            source,
+            idf_report_text=report.read_text(errors="replace"),
+        )
+
+        print("=== save_imported_idf (IDFH) ===")
+        print(f"  serial:        {record['serial']}")
+        print(f"  filename:      {record['filename']}")
+        print(f"  filesize:      {record['filesize']}")
+        # expect None for histogram
+        print(f"  h5:            {record['hdf5_filename']}")
+        print(f"  sidecar:       {record['sidecar_filename']}")
+        print()
+
+        print("=== Event ===")
+        print(f"  timestamp:     {event.timestamp}")
+        print(f"  record_type:   {event.record_type}")
+        print(f"  sample_rate:   {event.sample_rate}")
+        print()
+
+        # Read back the JSON sidecar and look for extensions.idf_intervals.
+        json_path = tmp_root / "UM13981" / f"{source.name}.sfm.json"
+        payload = json.loads(json_path.read_text())
+        stashed = payload.get("extensions", {}).get("idf_intervals", [])
+        print(f"  sidecar intervals: {len(stashed)}")
+        if not stashed:
+            return
+        print(f"  first interval:    {stashed[0]}")
+        print(f"  last interval:     {stashed[-1]}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,40 @@
+"""Verify the had_report=False path: ingest IDFW with no .txt."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import tempfile
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from sfm.waveform_store import WaveformStore
+
+
+def main():
+    """Import one IDFW fixture with no TXT report into a temporary store and print the outcome."""
+    source = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
+    with tempfile.TemporaryDirectory() as tmp:
+        event, record = WaveformStore(Path(tmp)).save_imported_idf(
+            source.read_bytes(),
+            source,
+            serial_hint=None,
+            idf_report_text=None,  # deliberately no .txt report
+        )
+        print("=== IDFW without .txt ingest ===")
+        print(f"  serial:        {record['serial']}")
+        print(f"  timestamp:     {event.timestamp}")
+        print(f"  sample_rate:   {event.sample_rate}")
+        print(f"  record_type:   {event.record_type}")
+        print(f"  rectime_sec:   {event.rectime_seconds}")
+
+        # A falsy raw_samples is reported as zero samples on every channel.
+        raw = event.raw_samples
+        counts = {
+            name: (len(raw.get(name, [])) if raw else 0)
+            for name in ("Tran", "Vert", "Long", "MicL")
+        }
+        print(f"  raw_samples:   Tran={counts['Tran']} Vert={counts['Vert']} Long={counts['Long']} MicL={counts['MicL']}")
+
+        peaks = event.peak_values
+        if peaks:
+            print(f"  peak_values:   tran={peaks.tran} vert={peaks.vert} long={peaks.long}")
+        print(f"  h5 written:    {record['hdf5_filename']}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,52 @@
+"""End-to-end ingest test: feed an IDFW + .txt to save_imported_idf in a tmp store."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import tempfile
+import shutil
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from sfm.waveform_store import WaveformStore
+
+
+def main():
+    """Import one IDFW fixture with its TXT report into a temporary store and print the outcome."""
+    source = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
+    report = source.parent / "TXT" / f"{source.name}.txt"
+
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp_root = Path(tmp)
+        event, record = WaveformStore(tmp_root).save_imported_idf(
+            source.read_bytes(),
+            source,
+            serial_hint=None,
+            idf_report_text=report.read_text(errors="replace"),
+        )
+
+        print("=== Save result ===")
+        print(f"  serial:    {record['serial']}")
+        print(f"  filename:  {record['filename']}")
+        print(f"  filesize:  {record['filesize']}")
+        print(f"  h5:        {record['hdf5_filename']}")
+        print(f"  sidecar:   {record['sidecar_filename']}")
+        print()
+
+        print("=== Event ===")
+        serial = getattr(event, "serial", "(n/a)")
+        print(f"  serial:        {serial}")
+        print(f"  timestamp:     {event.timestamp}")
+        print(f"  sample_rate:   {event.sample_rate}")
+        print(f"  record_type:   {event.record_type}")
+        print(f"  rectime_sec:   {event.rectime_seconds}")
+
+        # A falsy raw_samples is reported as zero samples on every channel.
+        raw = event.raw_samples
+        counts = {
+            name: (len(raw.get(name, [])) if raw else 0)
+            for name in ("Tran", "Vert", "Long", "MicL")
+        }
+        print(f"  raw_samples:   Tran={counts['Tran']}, Vert={counts['Vert']}, Long={counts['Long']}, MicL={counts['MicL']}")
+
+        peaks = event.peak_values
+        if peaks:
+            print(f"  peaks (txt):   Tran={peaks.tran} Vert={peaks.vert} Long={peaks.long}")
+        print()
+
+        # Check on disk that the store wrote both output files.
+        unit_dir = tmp_root / "UM11719"
+        h5_file = unit_dir / f"{source.name}.h5"
+        h5_size = h5_file.stat().st_size if h5_file.exists() else 0
+        print(f"  h5 exists:     {h5_file.exists()}  size={h5_size}")
+        json_file = unit_dir / f"{source.name}.sfm.json"
+        json_size = json_file.stat().st_size if json_file.exists() else 0
+        print(f"  sidecar exists:{json_file.exists()}  size={json_size}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,137 @@
+"""Decode IDFH histogram intervals + verify against sidecar."""
+from __future__ import annotations
+import sys
+import struct
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+
+SEGMENT_MAGIC = b"\x02\xda\x0a\x00\x00\x00"
+SEGMENT_SIZE = 732   # = 10-byte header + 10 × 72-byte intervals + 2-byte tail
+INTERVAL_SIZE = 72
+CHANNELS = ("Tran", "Vert", "Long", "MicL")
+
+
+def decode_interval(buf72: bytes) -> dict:
+    """Decode one 72-byte interval record into per-channel fields.
+
+    The first 64 bytes are read as four 16-byte big-endian blocks, one per
+    name in CHANNELS.  Everything from byte 64 onward is returned as a
+    space-separated hex string under the "_tail" key.
+    """
+    decoded = {}
+    for index, name in enumerate(CHANNELS):
+        # Signed 16-bit fields at block offsets 0, 2, 4; unsigned 16-bit
+        # fields at 6, 10, 14.  Offsets 8-9 and 12-13 are skipped (pad bytes).
+        lowest, highest, field4, halfp, field10, field14 = struct.unpack_from(
+            ">hhhH2xH2xH", buf72, index * 16
+        )
+        if halfp > 5:
+            freq_hz = 512.0 / halfp
+        else:
+            freq_hz = None
+        decoded[name] = {
+            "min":     lowest,
+            "max":     highest,
+            "field4":  field4,
+            "halfp":   halfp,
+            "field10": field10,
+            "field14": field14,
+            "peak":    max(abs(lowest), abs(highest)),
+            "freq_hz": freq_hz,
+        }
+    decoded["_tail"] = buf72[64:].hex(" ")
+    return decoded
+
+
+def walk_idfh(buf: bytes) -> list:
+    """Collect every decoded interval record found in an IDFH buffer.
+
+    The buffer is scanned for 10-byte segment headers shaped like
+    ``[length_be 2B][0a 00 00 00][00 NN][05 3f]``.  After each header,
+    ``(length - 10) // INTERVAL_SIZE`` records of INTERVAL_SIZE bytes are
+    decoded with decode_interval(); records that would run past the end of
+    the buffer are dropped.  Scanning resumes ``length + 2`` bytes after the
+    start of the header.
+
+    Returns a list of dicts, each the decode_interval() result plus an
+    "offset" key holding the record's position in *buf*.
+    """
+    marker = b"\x0a\x00\x00\x00"
+    header_size = 10
+    intervals = []
+    i = 0
+    while True:
+        j = buf.find(marker, i)
+        if j < 0:
+            break
+        # The 2-byte length sits immediately before the marker.
+        if j < 2:
+            i = j + 1
+            continue
+        # Too close to the end for a header; later hits are closer still.
+        if j + 10 > len(buf):
+            break
+        length = int.from_bytes(buf[j - 2:j], "big")
+        # A length below the header size cannot describe a segment.  It is
+        # rejected here because length == 0 would otherwise set the next
+        # search position back to j and loop forever on the same hit.
+        looks_like_header = (
+            buf[j + 4] == 0x00
+            and buf[j + 6:j + 8] == b"\x05\x3f"
+            and length >= header_size
+        )
+        if not looks_like_header:
+            i = j + 1
+            continue
+        header_start = j - 2
+        interval_start = header_start + header_size
+        n_intervals = (length - header_size) // INTERVAL_SIZE
+        for k in range(n_intervals):
+            off = interval_start + k * INTERVAL_SIZE
+            if off + INTERVAL_SIZE > len(buf):
+                break
+            chunk = buf[off:off + INTERVAL_SIZE]
+            intervals.append({"offset": off, **decode_interval(chunk)})
+        # length >= header_size guarantees this lands beyond j.
+        i = header_start + length + 2
+    return intervals
+
+
+def main():
+    """Decode two IDFH fixtures and print selected intervals beside their sidecar rows."""
+
+    def show_channels(interval):
+        # One line per channel: peak count, peak scaled by 10/32768, halfp, freq.
+        for name in CHANNELS:
+            fields = interval[name]
+            ips = fields["peak"] / 32768 * 10.0
+            print(f"  {name}: peak={fields['peak']:5d} ({ips:.4f} in/s)  halfp={fields['halfp']:5d}  freq={fields['freq_hz']}")
+
+    # Multi-segment fixture.
+    multi = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM13981/UM13981_20220805075441.IDFH"
+    multi_txt = multi.parent / "TXT" / f"{multi.name}.txt"
+    multi_buf = multi.read_bytes()
+    decoded = walk_idfh(multi_buf)
+    print(f"=== {multi.name} ===")
+    print(f"  file size: {len(multi_buf)}")
+    print(f"  decoded intervals: {len(decoded)}")
+    # Sidecar data rows are the lines that begin with a 2022-/2023- date.
+    rows = [
+        ln
+        for ln in multi_txt.read_text(errors="replace").splitlines()
+        if ln.startswith(("2022-", "2023-"))
+    ]
+    print(f"  sidecar rows: {len(rows)}")
+
+    print()
+    for k in (0, 1, 78, 79, 80):
+        if k >= len(decoded):
+            continue
+        interval = decoded[k]
+        print(f"--- interval {k} @0x{interval['offset']:04x} ---")
+        show_channels(interval)
+        if k < len(rows):
+            print(f"  SC: {rows[k]}")
+
+    # Single-interval fixture.
+    print()
+    single = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162648.IDFH"
+    single_txt = single.parent / "TXT" / f"{single.name}.txt"
+    single_buf = single.read_bytes()
+    decoded_single = walk_idfh(single_buf)
+    print(f"=== {single.name} ===")
+    print(f"  file size: {len(single_buf)}, decoded intervals: {len(decoded_single)}")
+    if not decoded_single:
+        return
+    show_channels(decoded_single[0])
+    rows_single = [
+        ln
+        for ln in single_txt.read_text(errors='replace').splitlines()
+        if ln.startswith("2023-")
+    ]
+    if rows_single:
+        print(f"  SC: {rows_single[0]}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,41 @@
+"""Find IDFH interval period via auto-correlation of structural patterns."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+from collections import Counter
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+
+def main():
+    """Score candidate interval sizes by counting byte columns that are almost always zero.
+
+    The region 0xF96..0x270C of one IDFH fixture is cut into rows of each
+    candidate size (16..99 bytes).  A column scores when at least 90% of
+    the rows hold a zero byte there; the ten best sizes are printed.
+    """
+    path = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM13981/UM13981_20220805075441.IDFH"
+    raw = path.read_bytes()
+    region = raw[0xF96:0x270C]
+    print(f"body size: {len(region)} bytes (file {len(raw)} bytes)")
+
+    print()
+    print("=== zero-column score by interval size (higher = more likely) ===")
+    ranked = []
+    for size in range(16, 100):
+        rows = len(region) // size
+        if rows < 30:
+            # Too few rows for the column statistic to be considered.
+            continue
+        threshold = rows * 0.9
+        zero_cols = 0
+        for col in range(size):
+            # Strided slice = the byte at this column in each complete row.
+            column = region[col:rows * size:size]
+            if column.count(0) >= threshold:
+                zero_cols += 1
+        ranked.append((zero_cols, size, rows))
+    ranked.sort(reverse=True)
+    for zero_cols, size, rows in ranked[:10]:
+        print(f"  size={size:3d}  n_intervals={rows}  consistently-zero-cols={zero_cols}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,40 @@
+"""Per-file accuracy + sample-count details."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from micromate.idf_file import read_idf_file
+from analysis_idf.recon import load_sidecar_samples
+
+
+def main():
+    """Print Tran accuracy and sample counts for the first 20 comparable IDFW fixtures.
+
+    Files that read_idf_file cannot decode, or that have no TXT sidecar,
+    are passed over without counting toward the limit.
+    """
+    geo_lsb = 0.0003
+    limit = 20
+    corpus = REPO / "tests/fixtures/THORDATA_example"
+    candidates = sorted(
+        p for p in corpus.rglob("*.IDFW") if not str(p).endswith(".CDB")
+    )
+    printed = 0
+    for idfw in candidates:
+        try:
+            decoded = read_idf_file(idfw)
+        except Exception:
+            continue
+        txt = idfw.parent / "TXT" / f"{idfw.name}.txt"
+        if not txt.exists():
+            continue
+        sidecar = load_sidecar_samples(txt)
+        expected = [int(round(v / geo_lsb)) for v in sidecar["Tran"]]
+        actual = decoded.samples.get("Tran", [])
+        overlap = min(len(expected), len(actual))
+        if overlap:
+            hits = sum(1 for k in range(overlap) if expected[k] == actual[k])
+            pct = 100.0 * hits / overlap
+        else:
+            pct = 0.0
+        print(f"{idfw.name:40s}  size={idfw.stat().st_size:6d}  sc_n={len(expected):4d}  dec_n={len(actual):4d}  exact={pct:.1f}%")
+        printed += 1
+        if printed >= limit:
+            break
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,64 @@
+"""Look at what's at the divergence boundary."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from minimateplus.waveform_codec import walk_body, find_data_start, parse_segment_header
+from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
+
+
+def main():
+    """Dump the first 30 codec blocks and a running per-segment sample count."""
+    raw = TARGET.read_bytes()
+    body = raw[0x0f1f:]
+    start = find_data_start(body)
+    print(f"data_start: {start}  (= file offset 0x{0x0f1f + start:04x})")
+
+    blocks = walk_body(body, start)
+    print(f"{len(blocks)} blocks total")
+    print()
+
+    # Listing of the first 30 blocks, with parsed fields for segment headers.
+    print("=== first 30 blocks ===")
+    for idx, blk in enumerate(blocks[:30]):
+        file_off = 0x0f1f + blk.offset
+        if blk.tag_hi != 0x40:
+            print(f"  [{idx:3d}] @0x{file_off:04x}  {blk.kind}  len={blk.length}  data={blk.data[:16].hex()}")
+            continue
+        hdr = parse_segment_header(blk)
+        if hdr:
+            counter = hdr['counter']
+            field2 = hdr['field2'].hex()
+            anchor = hdr['anchor_bytes'].hex()
+            tail = hdr['tail'].hex()
+        else:
+            counter = field2 = anchor = tail = '?'
+        print(f"  [{idx:3d}] @0x{file_off:04x}  {blk.kind}  (segment header)  counter={counter}  field2={field2}  anchor={anchor}  tail={tail}")
+    print()
+
+    # Running sample count within the current segment, block by block.
+    print("=== cumulative samples through blocks ===")
+    channel = "Tran"
+    rotation = ["Vert", "Long", "MicL", "Tran"]
+    segment = 0
+    running = 2  # preamble Tran[0], Tran[1]
+    for idx, blk in enumerate(blocks[:30]):
+        hi = blk.tag_hi
+        if hi == 0x40:
+            # Segment header: move to the next channel in the rotation.
+            segment += 1
+            finished = channel
+            channel = rotation[(segment - 1) % 4]
+            print(f"  [{idx:3d}] 40 02 -> end of {finished} segment, start {channel} (segment {segment})")
+            running = 2  # anchors
+            continue
+        family = hi & 0xF0
+        if family == 0x10 or family == 0x20:
+            # 12-bit count: low nibble of tag_hi followed by tag_lo.
+            added = ((hi & 0x0F) << 8) | blk.tag_lo
+            label = "nibble" if family == 0x10 else "int8"
+            running += added
+            print(f"  [{idx:3d}] {blk.kind} {label}: +{added} samples, ch={channel}, ch_total~{running}")
+        elif hi == 0x00:
+            running += blk.tag_lo
+            print(f"  [{idx:3d}] {blk.kind} RLE: +{blk.tag_lo}, ch={channel}, ch_total~{running}")
+        elif hi == 0x30:
+            running += blk.tag_lo
+            print(f"  [{idx:3d}] {blk.kind} packed12: +{blk.tag_lo} samples, ch={channel}, ch_total~{running}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,89 @@
+"""Reconnaissance helpers for cracking the Thor IDFW binary."""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+TARGET = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
+TXT = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt"
+
+
+def hex_at(buf: bytes, off: int, n: int = 32) -> str:
+    """Format ``buf[off:off + n]`` as one dump line: offset, hex bytes, ASCII.
+
+    Bytes outside the printable range 32..126 are shown as ``.`` in the
+    ASCII column.
+    """
+    window = buf[off : off + n]
+    hex_column = " ".join(format(value, "02x") for value in window)
+    ascii_chars = []
+    for value in window:
+        printable = 32 <= value < 127
+        ascii_chars.append(chr(value) if printable else ".")
+    ascii_column = "".join(ascii_chars)
+    return f"{off:04x}: {hex_column}  {ascii_column}"
+
+
+def find_all(buf: bytes, needle: bytes) -> list[int]:
+    """Return every offset at which *needle* occurs in *buf*, overlaps included."""
+    hits: list[int] = []
+    pos = buf.find(needle)
+    while pos >= 0:
+        hits.append(pos)
+        # Resume one byte later so overlapping occurrences are reported too.
+        pos = buf.find(needle, pos + 1)
+    return hits
+
+
+def load_sidecar_samples(path: Path) -> dict[str, list[float]]:
+    """Parse the tab-separated sample table of a TXT sidecar.
+
+    Rows are read after a line equal to "Waveform Data Channels" (ignoring
+    surrounding whitespace) and up to a line starting with
+    "Waveform Data USB Channels".  Fields 1-4 of each row are taken as
+    Tran, Vert, Long and MicL.
+
+    A row is skipped as a whole when it has fewer than five fields, when it
+    is the column-header row, or when any of its four values is not a
+    float.  The four returned lists therefore always have the same length,
+    and a given index refers to the same row in every channel.
+    """
+    channels = ("Tran", "Vert", "Long", "MicL")
+    out = {ch: [] for ch in channels}
+    in_block = False
+    for line in path.read_text(errors="replace").splitlines():
+        if not in_block:
+            if line.strip() == "Waveform Data Channels":
+                in_block = True
+            continue
+        if line.startswith("Waveform Data USB Channels"):
+            break
+        parts = line.split("\t")
+        # Short rows and the "\tTran\tVert\tLong\tMicL" header carry no data.
+        if len(parts) < 5 or parts[1] == "Tran":
+            continue
+        try:
+            # Convert all four fields before appending any of them, so a bad
+            # later field cannot leave the earlier channels one sample ahead.
+            row = [float(field) for field in parts[1:5]]
+        except ValueError:
+            continue
+        for ch, value in zip(channels, row):
+            out[ch].append(value)
+    return out
+
+
+def main():
+    """Print sidecar sample counts and where two byte patterns occur in TARGET."""
+    raw = TARGET.read_bytes()
+    table = load_sidecar_samples(TXT)
+    print(f"file size: {len(raw)} bytes")
+    print(f"sample rows: Tran={len(table['Tran'])} Vert={len(table['Vert'])} Long={len(table['Long'])} MicL={len(table['MicL'])}")
+    for name in ("Tran", "Vert", "Long", "MicL"):
+        print(f"first 6 {name} samples: {table[name][:6]}")
+
+    print()
+    print("=== BW magic '00 02 00' positions ===")
+    magic_hits = find_all(raw, b"\x00\x02\x00")
+    print(f"{len(magic_hits)} hits")
+    for pos in magic_hits[:20]:
+        print(hex_at(raw, pos, 24))
+
+    print()
+    print("=== '40 02' segment-header positions ===")
+    header_hits = find_all(raw, b"\x40\x02")
+    print(f"{len(header_hits)} hits")
+    for pos in header_hits:
+        # Up to 4 bytes before the hit and 20 from it, to help tell real
+        # headers from chance occurrences.
+        before = raw[max(0, pos - 4): pos].hex()
+        after = raw[pos: pos + 20].hex()
+        print(f"  0x{pos:04x}  pre={before}  post={after}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,40 @@
+"""Find each segment boundary in the channel and check if errors reset there."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from minimateplus.waveform_codec import decode_waveform_v2
+from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
+
+
+def main():
+    """List the indices where decoded samples switch between matching and not matching the sidecar."""
+    raw = TARGET.read_bytes()
+    sidecar = load_sidecar_samples(TXT)
+    decoded = decode_waveform_v2(raw[0x0f1f:])
+    geo_lsb = 0.0003
+
+    for name in ("Tran", "Vert", "Long"):
+        expected = [int(round(v / geo_lsb)) for v in sidecar[name]]
+        actual = decoded[name]
+        # Walk the overlapping prefix and record each flip of the match
+        # state: DIVERGE when a match run ends, RESYNC when one begins.
+        transitions = []
+        in_sync = True
+        for idx, (want, got) in enumerate(zip(expected, actual)):
+            same = want == got
+            if same == in_sync:
+                continue
+            transitions.append((idx, "RESYNC" if same else "DIVERGE", want, got))
+            in_sync = same
+        print(f"{name}: {len(transitions)} transitions")
+        for idx, label, want, got in transitions[:20]:
+            print(f"  idx {idx:4d}  {label:8s}  sc={want:6d}  dec={got:6d}  diff={got-want:+d}")
+        print()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,46 @@
+"""Smoke-test read_idf_file on IDFH across the corpus."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from micromate.idf_file import read_idf_file
+
+
+def main():
+    """Decode one IDFH fixture in detail, then tally read_idf_file outcomes over all IDFH fixtures."""
+    sample = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162648.IDFH"
+    parsed = read_idf_file(sample)
+    event = parsed.event
+    print(f"=== {sample.name} ===")
+    print(f"  signature:   {parsed.signature}")
+    print(f"  serial:      {event.serial}")
+    print(f"  timestamp:   {event.timestamp}")
+    print(f"  sample_rate: {event.sample_rate}")
+    print(f"  kind:        {event.kind}")
+    print(f"  intervals:   {len(parsed.intervals or [])}")
+    print(f"  peaks:       T={event.peaks.transverse_ips:.4f} V={event.peaks.vertical_ips:.4f} L={event.peaks.longitudinal_ips:.4f}")
+    print()
+
+    corpus = REPO / "tests/fixtures/THORDATA_example"
+    candidates = list(corpus.rglob("*.IDFH"))
+    succeeded = 0
+    failed = 0
+    unimplemented = 0
+    interval_total = 0
+    for path in candidates:
+        try:
+            outcome = read_idf_file(path)
+            succeeded += 1
+            interval_total += len(outcome.intervals or [])
+        except NotImplementedError:
+            unimplemented += 1
+        except Exception as exc:
+            failed += 1
+            # Only the first three failures are printed.
+            if failed <= 3:
+                print(f"  FAIL: {path.name}: {type(exc).__name__}: {exc}")
+    print(f"Corpus: {len(candidates)} IDFH files | ok={succeeded} fail={failed} nyi={unimplemented}")
+    print(f"Total intervals decoded: {interval_total}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,48 @@
+"""Smoke-test read_idf_file across the sample corpus."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from micromate.idf_file import read_idf_file, geo_count_to_ips, mic_count_to_psi
+
+
+def main():
+    """Decode one IDFW fixture in detail, then tally read_idf_file outcomes over all IDFW fixtures."""
+    sample = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
+    parsed = read_idf_file(sample)
+    event = parsed.event
+    print(f"=== {sample.name} ===")
+    print(f"  signature: {parsed.signature}")
+    print(f"  serial:    {event.serial}")
+    print(f"  timestamp: {event.timestamp}")
+    print(f"  sample_rate: {event.sample_rate}")
+    print(f"  record_time: {event.record_time_sec}")
+    print(f"  calibration: {parsed.binary_metadata.calibration_date}")
+    geo_peaks = (
+        ("Tran", "transverse_ips"),
+        ("Vert", "vertical_ips"),
+        ("Long", "longitudinal_ips"),
+    )
+    for name, attr in geo_peaks:
+        print(f"  {name} samples: {len(parsed.samples[name])}, peak_ips={getattr(event.peaks, attr):.4f}")
+    print(f"  MicL samples: {len(parsed.samples['MicL'])}")
+    print()
+
+    # Sweep every IDFW fixture and count how each decode attempt ends.
+    corpus = REPO / "tests/fixtures/THORDATA_example"
+    candidates = [p for p in corpus.rglob("*.IDFW") if not str(p).endswith(".CDB")]
+    succeeded = 0
+    failed = 0
+    unimplemented = 0
+    for path in candidates:
+        try:
+            read_idf_file(path)
+            succeeded += 1
+        except NotImplementedError:
+            unimplemented += 1
+        except Exception as exc:
+            failed += 1
+            # Only the first five failures are printed.
+            if failed <= 5:
+                print(f"  FAIL: {path.name}: {type(exc).__name__}: {exc}")
+    print()
+    print(f"Corpus: {len(candidates)} IDFW files | ok={succeeded} fail={failed} not-implemented={unimplemented}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,73 @@
+"""Trace Tran sample-by-sample to find exactly where the codec drifts."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
+
+
+def s4(n: int) -> int:
+    """Return *n* unchanged when below 8, otherwise ``n - 16`` (signed 4-bit view of a nibble)."""
+    if n >= 8:
+        return n - 16
+    return n
+
+
+def i8(b: int) -> int:
+    """Return *b* unchanged when below 128, otherwise ``b - 256`` (signed 8-bit view of a byte)."""
+    if b >= 128:
+        return b - 256
+    return b
+
+
+def main():
+    """Hand-decode the first two Tran blocks of TARGET and compare each sample with the sidecar.
+
+    Block 0 is read as 4-bit signed deltas (two per byte, high nibble
+    first) and block 1 as 8-bit signed deltas, both accumulated from the
+    second preamble sample.
+    """
+    raw = TARGET.read_bytes()
+    sidecar = load_sidecar_samples(TXT)
+    geo_lsb = 0.0003
+    expected = [int(round(v / geo_lsb)) for v in sidecar["Tran"]]
+
+    body = raw[0x0f1f:]
+    # The two starting samples are big-endian signed 16-bit values at body[3:7].
+    first = int.from_bytes(body[3:5], "big", signed=True)
+    second = int.from_bytes(body[5:7], "big", signed=True)
+    print(f"preamble Tran[0]={first}  Tran[1]={second}  (sidecar: {expected[0]}, {expected[1]})")
+
+    # Block 0: two tag bytes at body[7:9], data from body[9].
+    print(f"block 0: tag {body[7]:02x} {body[8]:02x}")
+    print(f"  block 0 first 10 data bytes: {body[9:19].hex()}")
+
+    value = second
+    last_idx = 1  # index of the most recent sample; two exist so far
+    count0 = body[8]
+    print(f"  NN = {count0}")
+    for byte in body[9 : 9 + count0 // 2]:
+        for nib in ((byte >> 4) & 0xF, byte & 0xF):
+            delta = s4(nib)
+            value += delta
+            last_idx += 1
+            if last_idx >= len(expected):
+                continue
+            # Only the start of the block and the 240..260 window are shown.
+            if last_idx < 12 or 240 <= last_idx <= 260:
+                want = expected[last_idx]
+                flag = "✓" if want == value else "✗"
+                print(f"    idx {last_idx:3d}: nibble byte={byte:02x} nib={nib:x} delta={delta:+d}  cur={value:+d}  sc={want:+d}  {flag}")
+
+    print(f"end of block 0: cur={value}, len(samples)={last_idx + 1}, decoder expected 250 here")
+    # Block 1 begins right after block 0's data bytes.
+    tag1_at = 9 + count0 // 2
+    print(f"block 1: tag {body[tag1_at]:02x} {body[tag1_at+1]:02x} (expecting 20 28)")
+    count1 = body[tag1_at + 1]
+    print(f"  block 1 NN = {count1}")
+    for byte in body[tag1_at + 2 : tag1_at + 2 + count1]:
+        delta = i8(byte)
+        value += delta
+        last_idx += 1
+        if last_idx >= len(expected):
+            continue
+        if 248 <= last_idx <= 295:
+            want = expected[last_idx]
+            flag = "✓" if want == value else "✗"
+            print(f"    idx {last_idx:3d}: int8 byte={byte:02x} delta={delta:+d}  cur={value:+d}  sc={want:+d}  {flag}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,42 @@
+"""Feed candidate body offsets to the BW codec and compare with sidecar."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from minimateplus.waveform_codec import decode_waveform_v2, walk_body, find_data_start
+from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
+
+
+def main():
+    """Decode TARGET from several candidate body offsets and print the results beside sidecar counts."""
+    raw = TARGET.read_bytes()
+    sidecar = load_sidecar_samples(TXT)
+
+    def first_counts(name, step):
+        # First 30 sidecar values of a channel, divided by *step* and rounded.
+        return [int(round(v / step)) for v in sidecar[name][:30]]
+
+    # Geo channels use 0.0003 per count (Thor geo LSB).
+    tran = first_counts("Tran", 0.0003)
+    vert = first_counts("Vert", 0.0003)
+    long_ = first_counts("Long", 0.0003)
+    # The 1e-6 step for the mic channel is a working guess.
+    micl = first_counts("MicL", 1e-6)
+    print(f"sidecar Tran (counts): {tran}")
+    print(f"sidecar Vert (counts): {vert}")
+    print(f"sidecar Long (counts): {long_}")
+    print(f"sidecar MicL (×1e-6):  {micl}")
+    print()
+
+    # Candidate offsets at which the waveform body might start.
+    for start in (0x0f1f, 0x1057, 0x11f1, 0x1333, 0x1bde, 0x0d30):
+        print(f"=== body @ 0x{start:04x} ===")
+        result = decode_waveform_v2(raw[start:])
+        if not result:
+            print("  decode_waveform_v2 returned None")
+            continue
+        for name in ("Tran", "Vert", "Long", "MicL"):
+            values = result.get(name, [])
+            print(f"  {name}[{len(values)}]: {values[:20]}")
+        print()
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,51 @@
+"""Verify decode_waveform_v2 against sidecar across all 2304 samples per channel."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(REPO))
+
+from minimateplus.waveform_codec import decode_waveform_v2
+from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
+
+
+def main():
+    """Compare decode_waveform_v2 output with the sidecar, channel by channel.
+
+    Prints the lengths of both sources, then for Tran/Vert/Long the
+    exact-match count over the overlapping samples and the context around
+    the first mismatch.  A channel with no overlapping samples is reported
+    instead of dividing by zero.  Finally prints the sidecar/decoded ratio
+    for MicL over the leading samples.
+    """
+    buf = TARGET.read_bytes()
+    sc = load_sidecar_samples(TXT)
+    body = buf[0x0f1f:]
+    decoded = decode_waveform_v2(body)
+
+    print(f"Sidecar lengths: Tran={len(sc['Tran'])} Vert={len(sc['Vert'])} Long={len(sc['Long'])} MicL={len(sc['MicL'])}")
+    print(f"Decoded lengths: Tran={len(decoded['Tran'])} Vert={len(decoded['Vert'])} Long={len(decoded['Long'])} MicL={len(decoded['MicL'])}")
+    print()
+
+    GEO_LSB = 0.0003  # in/s per count
+    for ch in ("Tran", "Vert", "Long"):
+        sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
+        dec = decoded[ch]
+        n = min(len(sc_counts), len(dec))
+        if n == 0:
+            # No overlap: the percentage below would divide by zero.
+            print(f"{ch}: compared 0, nothing to compare")
+            continue
+        matches = sum(1 for i in range(n) if sc_counts[i] == dec[i])
+        first_mismatch = next((i for i in range(n) if sc_counts[i] != dec[i]), None)
+        print(f"{ch}: compared {n}, exact matches {matches} ({100*matches/n:.2f}%)")
+        if first_mismatch is not None:
+            i = first_mismatch
+            # Clamp the window start so the label matches the slice printed.
+            lo = max(0, i - 2)
+            hi = i + 5
+            print(f"  first mismatch at idx {i}: sidecar={sc_counts[i]} ({sc[ch][i]}), decoded={dec[i]}")
+            print(f"  context sidecar[{lo}..{hi}]: {sc_counts[lo:hi]}")
+            print(f"  context decoded[{lo}..{hi}]: {dec[lo:hi]}")
+
+    # MicL: estimate the multiplicative factor between sidecar and decode.
+    print()
+    print("=== MicL scale analysis ===")
+    sc_micl = sc["MicL"]
+    dec_micl = decoded["MicL"]
+    # Look at the first 50 samples only, skipping zero decoded values
+    # because they cannot be used as a divisor.
+    ratios = [sc_micl[i] / dec_micl[i] for i in range(min(50, len(sc_micl), len(dec_micl))) if dec_micl[i] != 0]
+    if ratios:
+        avg = sum(ratios) / len(ratios)
+        print(f"  avg ratio sidecar/decoded over first 50 nonzero: {avg:.4e} (n={len(ratios)})")
+        print(f"  ratios sample: {[f'{r:.4e}' for r in ratios[:6]]}")
+
+
+if __name__ == "__main__":
+    main()