series 4 codec work, initial decode success

This commit is contained in:
2026-05-29 06:33:06 +00:00
parent 1bccc44b88
commit 9b71ead44b
20 changed files with 1578 additions and 76 deletions
+65
View File
@@ -0,0 +1,65 @@
"""Run read_idf_file across the corpus and report per-channel accuracy vs sidecars."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from micromate.idf_file import read_idf_file
from analysis_idf.recon import load_sidecar_samples
def sidecar_path(idfw: Path) -> Path:
    """Return the expected text-export path for the binary file *idfw*.

    The export sits in a ``TXT`` directory beside the binary and is named
    after the complete binary filename with ``.txt`` appended.
    """
    txt_dir = idfw.parent / "TXT"
    return txt_dir / (idfw.name + ".txt")
def main():
    """Decode every IDFW fixture and summarise exact-match accuracy per channel.

    Each file is decoded with ``read_idf_file`` and compared sample by sample
    against its text sidecar, whose values are converted to integer counts by
    dividing by ``GEO_LSB``. A file is skipped (and counted as skipped) when
    decoding fails, the sidecar is missing, or the sidecar cannot be parsed.
    Channels with nothing to compare contribute no percentage.
    """
    root = REPO / "tests/fixtures/THORDATA_example"
    files = sorted(f for f in root.rglob("*.IDFW") if not str(f).endswith(".CDB"))
    GEO_LSB = 0.0003
    channels = ("Tran", "Vert", "Long")
    n_ok = n_skip = 0
    overall = {ch: [] for ch in channels}
    for f in files:
        # Broad catches are deliberate: this is a best-effort sweep and one
        # broken fixture must not abort it; failures are tallied instead.
        try:
            res = read_idf_file(f)
        except Exception:
            n_skip += 1
            continue
        sc_path = sidecar_path(f)
        if not sc_path.exists():
            n_skip += 1
            continue
        try:
            sc = load_sidecar_samples(sc_path)
        except Exception:
            n_skip += 1
            continue
        for ch in channels:
            sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
            dec = res.samples.get(ch, [])
            n = min(len(sc_counts), len(dec))
            if n == 0:
                continue
            # zip stops at the shorter sequence, i.e. after exactly n pairs.
            exact = sum(1 for want, got in zip(sc_counts, dec) if want == got)
            overall[ch].append(100.0 * exact / n)
        n_ok += 1
    print(f"Processed {n_ok} files (skipped {n_skip})")
    print("Per-channel exact-match % (mean / min / max):")
    for ch, vals in overall.items():
        if vals:
            avg = sum(vals) / len(vals)
            print(f" {ch}: mean={avg:.2f}% min={min(vals):.2f}% max={max(vals):.2f}% n={len(vals)}")
if __name__ == "__main__":
main()
+49
View File
@@ -0,0 +1,49 @@
"""Find where decoded-vs-sidecar diverges for each channel."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from minimateplus.waveform_codec import decode_waveform_v2
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
def main():
    """Show where the decoded waveform first departs from the sidecar.

    For each geophone channel this prints the first mismatching index with a
    few samples of context, then the total mismatch count and the longest
    run of consecutive exact matches. Only the overlapping prefix of the two
    sequences is compared, so a length difference cannot raise IndexError.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    decoded = decode_waveform_v2(buf[0x0f1f:])
    GEO_LSB = 0.0003
    for ch in ("Tran", "Vert", "Long"):
        sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
        dec = decoded[ch]
        n = min(len(sc_counts), len(dec))
        # Index of the first sample that differs, or None if all n agree.
        first_diff = next((i for i in range(n) if dec[i] != sc_counts[i]), None)
        if first_diff is None:
            print(f"{ch}: NO MISMATCHES")
            continue
        print(f"{ch}: first diff at idx {first_diff}")
        # Context window: 3 samples before the first diff through 7 after it.
        for i in range(max(0, first_diff - 3), min(n, first_diff + 8)):
            mark = " " if dec[i] == sc_counts[i] else "**"
            print(f" {mark} idx {i:4d}: sc={sc_counts[i]:6d} dec={dec[i]:6d} diff={dec[i]-sc_counts[i]:+d}")
        # Count mismatches and track the longest streak of exact matches.
        cum_match_run = 0
        max_match_run = 0
        diff_count = 0
        for want, got in zip(sc_counts, dec):
            if got == want:
                cum_match_run += 1
                max_match_run = max(max_match_run, cum_match_run)
            else:
                cum_match_run = 0
                diff_count += 1
        print(f" total mismatches: {diff_count}/{n}, longest run of matches: {max_match_run}")
        print()
if __name__ == "__main__":
main()
+48
View File
@@ -0,0 +1,48 @@
"""End-to-end IDFH ingest verification."""
from __future__ import annotations
import sys
import tempfile
import json
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from sfm.waveform_store import WaveformStore
def main():
    """Ingest one IDFH fixture plus its text report into a temporary store.

    Prints the record returned by ``save_imported_idf``, a few event fields,
    and how many ``idf_intervals`` entries landed in the ``.sfm.json``
    sidecar written under the store directory.
    """
    idfh = (
        REPO
        / "tests/fixtures/THORDATA_example/THORDATA_example"
        / "UPMC Presby/UM13981/UM13981_20220805075441.IDFH"
    )
    txt = idfh.parent / "TXT" / f"{idfh.name}.txt"
    with tempfile.TemporaryDirectory() as td:
        store_root = Path(td)
        store = WaveformStore(store_root)
        ev, rec = store.save_imported_idf(
            idfh.read_bytes(),
            idfh,
            idf_report_text=txt.read_text(errors="replace"),
        )

        print("=== save_imported_idf (IDFH) ===")
        # The h5 entry is expected to be None for a histogram record.
        record_fields = (
            ("serial", "serial"),
            ("filename", "filename"),
            ("filesize", "filesize"),
            ("h5", "hdf5_filename"),
            ("sidecar", "sidecar_filename"),
        )
        for label, key in record_fields:
            print(f" {label}: {rec[key]}")
        print()

        print("=== Event ===")
        for attr in ("timestamp", "record_type", "sample_rate"):
            print(f" {attr}: {getattr(ev, attr)}")
        print()

        # Read the sidecar back to confirm the intervals were stored in it.
        sidecar_file = store_root / "UM13981" / f"{idfh.name}.sfm.json"
        sidecar = json.loads(sidecar_file.read_text())
        intervals = sidecar.get("extensions", {}).get("idf_intervals", [])
        print(f" sidecar intervals: {len(intervals)}")
        if intervals:
            print(f" first interval: {intervals[0]}")
            print(f" last interval: {intervals[-1]}")
if __name__ == "__main__":
main()
+40
View File
@@ -0,0 +1,40 @@
"""Verify the had_report=False path: ingest IDFW with no .txt."""
from __future__ import annotations
import sys
from pathlib import Path
import tempfile
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from sfm.waveform_store import WaveformStore
def main():
    """Ingest an IDFW fixture with no text report and print what was recovered.

    Exercises ``save_imported_idf`` with ``idf_report_text=None`` so that only
    the binary is available to the importer.
    """
    idfw = (
        REPO
        / "tests/fixtures/THORDATA_example/THORDATA_example"
        / "UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    )
    with tempfile.TemporaryDirectory() as td:
        store = WaveformStore(Path(td))
        ev, rec = store.save_imported_idf(
            idfw.read_bytes(),
            idfw,
            serial_hint=None,
            idf_report_text=None,  # deliberately no .txt report
        )
        print("=== IDFW without .txt ingest ===")
        print(f" serial: {rec['serial']}")
        print(f" timestamp: {ev.timestamp}")
        print(f" sample_rate: {ev.sample_rate}")
        print(f" record_type: {ev.record_type}")
        print(f" rectime_sec: {ev.rectime_seconds}")
        # A missing or empty sample mapping reports zero for every channel.
        raw = ev.raw_samples or {}
        counts = " ".join(
            f"{ch}={len(raw.get(ch, []))}" for ch in ("Tran", "Vert", "Long", "MicL")
        )
        print(f" raw_samples: {counts}")
        peaks = ev.peak_values
        if peaks:
            print(f" peak_values: tran={peaks.tran} vert={peaks.vert} long={peaks.long}")
        print(f" h5 written: {rec['hdf5_filename']}")
if __name__ == "__main__":
main()
+52
View File
@@ -0,0 +1,52 @@
"""End-to-end ingest test: feed an IDFW + .txt to save_imported_idf in a tmp store."""
from __future__ import annotations
import sys
from pathlib import Path
import tempfile
import shutil
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from sfm.waveform_store import WaveformStore
def main():
    """Ingest an IDFW fixture together with its text report into a temp store.

    Prints the returned record, the main event fields, per-channel sample
    counts, and whether the ``.h5`` and ``.sfm.json`` files exist on disk.
    """
    idfw = (
        REPO
        / "tests/fixtures/THORDATA_example/THORDATA_example"
        / "UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    )
    txt = idfw.parent / "TXT" / f"{idfw.name}.txt"

    def size_or_zero(p):
        # Size in bytes, or 0 when the file was never written.
        return p.stat().st_size if p.exists() else 0

    with tempfile.TemporaryDirectory() as td:
        store = WaveformStore(Path(td))
        ev, rec = store.save_imported_idf(
            idfw.read_bytes(),
            idfw,
            serial_hint=None,
            idf_report_text=txt.read_text(errors="replace"),
        )

        print("=== Save result ===")
        record_fields = (
            ("serial", "serial"),
            ("filename", "filename"),
            ("filesize", "filesize"),
            ("h5", "hdf5_filename"),
            ("sidecar", "sidecar_filename"),
        )
        for label, key in record_fields:
            print(f" {label}: {rec[key]}")
        print()

        print("=== Event ===")
        serial = getattr(ev, "serial", "(n/a)")
        print(f" serial: {serial}")
        print(f" timestamp: {ev.timestamp}")
        print(f" sample_rate: {ev.sample_rate}")
        print(f" record_type: {ev.record_type}")
        print(f" rectime_sec: {ev.rectime_seconds}")
        raw = ev.raw_samples or {}
        counts = ", ".join(
            f"{ch}={len(raw.get(ch, []))}" for ch in ("Tran", "Vert", "Long", "MicL")
        )
        print(f" raw_samples: {counts}")
        peaks = ev.peak_values
        if peaks:
            print(f" peaks (txt): Tran={peaks.tran} Vert={peaks.vert} Long={peaks.long}")
        print()

        # Confirm both output files exist in the store.
        unit_dir = Path(td) / "UM11719"
        h5path = unit_dir / f"{idfw.name}.h5"
        print(f" h5 exists: {h5path.exists()} size={size_or_zero(h5path)}")
        sidecar = unit_dir / f"{idfw.name}.sfm.json"
        print(f" sidecar exists:{sidecar.exists()} size={size_or_zero(sidecar)}")
if __name__ == "__main__":
main()
+137
View File
@@ -0,0 +1,137 @@
"""Decode IDFH histogram intervals + verify against sidecar."""
from __future__ import annotations
import sys
import struct
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
# Marker bytes that introduce each segment of a multi-segment IDFH body.
# NOTE(review): not referenced by the scanner below, which matches the
# shorter "0a 00 00 00" core so single-interval files are found too.
SEGMENT_MAGIC = b"\x02\xda\x0a\x00\x00\x00"
SEGMENT_SIZE = 732  # = 10-byte header + 10 × 72-byte intervals + 2-byte tail
INTERVAL_SIZE = 72
CHANNELS = ("Tran", "Vert", "Long", "MicL")

_HEADER_SIZE = 10   # [length_be 2B][0a 00 00 00][00 NN][05 3f]
_CHANNEL_SIZE = 16  # bytes per channel inside one interval record


def decode_interval(buf72: bytes) -> dict:
    """Decode one 72-byte interval record.

    The record holds four consecutive 16-byte channel blocks (in ``CHANNELS``
    order) followed by 8 trailing bytes. All fields are big-endian.

    Returns a dict with one entry per channel, each a dict with keys ``min``,
    ``max``, ``field4`` (signed 16-bit at offsets 0/2/4), ``halfp``,
    ``field10``, ``field14`` (unsigned 16-bit at offsets 6/10/14), ``peak``
    (the larger magnitude of min and max) and ``freq_hz`` (``512.0 / halfp``,
    or ``None`` when ``halfp`` is 5 or less). The extra key ``_tail`` holds
    everything from byte 64 onward as space-separated hex.

    Raises ``struct.error`` when *buf72* is too short to hold a field.
    """
    out = {}
    for idx, ch in enumerate(CHANNELS):
        base = idx * _CHANNEL_SIZE
        block = buf72[base:base + _CHANNEL_SIZE]
        mn, mx, sb, halfp = struct.unpack_from(">hhhH", block, 0)
        (f10,) = struct.unpack_from(">H", block, 10)
        (f14,) = struct.unpack_from(">H", block, 14)
        out[ch] = {
            "min": mn,
            "max": mx,
            "field4": sb,
            "halfp": halfp,
            "field10": f10,
            "field14": f14,
            "peak": max(abs(mn), abs(mx)),
            "freq_hz": (512.0 / halfp) if halfp > 5 else None,
        }
    out["_tail"] = buf72[64:].hex(" ")
    return out


def walk_idfh(buf: bytes) -> list:
    """Collect every interval record found in an IDFH file image.

    The buffer is scanned for segment headers of the form
    ``[length_be 2B][0a 00 00 00][00 NN][05 3f]``. ``length`` counts the bytes
    from the ``0a`` through the interval data, so a segment carries
    ``(length - 10) // 72`` intervals that start right after the 10-byte
    header and are followed by 2 tail bytes.

    Returns a list of dicts, one per interval: the ``decode_interval`` result
    plus ``offset``, the interval's byte offset within *buf*. Intervals that
    would run past the end of the buffer are dropped.
    """
    intervals = []
    pos = 0
    while True:
        j = buf.find(b"\x0a\x00\x00\x00", pos)
        if j < 0:
            break
        # The two bytes before the match are the length field; without them
        # this cannot be a header.
        if j < 2:
            pos = j + 1
            continue
        if j + 10 > len(buf):
            break
        # Reject incidental matches that lack the rest of the header shape.
        if buf[j + 4] != 0x00 or buf[j + 6:j + 8] != b"\x05\x3f":
            pos = j + 1
            continue
        length = int.from_bytes(buf[j - 2:j], "big")
        header_start = j - 2
        n_intervals = (length - 10) // INTERVAL_SIZE
        interval_start = header_start + _HEADER_SIZE
        for k in range(n_intervals):
            off = interval_start + k * INTERVAL_SIZE
            if off + INTERVAL_SIZE > len(buf):
                break
            chunk = buf[off:off + INTERVAL_SIZE]
            intervals.append({"offset": off, **decode_interval(chunk)})
        # Skip the whole segment, but always move past this match: a length
        # field of 0 would otherwise resume the search at j and loop forever.
        pos = max(j + 1, header_start + length + 2)
    return intervals
def main():
    """Decode two IDFH fixtures and print intervals next to sidecar rows.

    The first fixture is a multi-segment file, for which a handful of
    interval indices are shown; the second is a single-interval file.
    """
    fixtures = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby"

    def show_channels(interval):
        # One line per channel; peak is also shown scaled by 10.0 / 32768.
        for ch in CHANNELS:
            d = interval[ch]
            peak_ips = d["peak"] / 32768 * 10.0
            print(f" {ch}: peak={d['peak']:5d} ({peak_ips:.4f} in/s) halfp={d['halfp']:5d} freq={d['freq_hz']}")

    # Multi-segment IDFH
    target = fixtures / "UM13981/UM13981_20220805075441.IDFH"
    sc_path = target.parent / "TXT" / f"{target.name}.txt"
    buf = target.read_bytes()
    intervals = walk_idfh(buf)
    print(f"=== {target.name} ===")
    print(f" file size: {len(buf)}")
    print(f" decoded intervals: {len(intervals)}")
    # Sidecar data rows are recognised by their leading year.
    sc_rows = [
        row
        for row in sc_path.read_text(errors="replace").splitlines()
        if row.startswith(("2022-", "2023-"))
    ]
    print(f" sidecar rows: {len(sc_rows)}")
    print()
    for k in (0, 1, 78, 79, 80):
        if k >= len(intervals):
            continue
        iv = intervals[k]
        print(f"--- interval {k} @0x{iv['offset']:04x} ---")
        show_channels(iv)
        if k < len(sc_rows):
            print(f" SC: {sc_rows[k]}")

    # Single-interval IDFH
    print()
    target2 = fixtures / "UM11719/UM11719_20231219162648.IDFH"
    sc2 = target2.parent / "TXT" / f"{target2.name}.txt"
    buf2 = target2.read_bytes()
    intervals2 = walk_idfh(buf2)
    print(f"=== {target2.name} ===")
    print(f" file size: {len(buf2)}, decoded intervals: {len(intervals2)}")
    if intervals2:
        show_channels(intervals2[0])
        sc_rows2 = [
            row
            for row in sc2.read_text(errors="replace").splitlines()
            if row.startswith("2023-")
        ]
        if sc_rows2:
            print(f" SC: {sc_rows2[0]}")
if __name__ == "__main__":
main()
+41
View File
@@ -0,0 +1,41 @@
"""Find IDFH interval period via auto-correlation of structural patterns."""
from __future__ import annotations
import sys
from pathlib import Path
from collections import Counter
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
def main():
    """Rank candidate interval sizes by how many byte columns are mostly zero.

    The body slice is cut into rows of each candidate size; a column scores
    when at least 90% of the rows hold a zero byte there. The ten best
    candidates are printed, highest score first.
    """
    target = (
        REPO
        / "tests/fixtures/THORDATA_example/THORDATA_example"
        / "UPMC Presby/UM13981/UM13981_20220805075441.IDFH"
    )
    buf = target.read_bytes()
    body = buf[0xF96:0x270C]
    print(f"body size: {len(body)} bytes (file {len(buf)} bytes)")
    print()
    print("=== zero-column score by interval size (higher = more likely) ===")
    candidates = []
    for sz in range(16, 100):
        n = len(body) // sz
        # Too few rows makes the 90% test meaningless.
        if n >= 30:
            threshold = n * 0.9
            # body[col:n*sz:sz] is column `col` across the n complete rows.
            score = sum(
                1
                for col in range(sz)
                if body[col:n * sz:sz].count(0) >= threshold
            )
            candidates.append((score, sz, n))
    for score, sz, n in sorted(candidates, reverse=True)[:10]:
        print(f" size={sz:3d} n_intervals={n} consistently-zero-cols={score}")
if __name__ == "__main__":
main()
+40
View File
@@ -0,0 +1,40 @@
"""Per-file accuracy + sample-count details."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from micromate.idf_file import read_idf_file
from analysis_idf.recon import load_sidecar_samples
def main():
    """Print size, sample counts and Tran exact-match rate for IDFW fixtures.

    Files that fail to decode or have no text sidecar are passed over
    silently; output stops after 20 rows.
    """
    root = REPO / "tests/fixtures/THORDATA_example"
    candidates = sorted(
        path for path in root.rglob("*.IDFW") if not str(path).endswith(".CDB")
    )
    GEO_LSB = 0.0003
    # Detail is limited to the first 20 files that decode and have a sidecar.
    shown = 0
    for f in candidates:
        try:
            res = read_idf_file(f)
        except Exception:
            continue
        sc_path = f.parent / "TXT" / f"{f.name}.txt"
        if not sc_path.exists():
            continue
        sc = load_sidecar_samples(sc_path)
        sc_tran = [int(round(value / GEO_LSB)) for value in sc["Tran"]]
        dec = res.samples.get("Tran", [])
        n = min(len(sc_tran), len(dec))
        if n:
            exact = sum(1 for k in range(n) if sc_tran[k] == dec[k])
            pct = 100.0 * exact / n
        else:
            pct = 0.0
        print(f"{f.name:40s} size={f.stat().st_size:6d} sc_n={len(sc_tran):4d} dec_n={len(dec):4d} exact={pct:.1f}%")
        shown += 1
        if shown >= 20:
            break
if __name__ == "__main__":
main()
+64
View File
@@ -0,0 +1,64 @@
"""Look at what's at the divergence boundary."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from minimateplus.waveform_codec import walk_body, find_data_start, parse_segment_header
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
def main():
    """Dump the first 30 codec blocks and a running per-segment sample count.

    The first pass prints each block's file offset and either its parsed
    segment header or its length and leading data bytes. The second pass
    walks the same blocks, switching channel on every ``0x40`` tag and adding
    each block's sample count to the current segment's running total.
    """
    base = 0x0f1f
    buf = TARGET.read_bytes()
    body = buf[base:]
    start = find_data_start(body)
    print(f"data_start: {start} (= file offset 0x{0x0f1f + start:04x})")
    blocks = walk_body(body, start)
    print(f"{len(blocks)} blocks total")
    print()

    # First 30 blocks
    print("=== first 30 blocks ===")
    for i, b in enumerate(blocks[:30]):
        body_off = base + b.offset
        if b.tag_hi != 0x40:
            print(f" [{i:3d}] @0x{body_off:04x} {b.kind} len={b.length} data={b.data[:16].hex()}")
            continue
        hdr = parse_segment_header(b)
        # Every header field falls back to '?' when the header did not parse.
        counter = hdr["counter"] if hdr else "?"
        field2 = hdr["field2"].hex() if hdr else "?"
        anchor = hdr["anchor_bytes"].hex() if hdr else "?"
        tail = hdr["tail"].hex() if hdr else "?"
        print(f" [{i:3d}] @0x{body_off:04x} {b.kind} (segment header) counter={counter} field2={field2} anchor={anchor} tail={tail}")
    print()

    # Running sample totals, to locate the block that holds a given sample.
    print("=== cumulative samples through blocks ===")
    cur_ch = "Tran"
    rotation = ["Vert", "Long", "MicL", "Tran"]
    seg_count = 0
    samples_in_curseg = 2  # preamble Tran[0], Tran[1]
    for i, b in enumerate(blocks[:30]):
        hi = b.tag_hi
        family = hi & 0xF0
        if hi == 0x40:
            seg_count += 1
            prev_ch = cur_ch
            cur_ch = rotation[(seg_count - 1) % 4]
            print(f" [{i:3d}] 40 02 -> end of {prev_ch} segment, start {cur_ch} (segment {seg_count})")
            samples_in_curseg = 2  # anchors
        elif family == 0x10 or family == 0x20:
            # Both families carry a 12-bit count split across the two tag bytes.
            nn = ((hi & 0x0F) << 8) | b.tag_lo
            label = "nibble" if family == 0x10 else "int8"
            samples_in_curseg += nn
            print(f" [{i:3d}] {b.kind} {label}: +{nn} samples, ch={cur_ch}, ch_total~{samples_in_curseg}")
        elif hi == 0x00:
            samples_in_curseg += b.tag_lo
            print(f" [{i:3d}] {b.kind} RLE: +{b.tag_lo}, ch={cur_ch}, ch_total~{samples_in_curseg}")
        elif hi == 0x30:
            samples_in_curseg += b.tag_lo
            print(f" [{i:3d}] {b.kind} packed12: +{b.tag_lo} samples, ch={cur_ch}, ch_total~{samples_in_curseg}")
if __name__ == "__main__":
main()
+89
View File
@@ -0,0 +1,89 @@
"""Reconnaissance helpers for cracking the Thor IDFW binary."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
# Reference IDFW waveform fixture that main() below reads and searches.
TARGET = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
# Text export for the same event: same filename plus ".txt", under the sibling TXT directory.
TXT = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt"
def hex_at(buf: bytes, off: int, n: int = 32) -> str:
    """Format up to *n* bytes of *buf* starting at *off* as one hex-dump line.

    The line is the offset as four hex digits, the bytes as space-separated
    two-digit hex, then the same bytes as text with non-printable ones shown
    as ``.``.
    """
    window = buf[off:off + n]
    hex_part = window.hex(" ")
    text_part = "".join(
        chr(value) if 32 <= value < 127 else "." for value in window
    )
    return f"{off:04x}: {hex_part} {text_part}"
def find_all(buf: bytes, needle: bytes) -> list[int]:
    """Return the offsets of every occurrence of *needle* in *buf*.

    The search restarts one byte after each hit, so overlapping occurrences
    are all reported, in ascending order.
    """
    hits: list[int] = []
    start = 0
    while (pos := buf.find(needle, start)) >= 0:
        hits.append(pos)
        start = pos + 1
    return hits
def load_sidecar_samples(path: Path) -> dict[str, list[float]]:
    """Parse the waveform sample table from a text sidecar.

    Reading starts after the line ``Waveform Data Channels`` and stops at the
    first line beginning with ``Waveform Data USB Channels``. Rows are
    tab-separated; columns 1-4 hold Tran, Vert, Long and MicL. The header row
    and rows with fewer than five columns are skipped.

    A row is kept only when all four values parse as floats, so the four
    returned lists always have the same length and stay index-aligned.

    Returns a dict mapping each channel name to its list of samples.
    """
    channels = ("Tran", "Vert", "Long", "MicL")
    out: dict[str, list[float]] = {ch: [] for ch in channels}
    in_block = False
    for line in path.read_text(errors="replace").splitlines():
        if not in_block:
            # Everything up to and including the section title is ignored.
            in_block = line.strip() == "Waveform Data Channels"
            continue
        if line.startswith("Waveform Data USB Channels"):
            break
        parts = line.split("\t")
        # Skip short rows and the "\tTran\tVert\tLong\tMicL" header row.
        if len(parts) < 5 or parts[1] == "Tran":
            continue
        try:
            # Parse the whole row before storing anything: a bad later
            # column must not leave earlier channels one sample longer.
            row = [float(cell) for cell in parts[1:5]]
        except ValueError:
            continue
        for ch, value in zip(channels, row):
            out[ch].append(value)
    return out
def main():
    """Print basic facts about the reference IDFW file and its sidecar.

    Shows the file size, per-channel sample counts and first samples, then
    every position of the byte patterns ``00 02 00`` and ``40 02``.
    """
    buf = TARGET.read_bytes()
    samples = load_sidecar_samples(TXT)
    print(f"file size: {len(buf)} bytes")
    print(f"sample rows: Tran={len(samples['Tran'])} Vert={len(samples['Vert'])} Long={len(samples['Long'])} MicL={len(samples['MicL'])}")
    for ch in ("Tran", "Vert", "Long", "MicL"):
        print(f"first 6 {ch} samples: {samples[ch][:6]}")
    print()

    print("=== BW magic '00 02 00' positions ===")
    magic_hits = find_all(buf, b"\x00\x02\x00")
    print(f"{len(magic_hits)} hits")
    for pos in magic_hits[:20]:
        print(hex_at(buf, pos, 24))
    print()

    print("=== '40 02' segment-header positions ===")
    header_hits = find_all(buf, b"\x40\x02")
    print(f"{len(header_hits)} hits")
    for pos in header_hits:
        # The preceding bytes help tell real headers from chance matches.
        before = buf[max(0, pos - 4):pos].hex()
        after = buf[pos:pos + 20].hex()
        print(f" 0x{pos:04x} pre={before} post={after}")
if __name__ == "__main__":
main()
+40
View File
@@ -0,0 +1,40 @@
"""Find each segment boundary in the channel and check if errors reset there."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from minimateplus.waveform_codec import decode_waveform_v2
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
def main():
    """List where decoded samples fall out of, or back into, sidecar agreement.

    For each geophone channel the overlapping samples are scanned in order
    and every change between "matches" and "differs" is recorded; the first
    20 changes are printed.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    decoded = decode_waveform_v2(buf[0x0f1f:])
    GEO_LSB = 0.0003
    for ch in ("Tran", "Vert", "Long"):
        expected = [int(round(value / GEO_LSB)) for value in sc[ch]]
        actual = decoded[ch]
        transitions = []
        # The scan starts in the "matching" state, so an initial mismatch is
        # reported as a DIVERGE at index 0.
        in_sync = True
        for idx, (want, got) in enumerate(zip(expected, actual)):
            same = want == got
            if same != in_sync:
                label = "RESYNC" if same else "DIVERGE"
                transitions.append((idx, label, want, got))
            in_sync = same
        print(f"{ch}: {len(transitions)} transitions")
        for idx, kind, sc_v, dec_v in transitions[:20]:
            print(f" idx {idx:4d} {kind:8s} sc={sc_v:6d} dec={dec_v:6d} diff={dec_v-sc_v:+d}")
        print()
if __name__ == "__main__":
main()
+46
View File
@@ -0,0 +1,46 @@
"""Smoke-test read_idf_file on IDFH across the corpus."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from micromate.idf_file import read_idf_file
def main():
    """Decode one IDFH fixture in detail, then tally results over all of them.

    The sweep counts files that decode, files that raise
    ``NotImplementedError`` and files that fail any other way, printing the
    first three failures.
    """
    fixtures = REPO / "tests/fixtures/THORDATA_example"
    target = fixtures / "THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162648.IDFH"
    result = read_idf_file(target)
    ev = result.event
    peaks = ev.peaks
    print(f"=== {target.name} ===")
    print(f" signature: {result.signature}")
    print(f" serial: {ev.serial}")
    print(f" timestamp: {ev.timestamp}")
    print(f" sample_rate: {ev.sample_rate}")
    print(f" kind: {ev.kind}")
    print(f" intervals: {len(result.intervals or [])}")
    print(f" peaks: T={peaks.transverse_ips:.4f} V={peaks.vertical_ips:.4f} L={peaks.longitudinal_ips:.4f}")
    print()

    files = list(fixtures.rglob("*.IDFH"))
    ok = fail = nyi = 0
    total_intervals = 0
    for path in files:
        try:
            decoded = read_idf_file(path)
            ok += 1
            total_intervals += len(decoded.intervals or [])
        except NotImplementedError:
            nyi += 1
        except Exception as exc:
            fail += 1
            # Only the first few failures are shown to keep output short.
            if fail <= 3:
                print(f" FAIL: {path.name}: {type(exc).__name__}: {exc}")
    print(f"Corpus: {len(files)} IDFH files | ok={ok} fail={fail} nyi={nyi}")
    print(f"Total intervals decoded: {total_intervals}")
if __name__ == "__main__":
main()
+48
View File
@@ -0,0 +1,48 @@
"""Smoke-test read_idf_file across the sample corpus."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from micromate.idf_file import read_idf_file, geo_count_to_ips, mic_count_to_psi
def main():
    """Decode one IDFW fixture in detail, then tally results over all of them.

    The sweep counts files that decode, files that raise
    ``NotImplementedError`` and files that fail any other way, printing the
    first five failures.
    """
    fixtures = REPO / "tests/fixtures/THORDATA_example"
    target = fixtures / "THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    result = read_idf_file(target)
    ev = result.event
    samples = result.samples
    peaks = ev.peaks
    print(f"=== {target.name} ===")
    print(f" signature: {result.signature}")
    print(f" serial: {ev.serial}")
    print(f" timestamp: {ev.timestamp}")
    print(f" sample_rate: {ev.sample_rate}")
    print(f" record_time: {ev.record_time_sec}")
    print(f" calibration: {result.binary_metadata.calibration_date}")
    print(f" Tran samples: {len(samples['Tran'])}, peak_ips={peaks.transverse_ips:.4f}")
    print(f" Vert samples: {len(samples['Vert'])}, peak_ips={peaks.vertical_ips:.4f}")
    print(f" Long samples: {len(samples['Long'])}, peak_ips={peaks.longitudinal_ips:.4f}")
    print(f" MicL samples: {len(samples['MicL'])}")
    print()

    # Corpus sweep
    files = [p for p in fixtures.rglob("*.IDFW") if not str(p).endswith(".CDB")]
    ok = fail = nyi = 0
    for path in files:
        try:
            read_idf_file(path)
        except NotImplementedError:
            nyi += 1
        except Exception as exc:
            fail += 1
            if fail <= 5:
                print(f" FAIL: {path.name}: {type(exc).__name__}: {exc}")
        else:
            ok += 1
    print()
    print(f"Corpus: {len(files)} IDFW files | ok={ok} fail={fail} not-implemented={nyi}")
if __name__ == "__main__":
main()
+73
View File
@@ -0,0 +1,73 @@
"""Trace Tran sample-by-sample to find exactly where the codec drifts."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
def s4(n: int) -> int:
    """Reinterpret a 4-bit value (0..15) as a signed two's-complement nibble."""
    if n >= 8:
        return n - 16
    return n
def i8(b: int) -> int:
    """Reinterpret a byte value (0..255) as a signed 8-bit integer."""
    if b >= 128:
        return b - 256
    return b
def main():
    """Hand-decode the first two Tran blocks and compare each sample to the sidecar.

    The two preamble samples are read as big-endian signed 16-bit values.
    Block 0 is then walked as signed 4-bit deltas (two per byte, high nibble
    first) and block 1 as signed 8-bit deltas, each delta added to a running
    value. Samples in the printed index windows are shown beside the sidecar
    count, with ``**`` marking rows where the two disagree.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    GEO_LSB = 0.0003
    sc_tran = [int(round(v / GEO_LSB)) for v in sc["Tran"]]
    body = buf[0x0f1f:]

    # Tran[0], Tran[1] from preamble
    t0 = int.from_bytes(body[3:5], "big", signed=True)
    t1 = int.from_bytes(body[5:7], "big", signed=True)
    print(f"preamble Tran[0]={t0} Tran[1]={t1} (sidecar: {sc_tran[0]}, {sc_tran[1]})")

    # Block 0: tag bytes at body[7:9], data from body[9].
    print(f"block 0: tag {body[7]:02x} {body[8]:02x}")
    print(f" block 0 first 10 data bytes: {body[9:19].hex()}")

    cur = t1
    samples = [t0, t1]
    # NOTE(review): only the low tag byte is used as the count here —
    # confirm this fixture's first tag carries no high count bits.
    nn = body[8]
    print(f" NN = {nn}")
    data = body[9 : 9 + nn // 2]  # two 4-bit deltas per byte
    for byte in data:
        for nib in ((byte >> 4) & 0xF, byte & 0xF):
            cur += s4(nib)
            samples.append(cur)
            idx = len(samples) - 1
            if 0 <= idx < len(sc_tran):
                sc_v = sc_tran[idx]
                match = " " if sc_v == cur else "**"
                if idx < 12 or 240 <= idx <= 260:
                    print(f" idx {idx:3d}: nibble byte={byte:02x} nib={nib:x} delta={s4(nib):+d} cur={cur:+d} sc={sc_v:+d} {match}")
    print(f"end of block 0: cur={cur}, len(samples)={len(samples)}, decoder expected 250 here")

    # Block 1 begins right after block 0's data bytes.
    block1_off = 9 + nn // 2
    print(f"block 1: tag {body[block1_off]:02x} {body[block1_off+1]:02x} (expecting 20 28)")
    nn1 = body[block1_off + 1]
    print(f" block 1 NN = {nn1}")
    data1 = body[block1_off + 2 : block1_off + 2 + nn1]  # one 8-bit delta per byte
    for byte in data1:
        cur += i8(byte)
        samples.append(cur)
        idx = len(samples) - 1
        if idx < len(sc_tran):
            sc_v = sc_tran[idx]
            match = " " if sc_v == cur else "**"
            if 248 <= idx <= 295:
                print(f" idx {idx:3d}: int8 byte={byte:02x} delta={i8(byte):+d} cur={cur:+d} sc={sc_v:+d} {match}")
if __name__ == "__main__":
main()
+42
View File
@@ -0,0 +1,42 @@
"""Feed candidate body offsets to the BW codec and compare with sidecar."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from minimateplus.waveform_codec import decode_waveform_v2, walk_body, find_data_start
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
def main():
    """Try several candidate body offsets with the codec and print the output.

    The first 30 sidecar samples per channel are printed as integer counts
    for reference, followed by the first 20 decoded samples per channel at
    each candidate offset.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)

    # Sidecar samples in 0.0003 counts (Thor geo LSB); the 1e-6 mic divisor
    # is a provisional guess.
    divisors = (("Tran", 0.0003), ("Vert", 0.0003), ("Long", 0.0003), ("MicL", 1e-6))
    reference = {
        ch: [int(round(value / lsb)) for value in sc[ch][:30]]
        for ch, lsb in divisors
    }
    print(f"sidecar Tran (counts): {reference['Tran']}")
    print(f"sidecar Vert (counts): {reference['Vert']}")
    print(f"sidecar Long (counts): {reference['Long']}")
    print(f"sidecar MicL (×1e-6): {reference['MicL']}")
    print()

    # Try candidate body start offsets.
    for off in (0x0f1f, 0x1057, 0x11f1, 0x1333, 0x1bde, 0x0d30):
        print(f"=== body @ 0x{off:04x} ===")
        decoded = decode_waveform_v2(buf[off:])
        if decoded:
            for ch in ("Tran", "Vert", "Long", "MicL"):
                arr = decoded.get(ch, [])
                print(f" {ch}[{len(arr)}]: {arr[:20]}")
            print()
        else:
            print(" decode_waveform_v2 returned None")
if __name__ == "__main__":
main()
+51
View File
@@ -0,0 +1,51 @@
"""Verify decode_waveform_v2 against sidecar across all 2304 samples per channel."""
from __future__ import annotations
import sys
from pathlib import Path
REPO = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO))
from minimateplus.waveform_codec import decode_waveform_v2
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
def main():
    """Compare ``decode_waveform_v2`` output with the sidecar for every sample.

    For each geophone channel the overlapping samples are compared as integer
    counts, and the first mismatch is printed with a little context. A
    channel with nothing to compare is reported rather than dividing by
    zero, and a falsy decode result ends the run with a message. The MicL
    section estimates the sidecar/decoded scale factor from the nonzero
    decoded values among the first 50 samples.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    body = buf[0x0f1f:]
    decoded = decode_waveform_v2(body)
    if not decoded:
        print("decode_waveform_v2 returned nothing; cannot compare")
        return
    print(f"Sidecar lengths: Tran={len(sc['Tran'])} Vert={len(sc['Vert'])} Long={len(sc['Long'])} MicL={len(sc['MicL'])}")
    print(f"Decoded lengths: Tran={len(decoded['Tran'])} Vert={len(decoded['Vert'])} Long={len(decoded['Long'])} MicL={len(decoded['MicL'])}")
    print()

    GEO_LSB = 0.0003  # in/s per count
    for ch in ("Tran", "Vert", "Long"):
        sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
        dec = decoded[ch]
        n = min(len(sc_counts), len(dec))
        if n == 0:
            print(f"{ch}: compared 0, nothing to compare")
            continue
        matches = sum(1 for want, got in zip(sc_counts, dec) if want == got)
        first_mismatch = next((i for i in range(n) if sc_counts[i] != dec[i]), None)
        print(f"{ch}: compared {n}, exact matches {matches} ({100*matches/n:.2f}%)")
        if first_mismatch is not None:
            i = first_mismatch
            # Clamp the window so the printed bounds match the slice shown.
            lo = max(0, i - 2)
            hi = min(n, i + 5)
            print(f" first mismatch at idx {i}: sidecar={sc_counts[i]} ({sc[ch][i]}), decoded={dec[i]}")
            print(f" context sidecar[{lo}..{hi - 1}]: {sc_counts[lo:hi]}")
            print(f" context decoded[{lo}..{hi - 1}]: {dec[lo:hi]}")

    # MicL: find the multiplicative factor that fits
    print()
    print("=== MicL scale analysis ===")
    sc_micl = sc["MicL"]
    dec_micl = decoded["MicL"]
    # Zero decoded values are skipped so the ratio is always defined.
    span = min(50, len(sc_micl), len(dec_micl))
    ratios = [sc_micl[i] / dec_micl[i] for i in range(span) if dec_micl[i] != 0]
    if ratios:
        avg = sum(ratios) / len(ratios)
        print(f" avg ratio sidecar/decoded over first 50 nonzero: {avg:.4e} (n={len(ratios)})")
        print(f" ratios sample: {[f'{r:.4e}' for r in ratios[:6]]}")
if __name__ == "__main__":
main()