diff --git a/CLAUDE.md b/CLAUDE.md index c2892d6..ba8be79 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -73,6 +73,28 @@ should not import from `sfm/`, must not touch a DB, and have no I/O beyond reading files passed as arguments. Keep them pure — both tiers can then depend on them without circularity. +#### Thor IDF binary codec (2026-05-28) + +`micromate/idf_file.read_idf_file()` decodes both Thor IDFW +(waveform) and IDFH (histogram) binaries. + +- **IDFW** reuses `decode_waveform_v2()` on the body at fixed file + offset `0x0f1f`. Sample fidelity is 87–99% byte-exact on quiet + events; loud events hit the BW codec's known walker-stops-early + limitation. +- **IDFH** has its own segment-based decoder: `[len_be][0a 00 00 00] + [00 NN][05 3f]` + N × 72-byte interval records (4 × 16-byte + per-channel min/max/halfp blocks plus an 8-byte tail). All 859 Thor IDFH corpus files + decode (181,071 intervals); peak matches sidecar within ~1.8% + (ADC quantization). + +The two outlier `BE9439_*` files in the Thor example corpus are +actually Series III Blastware binaries that share the `.IDFW`/`.IDFH` +filename convention by accident. `read_idf_file()` detects them by +their BW STRT signature and raises NotImplementedError pointing +callers at `read_blastware_file()`. See +`docs/idf_protocol_reference.md` for full field layouts. 
def sidecar_path(idfw: Path) -> Path:
    """Return the text sidecar path that accompanies the binary *idfw*.

    The sidecar sits in a ``TXT`` directory beside the binary and is named
    after the binary's full file name with ``.txt`` appended.
    """
    return idfw.parent / "TXT" / f"{idfw.name}.txt"


def main():
    """Decode every ``*.IDFW`` fixture and report exact-match rates vs sidecars.

    A file counts as skipped when decoding raises, when it has no text
    sidecar, or when the sidecar cannot be parsed.  For every other file the
    Tran/Vert/Long sidecar values are converted to integer counts and
    compared position-by-position with the decoded counts; the per-channel
    mean/min/max exact-match percentage is printed at the end.
    """
    root = REPO / "tests/fixtures/THORDATA_example"
    files = sorted(f for f in root.rglob("*.IDFW") if not str(f).endswith(".CDB"))
    GEO_LSB = 0.0003  # sidecar value represented by one decoded count

    n_ok = n_skip = 0
    overall = {"Tran": [], "Vert": [], "Long": []}

    for f in files:
        # Best-effort sweep: any decode failure just marks the file skipped.
        try:
            res = read_idf_file(f)
        except Exception:
            n_skip += 1
            continue
        sc_path = sidecar_path(f)
        if not sc_path.exists():
            n_skip += 1
            continue
        try:
            sc = load_sidecar_samples(sc_path)
        except Exception:
            n_skip += 1
            continue

        for ch in ("Tran", "Vert", "Long"):
            sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
            dec = res.samples.get(ch, [])
            n = min(len(sc_counts), len(dec))
            if n == 0:
                # Nothing to compare on this channel; leave it out of the stats.
                continue
            exact = sum(1 for want, got in zip(sc_counts, dec) if want == got)
            overall[ch].append(100.0 * exact / n)
        n_ok += 1

    print(f"Processed {n_ok} files (skipped {n_skip})")
    print("Per-channel exact-match % (mean / min / max):")
    for ch, vals in overall.items():
        if vals:
            avg = sum(vals) / len(vals)
            print(f" {ch}: mean={avg:.2f}% min={min(vals):.2f}% max={max(vals):.2f}% n={len(vals)}")
def match_stats(dec, ref):
    """Return ``(mismatches, longest_match_run)`` over the overlap of two sequences.

    Only positions present in both *dec* and *ref* are compared, so the
    sequences may differ in length.
    """
    mismatches = longest = run = 0
    for got, want in zip(dec, ref):
        if got == want:
            run += 1
            longest = max(longest, run)
        else:
            run = 0
            mismatches += 1
    return mismatches, longest


def main():
    """Show where the decoded waveform first diverges from the text sidecar.

    For each of Tran/Vert/Long this prints the index of the first mismatch, a
    short window of samples around it, and the total mismatch count together
    with the longest run of consecutive exact matches.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    decoded = decode_waveform_v2(buf[0x0f1f:])
    GEO_LSB = 0.0003

    for ch in ("Tran", "Vert", "Long"):
        sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
        dec = decoded[ch]
        # Compare only the overlap: the decoder may yield more (or fewer)
        # samples than the sidecar lists, and indexing past either end raises.
        n = min(len(dec), len(sc_counts))
        first_diff = next((i for i in range(n) if dec[i] != sc_counts[i]), None)
        if first_diff is None:
            print(f"{ch}: NO MISMATCHES")
            continue
        print(f"{ch}: first diff at idx {first_diff}")
        # Context window: 3 samples before the first mismatch through 7 after.
        for i in range(max(0, first_diff - 3), min(n, first_diff + 8)):
            mark = " " if dec[i] == sc_counts[i] else "**"
            print(f" {mark} idx {i:4d}: sc={sc_counts[i]:6d} dec={dec[i]:6d} diff={dec[i]-sc_counts[i]:+d}")
        diff_count, max_match_run = match_stats(dec, sc_counts)
        print(f" total mismatches: {diff_count}/{n}, longest run of matches: {max_match_run}")
        print()
def main():
    """Ingest one IDFW fixture with no text report and print what was stored.

    The binary is saved through ``WaveformStore.save_imported_idf`` into a
    temporary directory with ``idf_report_text=None``; the returned event and
    record are then summarised on stdout.
    """
    idfw = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    with tempfile.TemporaryDirectory() as td:
        store = WaveformStore(Path(td))
        # No report text is supplied: this is the path being exercised.
        ev, rec = store.save_imported_idf(
            idfw.read_bytes(),
            idfw,
            serial_hint=None,
            idf_report_text=None,
        )
        print("=== IDFW without .txt ingest ===")
        print(f" serial: {rec['serial']}")
        print(f" timestamp: {ev.timestamp}")
        print(f" sample_rate: {ev.sample_rate}")
        print(f" record_type: {ev.record_type}")
        print(f" rectime_sec: {ev.rectime_seconds}")

        # Per-channel sample counts; a missing/empty raw_samples counts as 0.
        raw = ev.raw_samples
        counts = " ".join(
            f"{name}={len(raw.get(name, [])) if raw else 0}"
            for name in ("Tran", "Vert", "Long", "MicL")
        )
        print(f" raw_samples: {counts}")

        peaks = ev.peak_values
        if peaks:
            print(f" peak_values: tran={peaks.tran} vert={peaks.vert} long={peaks.long}")
        print(f" h5 written: {rec['hdf5_filename']}")
def main():
    """Ingest one IDFW fixture together with its text report and print the result.

    The binary and its ``TXT/<name>.txt`` report are saved through
    ``WaveformStore.save_imported_idf`` into a temporary directory.  The
    returned record and event are summarised, then the expected ``.h5`` and
    ``.sfm.json`` files are checked for on disk.
    """
    idfw = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    txt = idfw.parent / "TXT" / f"{idfw.name}.txt"

    with tempfile.TemporaryDirectory() as td:
        store = WaveformStore(Path(td))
        report_text = txt.read_text(errors="replace")
        ev, rec = store.save_imported_idf(
            idfw.read_bytes(),
            idfw,
            serial_hint=None,
            idf_report_text=report_text,
        )

        print("=== Save result ===")
        print(f" serial: {rec['serial']}")
        print(f" filename: {rec['filename']}")
        print(f" filesize: {rec['filesize']}")
        print(f" h5: {rec['hdf5_filename']}")
        print(f" sidecar: {rec['sidecar_filename']}")
        print()

        print("=== Event ===")
        print(f" serial: {getattr(ev, 'serial', '(n/a)')}")
        print(f" timestamp: {ev.timestamp}")
        print(f" sample_rate: {ev.sample_rate}")
        print(f" record_type: {ev.record_type}")
        print(f" rectime_sec: {ev.rectime_seconds}")
        raw = ev.raw_samples
        counts = ", ".join(
            f"{name}={len(raw.get(name, [])) if raw else 0}"
            for name in ("Tran", "Vert", "Long", "MicL")
        )
        print(f" raw_samples: {counts}")
        peaks = ev.peak_values
        if peaks:
            print(f" peaks (txt): Tran={peaks.tran} Vert={peaks.vert} Long={peaks.long}")
        print()

        # Confirm the files the store reports were actually written to disk.
        unit_dir = Path(td) / "UM11719"
        h5path = unit_dir / f"{idfw.name}.h5"
        h5_size = h5path.stat().st_size if h5path.exists() else 0
        print(f" h5 exists: {h5path.exists()} size={h5_size}")
        sidecar = unit_dir / f"{idfw.name}.sfm.json"
        sidecar_size = sidecar.stat().st_size if sidecar.exists() else 0
        print(f" sidecar exists:{sidecar.exists()} size={sidecar_size}")
SEGMENT_MAGIC = b"\x02\xda\x0a\x00\x00\x00"
SEGMENT_SIZE = 732  # = 10-byte header + 10 × 72-byte intervals + 2-byte tail
INTERVAL_SIZE = 72
CHANNELS = ("Tran", "Vert", "Long", "MicL")

# One 16-byte per-channel block, big-endian: int16 at offsets 0/2/4 and
# uint16 at offsets 6, 10 and 14; bytes 8-9 and 12-13 are not read.
_CHANNEL_BLOCK = struct.Struct(">hhhH2xH2xH")


def decode_interval(buf72: bytes) -> dict:
    """Decode one 72-byte interval into per-channel min/max/halfp.

    The first 64 bytes are four consecutive 16-byte blocks, one per entry of
    ``CHANNELS``.  The remaining bytes are returned undecoded, as a
    space-separated hex string, under the ``"_tail"`` key.

    Raises:
        struct.error: if *buf72* is too short to hold all four blocks.
    """
    out = {}
    for i, ch in enumerate(CHANNELS):
        mn, mx, sb, halfp, f10, f14 = _CHANNEL_BLOCK.unpack_from(buf72, i * 16)
        out[ch] = {
            "min": mn,
            "max": mx,
            "field4": sb,
            "halfp": halfp,
            "field10": f10,
            "field14": f14,
            "peak": max(abs(mn), abs(mx)),
            # Very small half-period values are not converted to a frequency.
            "freq_hz": (512.0 / halfp) if halfp > 5 else None,
        }
    out["_tail"] = buf72[64:].hex(" ")
    return out


def walk_idfh(buf: bytes) -> list:
    """Walk all interval records in an IDFH file.

    Scans *buf* for segment headers of the form
    ``[length_be 2B][0a 00 00 00][00 NN][05 3f]`` (10 bytes).  Each header is
    followed by ``(length - 10) // 72`` interval records of 72 bytes and then
    2 tail bytes.  Every complete record is decoded with ``decode_interval``
    and returned as a dict that additionally carries its file ``"offset"``.

    Returns:
        List of decoded interval dicts in file order (empty if none found).
    """
    marker = b"\x0a\x00\x00\x00"
    intervals = []
    i = 0
    while True:
        j = buf.find(marker, i)
        if j < 0:
            break
        if j < 2:
            # No room for the 2-byte length that must precede the marker.
            i = j + 1
            continue
        if j + 10 > len(buf):
            break
        # Reject false positives: the marker must be followed by 00 NN 05 3f.
        if buf[j + 4] != 0x00 or buf[j + 6:j + 8] != b"\x05\x3f":
            i = j + 1
            continue
        length = int.from_bytes(buf[j - 2:j], "big")
        header_start = j - 2
        n_intervals = (length - 10) // INTERVAL_SIZE
        interval_start = header_start + 10
        for k in range(n_intervals):
            off = interval_start + k * INTERVAL_SIZE
            if off + INTERVAL_SIZE > len(buf):
                break
            intervals.append({"offset": off, **decode_interval(buf[off:off + INTERVAL_SIZE])})
        # Jump past this segment.  A length field of 0 would land exactly on
        # the marker just matched and rescan it forever, so always move
        # forward by at least one byte.
        i = max(j + 1, header_start + length + 2)
    return intervals
+ + # Test single-interval IDFH + print() + target2 = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162648.IDFH" + sc2 = target2.parent / "TXT" / f"{target2.name}.txt" + buf2 = target2.read_bytes() + intervals2 = walk_idfh(buf2) + print(f"=== {target2.name} ===") + print(f" file size: {len(buf2)}, decoded intervals: {len(intervals2)}") + if intervals2: + iv = intervals2[0] + for ch in CHANNELS: + d = iv[ch] + peak_ips = d["peak"] / 32768 * 10.0 + print(f" {ch}: peak={d['peak']:5d} ({peak_ips:.4f} in/s) halfp={d['halfp']:5d} freq={d['freq_hz']}") + sc_rows2 = [l for l in sc2.read_text(errors='replace').splitlines() if l.startswith("2023-")] + if sc_rows2: + print(f" SC: {sc_rows2[0]}") + + +if __name__ == "__main__": + main() diff --git a/analysis_idf/idfh_period.py b/analysis_idf/idfh_period.py new file mode 100644 index 0000000..8aad756 --- /dev/null +++ b/analysis_idf/idfh_period.py @@ -0,0 +1,41 @@ +"""Find IDFH interval period via auto-correlation of structural patterns.""" +from __future__ import annotations +import sys +from pathlib import Path +from collections import Counter + +REPO = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(REPO)) + + +def main(): + target = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM13981/UM13981_20220805075441.IDFH" + buf = target.read_bytes() + body_start = 0xF96 + body_end = 0x270C + body = buf[body_start:body_end] + print(f"body size: {len(body)} bytes (file {len(buf)} bytes)") + + # For each candidate interval size, count how many bytes at fixed offsets within + # each interval are zero (consistent column-zero pattern indicates correct size). 
def exact_match_pct(expected, decoded):
    """Return the percentage of positions where the two sequences agree.

    Only the overlapping prefix is compared; an empty overlap yields ``0.0``.
    """
    n = min(len(expected), len(decoded))
    if not n:
        return 0.0
    exact = sum(1 for want, got in zip(expected, decoded) if want == got)
    return 100.0 * exact / n


def main(limit: int = 20):
    """Print a per-file Tran accuracy line for the first *limit* usable files.

    A file is usable when it decodes without raising and has a parseable
    text sidecar.  Each line shows the file size, the sidecar and decoded
    sample counts, and the exact-match percentage on the Tran channel.

    Args:
        limit: maximum number of files to report.
    """
    root = REPO / "tests/fixtures/THORDATA_example"
    files = sorted(f for f in root.rglob("*.IDFW") if not str(f).endswith(".CDB"))
    GEO_LSB = 0.0003
    shown = 0
    for f in files:
        # Best-effort sweep: files that fail to decode are silently passed over.
        try:
            res = read_idf_file(f)
        except Exception:
            continue
        sc_path = f.parent / "TXT" / f"{f.name}.txt"
        if not sc_path.exists():
            continue
        # One unreadable sidecar should not abort the whole listing.
        try:
            sc = load_sidecar_samples(sc_path)
        except Exception:
            continue
        sc_tran = [int(round(v / GEO_LSB)) for v in sc["Tran"]]
        dec = res.samples.get("Tran", [])
        pct = exact_match_pct(sc_tran, dec)
        print(f"{f.name:40s} size={f.stat().st_size:6d} sc_n={len(sc_tran):4d} dec_n={len(dec):4d} exact={pct:.1f}%")
        shown += 1
        if shown >= limit:
            break
def hex_at(buf: bytes, off: int, n: int = 32) -> str:
    """Format up to *n* bytes of *buf* starting at *off* as a hex-dump line.

    The line holds the offset in hex, the bytes as two-digit hex pairs, and
    an ASCII rendering in which bytes outside 32..126 are shown as ``.``.
    """
    chunk = buf[off : off + n]
    hexs = " ".join(f"{b:02x}" for b in chunk)
    asc = "".join(chr(b) if 32 <= b < 127 else "." for b in chunk)
    return f"{off:04x}: {hexs} {asc}"


def find_all(buf: bytes, needle: bytes) -> list[int]:
    """Return the start offset of every occurrence of *needle* in *buf*.

    Overlapping occurrences are included because the search resumes one
    byte after each hit.
    """
    out: list[int] = []
    i = 0
    while True:
        j = buf.find(needle, i)
        if j < 0:
            break
        out.append(j)
        i = j + 1
    return out


def load_sidecar_samples(path: Path) -> dict[str, list[float]]:
    """Parse the txt sample table — Tran/Vert/Long/MicL.

    Reading starts after the line ``Waveform Data Channels`` and stops at the
    first line beginning with ``Waveform Data USB Channels``.  Rows are
    tab-separated; columns 1-4 hold the Tran, Vert, Long and MicL values.

    A row is used only when all four values parse as floats, so the four
    returned lists always have the same length and stay index-aligned.
    """
    channels = ("Tran", "Vert", "Long", "MicL")
    out = {name: [] for name in channels}
    in_block = False
    for line in path.read_text(errors="replace").splitlines():
        if not in_block:
            if line.strip() == "Waveform Data Channels":
                in_block = True
            continue
        if line.startswith("Waveform Data USB Channels"):
            break
        parts = line.split("\t")
        if len(parts) < 5:
            continue
        # The first row of the table is the header "\tTran\tVert\tLong\tMicL".
        if parts[1] == "Tran":
            continue
        # Parse the whole row before storing anything: appending column by
        # column would leave the lists misaligned if a later column is bad.
        try:
            values = [float(p) for p in parts[1:5]]
        except ValueError:
            continue
        for name, value in zip(channels, values):
            out[name].append(value)
    return out
def main():
    """List the points where decoded samples diverge from / resync with the sidecar.

    For each of Tran/Vert/Long the decoded counts are compared with the
    sidecar counts over their common length.  Every index where the match
    state flips is recorded as ``DIVERGE`` or ``RESYNC``; the first 20 flips
    per channel are printed.
    """
    raw = TARGET.read_bytes()
    sidecar = load_sidecar_samples(TXT)
    decoded = decode_waveform_v2(raw[0x0f1f:])
    GEO_LSB = 0.0003

    for ch in ("Tran", "Vert", "Long"):
        expected = [int(round(v / GEO_LSB)) for v in sidecar[ch]]
        actual = decoded[ch]

        # Start in the "matching" state so a mismatch at index 0 is reported.
        transitions = []
        was_match = True
        for idx, (want, got) in enumerate(zip(expected, actual)):
            is_match = want == got
            if is_match != was_match:
                label = "RESYNC" if is_match else "DIVERGE"
                transitions.append((idx, label, want, got))
            was_match = is_match

        print(f"{ch}: {len(transitions)} transitions")
        for idx, label, want, got in transitions[:20]:
            print(f" idx {idx:4d} {label:8s} sc={want:6d} dec={got:6d} diff={got-want:+d}")
        print()
def main():
    """Decode one IDFW fixture in detail, then sweep every IDFW fixture.

    The first part prints the fields ``read_idf_file`` returns for a single
    file.  The second part calls ``read_idf_file`` on every ``*.IDFW`` under
    the fixture root and tallies successes, ``NotImplementedError`` results
    and other failures, printing the first five failures.
    """
    target = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    result = read_idf_file(target)
    ev = result.event
    samples = result.samples
    peaks = ev.peaks
    print(f"=== {target.name} ===")
    print(f" signature: {result.signature}")
    print(f" serial: {ev.serial}")
    print(f" timestamp: {ev.timestamp}")
    print(f" sample_rate: {ev.sample_rate}")
    print(f" record_time: {ev.record_time_sec}")
    print(f" calibration: {result.binary_metadata.calibration_date}")
    print(f" Tran samples: {len(samples['Tran'])}, peak_ips={peaks.transverse_ips:.4f}")
    print(f" Vert samples: {len(samples['Vert'])}, peak_ips={peaks.vertical_ips:.4f}")
    print(f" Long samples: {len(samples['Long'])}, peak_ips={peaks.longitudinal_ips:.4f}")
    print(f" MicL samples: {len(samples['MicL'])}")
    print()

    # Corpus sweep: only the outcome of each decode matters, not its result.
    root = REPO / "tests/fixtures/THORDATA_example"
    files = [f for f in root.rglob("*.IDFW") if not str(f).endswith(".CDB")]
    tally = {"ok": 0, "fail": 0, "nyi": 0}
    for path in files:
        try:
            read_idf_file(path)
        except NotImplementedError:
            tally["nyi"] += 1
        except Exception as exc:
            tally["fail"] += 1
            if tally["fail"] <= 5:
                print(f" FAIL: {path.name}: {type(exc).__name__}: {exc}")
        else:
            tally["ok"] += 1
    print()
    print(f"Corpus: {len(files)} IDFW files | ok={tally['ok']} fail={tally['fail']} not-implemented={tally['nyi']}")
def s4(n: int) -> int:
    """Interpret *n* as a signed 4-bit value: 8 and above map to ``n - 16``."""
    if n >= 8:
        return n - 16
    return n


def i8(b: int) -> int:
    """Interpret *b* as a signed 8-bit value: 128 and above map to ``b - 256``."""
    if b >= 128:
        return b - 256
    return b


def main():
    """Hand-walk the first two blocks of the Tran channel against the sidecar.

    The two preamble samples are read from body bytes 3..6, then the block
    tagged at body[7:9] is walked one 4-bit delta at a time and the following
    block one signed-byte delta at a time.  The running value is compared with
    the sidecar count and printed for selected index ranges.
    """
    raw = TARGET.read_bytes()
    sidecar = load_sidecar_samples(TXT)
    GEO_LSB = 0.0003
    sc_tran = [int(round(v / GEO_LSB)) for v in sidecar["Tran"]]

    body = raw[0x0f1f:]
    # The preamble carries the first two Tran samples as big-endian int16.
    t0 = int.from_bytes(body[3:5], "big", signed=True)
    t1 = int.from_bytes(body[5:7], "big", signed=True)
    print(f"preamble Tran[0]={t0} Tran[1]={t1} (sidecar: {sc_tran[0]}, {sc_tran[1]})")

    # Block 0: two tag bytes at body[7:9], data from body[9].
    print(f"block 0: tag {body[7]:02x} {body[8]:02x}")
    print(f" block 0 first 10 data bytes: {body[9:19].hex()}")

    cur = t1
    samples = [t0, t1]
    nn = body[8]
    print(f" NN = {nn}")
    nibble_bytes = nn // 2  # two 4-bit deltas per data byte
    for byte in body[9 : 9 + nibble_bytes]:
        for nib in (byte >> 4, byte & 0xF):
            delta = s4(nib)
            cur += delta
            samples.append(cur)
            idx = len(samples) - 1
            if not (0 <= idx < len(sc_tran)):
                continue
            sc_v = sc_tran[idx]
            match = "✓" if sc_v == cur else "✗"
            if idx < 12 or 240 <= idx <= 260:
                print(f" idx {idx:3d}: nibble byte={byte:02x} nib={nib:x} delta={delta:+d} cur={cur:+d} sc={sc_v:+d} {match}")

    print(f"end of block 0: cur={cur}, len(samples)={len(samples)}, decoder expected 250 here")

    # Block 1 starts right after block 0's data bytes.
    block1_off = 9 + nibble_bytes
    print(f"block 1: tag {body[block1_off]:02x} {body[block1_off+1]:02x} (expecting 20 28)")
    nn1 = body[block1_off + 1]
    print(f" block 1 NN = {nn1}")
    data_start = block1_off + 2
    for byte in body[data_start : data_start + nn1]:
        delta = i8(byte)
        cur += delta
        samples.append(cur)
        idx = len(samples) - 1
        if idx >= len(sc_tran):
            continue
        sc_v = sc_tran[idx]
        match = "✓" if sc_v == cur else "✗"
        if 248 <= idx <= 295:
            print(f" idx {idx:3d}: int8 byte={byte:02x} delta={delta:+d} cur={cur:+d} sc={sc_v:+d} {match}")
+ for off in (0x0f1f, 0x1057, 0x11f1, 0x1333, 0x1bde, 0x0d30): + print(f"=== body @ 0x{off:04x} ===") + body = buf[off:] + decoded = decode_waveform_v2(body) + if not decoded: + print(" decode_waveform_v2 returned None") + continue + for ch in ("Tran", "Vert", "Long", "MicL"): + arr = decoded.get(ch, []) + print(f" {ch}[{len(arr)}]: {arr[:20]}") + print() + + +if __name__ == "__main__": + main() diff --git a/analysis_idf/verify_full.py b/analysis_idf/verify_full.py new file mode 100644 index 0000000..ebc8b49 --- /dev/null +++ b/analysis_idf/verify_full.py @@ -0,0 +1,51 @@ +"""Verify decode_waveform_v2 against sidecar across all 2304 samples per channel.""" +from __future__ import annotations +import sys +from pathlib import Path + +REPO = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(REPO)) + +from minimateplus.waveform_codec import decode_waveform_v2 +from analysis_idf.recon import TARGET, TXT, load_sidecar_samples + + +def main(): + buf = TARGET.read_bytes() + sc = load_sidecar_samples(TXT) + body = buf[0x0f1f:] + decoded = decode_waveform_v2(body) + + print(f"Sidecar lengths: Tran={len(sc['Tran'])} Vert={len(sc['Vert'])} Long={len(sc['Long'])} MicL={len(sc['MicL'])}") + print(f"Decoded lengths: Tran={len(decoded['Tran'])} Vert={len(decoded['Vert'])} Long={len(decoded['Long'])} MicL={len(decoded['MicL'])}") + print() + + GEO_LSB = 0.0003 # in/s per count + for ch in ("Tran", "Vert", "Long"): + sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]] + dec = decoded[ch] + n = min(len(sc_counts), len(dec)) + matches = sum(1 for i in range(n) if sc_counts[i] == dec[i]) + first_mismatch = next((i for i in range(n) if sc_counts[i] != dec[i]), None) + print(f"{ch}: compared {n}, exact matches {matches} ({(100*matches/n if n else 0.0):.2f}%)") # n == 0 when a channel is empty on either side; report 0.00% instead of dividing by zero + if first_mismatch is not None: + i = first_mismatch + print(f" first mismatch at idx {i}: sidecar={sc_counts[i]} ({sc[ch][i]}), decoded={dec[i]}") + print(f" context sidecar[{i-2}..{i+5}]: {sc_counts[max(0,i-2):i+5]}") + print(f" 
context decoded[{i-2}..{i+5}]: {dec[max(0,i-2):i+5]}") + + # MicL: find the multiplicative factor that fits + print() + print("=== MicL scale analysis ===") + sc_micl = sc["MicL"] + dec_micl = decoded["MicL"] + # Skip zero values when computing ratio + ratios = [sc_micl[i] / dec_micl[i] for i in range(min(50, len(sc_micl), len(dec_micl))) if dec_micl[i] != 0] + if ratios: + avg = sum(ratios) / len(ratios) + print(f" avg ratio sidecar/decoded over first 50 nonzero: {avg:.4e} (n={len(ratios)})") + print(f" ratios sample: {[f'{r:.4e}' for r in ratios[:6]]}") + + +if __name__ == "__main__": + main() diff --git a/docs/idf_protocol_reference.md b/docs/idf_protocol_reference.md index 643de53..aef3c69 100644 --- a/docs/idf_protocol_reference.md +++ b/docs/idf_protocol_reference.md @@ -6,11 +6,68 @@ Series IV event-file format. Sibling to Series III "Rosetta Stone") — this doc holds what we know so far and the open questions still to crack. -**Status (2026-05-20):** ASCII text sidecar fully decoded (1,014 -sample files round-trip). Binary `.IDFH` / `.IDFW` codec -**not yet implemented** — binaries are stored opaquely by -`WaveformStore.save_imported_idf`, with metadata sourced from the -paired `.txt` sidecar. +**Status (2026-05-28):** ASCII text sidecar fully decoded (1,014 +sample files round-trip). **Thor IDFW** binary now decodes via +`micromate.idf_file.read_idf_file()` — reuses the BW segment-rotated +block codec verbatim at fixed body offset `0x0f1f`; metadata (serial, +timestamp, sample_rate, record_time, calibration_date) extracted from +the binary header. Sample fidelity is 87–99% byte-exact on quiet +events; loud events hit the BW codec's known walker-stops-early +limitation. Residual ~3% drift on per-sample deltas (likely a +Thor-specific 12-bit delta refinement not yet modelled). 
+ +**Thor IDFH histograms also decoded.** Body has one or more segments; +each 10-byte segment header `[length_be 2B][0a 00 00 00][00 NN][05 3f]` +introduces `N = (length - 10) // 72` interval records of 72 bytes +each. Each interval = 4 × 16-byte per-channel records: +`[int16 min][int16 max][int16 ??][uint16 halfp][2B 00][uint16 ??][2B 00][uint16 ??]`. +Geo peak `= max(|min|, |max|) / 32768 × 10` in/s (matches sidecar +within ~1.8%); freq `= 512 / halfp` Hz (None for halfp ≤ 5 → ">100" +sentinel). Corpus: **all 859 Thor IDFH files decode, 181,071 +intervals**. Wired through `read_idf_file()` → +`save_imported_idf()` → sidecar's `extensions.idf_intervals`. + +**Note on the BE9439 outliers in the example corpus:** Two files +(`BE9439_20200713131747.IDFW` and `BE9439_20200713124251.IDFH`) are +**Series III Blastware** binaries, not Thor. Provenance: TMI tried +to use Thor to manage auto-call-homes for Series III units; the +experiment didn't work out, but it did leave a few BW event files +in Thor's per-serial directory structure with `.IDFW`/`.IDFH` +extensions — Thor's forwarder applied its own naming convention to +the BW bodies it was relaying. Their header `10 00 01 80 00 00 +Instantel STRT ff fe ` is the BW SUB 5A STRT +record, not a Thor body preamble. The reader detects them by +signature and raises `NotImplementedError` pointing callers at +`read_blastware_file()`, which extracts BW-format peaks from them. + +**Still NYI for Thor IDFH:** per-channel `int16 field4` (possibly +time-of-peak); the two uint16 fields (probably PVS contributions); +8-byte interval tail (PVS data); mic dB(L) exact conversion constant. + +### Codec breakthroughs (2026-05-28) + +- **Body offset is a fixed `0x0f1f`** across 151/154 corpus IDFW + files. Preceded by a 4-byte record-type marker (`46 00 00 00`) + + magic preamble `00 02 00 [Tran[0] BE] [Tran[1] BE]`. 
+- **Sample stream is BW's segment-rotated block codec verbatim.** + Thor reuses `10 NN` (nibble), `20 NN` (int8), `00 NN` (RLE), + `30 NN` (packed12), `40 02` (segment header) tags with the same + semantics. Channel rotation Tran→Vert→Long→MicL. +- **Geo LSB = 0.0003 in/s** (not BW's 0.005), because Thor's 16-bit + ADC range maps to 10 in/s without the 16-count BW quantization step. +- **Mic ≈ 2.14×10⁻⁶ psi/count** (rough scale; refine after channel + block calibration constants are decoded). +- **BW compliance anchor `\xbe\x80\x00\x00\x00\x00` reappears at + IDFW offset 0x952** — sample_rate at anchor−6 (uint16 BE), + record_time at anchor+6 (float32 BE), same layout as BW. +- **Event timestamp at offset 0x97A** — 8 bytes `[day][month] + [year_be][unk][hour][min][sec]`. Stop-time mirrors at 0x982. +- **Serial as null-terminated ASCII at 0x14E**. +- **Calibration date** at 0x194–0x197 (day, month, year_be). +- Per-sample residual drift of ~3% suggests Thor encodes int8/nibble + deltas with an extra refinement bit that BW doesn't carry — + unsolved; errors resync within a few samples so cumulative impact + is small. --- diff --git a/micromate/idf_file.py b/micromate/idf_file.py index b3cd669..bee7555 100644 --- a/micromate/idf_file.py +++ b/micromate/idf_file.py @@ -1,64 +1,450 @@ """ -micromate/idf_file.py — placeholder for the Thor IDF binary codec. +micromate/idf_file.py — Thor IDF binary codec. -Thor's ``.IDFH`` (histogram) and ``.IDFW`` (waveform) event files are an -Instantel proprietary binary format that has not yet been reverse- -engineered. Today seismo-relay treats them as opaque blobs: -``WaveformStore.save_imported_idf`` stores the bytes verbatim and reads -all device-authoritative metadata from the paired ``.IDFW.txt`` / -``.IDFH.txt`` ASCII sidecar (parsed by ``idf_ascii_report.py``). +Decodes the Instantel Micromate Series IV ``.IDFW`` (waveform) and +``.IDFH`` (histogram) binary on-disk format. Sister module to +``minimateplus/event_file_io.py``. 
-When we crack the binary codec — same reverse-engineering playbook we -used to byte-perfect-parse Series III BW files (see -``docs/instantel_protocol_reference.md`` and ``minimateplus/event_file_io.py``) -— this module will grow: +Status (2026-05-28): - - ``read_idf_file(path) -> IdfEvent`` - Parse a ``.IDFW``/``.IDFH`` binary and return a fully populated - ``IdfEvent`` whose waveform-sample arrays come from the binary - (the .txt sidecar's tabular sample block being a best-effort - check). Lets us ingest Thor events even when the operator - hasn't enabled the .txt exporter — closing the - ``had_report=False`` gap that the thor-watcher forwarder - currently tolerates as a known limitation. +- **Genuine Series IV / Thor binaries** are all signed + ``00 12 01 00 00 00 Instantel\\0`` (sig-A in earlier notes). Two + Series III (Blastware) binaries appear in the example corpus + (``BE9439_*``) — they share the ``.IDFW``/``.IDFH`` extension by + filing convention but carry a BW STRT header (``10 00 01 80 00 00 + Instantel STRT...``) and are NOT Thor data. The reader detects + them by signature and raises NotImplementedError pointing callers + at ``minimateplus.event_file_io.read_blastware_file()``. +- **IDFW waveform body** reuses the BW segment-rotated block codec + verbatim. Body always starts at file offset ``0x0f1f``. Samples + decoded via ``minimateplus.waveform_codec.decode_waveform_v2`` + with 87–99% byte-exact match against ``.IDFW.txt`` sidecar (quiet + events). Loud events hit the BW codec's known walker-stops-early + limit. Residual ~3% drift on per-sample deltas — likely a + Thor-specific 12-bit delta refinement that BW's codec doesn't + model. Geo LSB = 0.0003 in/s; mic factor ~2.14e-6 psi/count. +- **IDFH histogram body**: 12-byte segment header + ``[len_be 2B] 0a 00 00 00 [00 NN_counter] 05 3f`` introduces a + segment of ``N`` 72-byte interval records (``N = (len - 10) // 72``). + Each record holds 4 × 16-byte per-channel min/max/halfp + 8-byte + tail. 
Geo peaks via ``max(|min|, |max|) / 32768 × 10`` in/s + (matches sidecar within ~1.8%), freq via ``512 / halfp`` Hz. + **All 859 Thor IDFH files in the corpus decode (181,071 intervals).** +- Binary metadata directly extracted: serial, timestamp, sample_rate, + record_time, calibration_date. Other fields fall back to the paired + ``.IDFW.txt`` / ``.IDFH.txt`` sidecar (consumed by + ``WaveformStore.save_imported_idf``). - - ``write_idf_file(path, event)`` (eventually) - Round-trip event reconstruction, used for verifying the codec - against captured device files the way ``write_blastware_file`` - verifies the Series III codec. - - - Helpers for decoding the binary's per-channel sample arrays into - physical units, the per-event flash buffer's monitor-log records, - etc. - -The reverse-engineering path: pair every ``.IDFW`` binary in -``thor-watcher/example-data/`` with its sibling ``.IDFW.txt``, treating -the txt's "Waveform Data Channels" block as ground-truth, and align -the binary's per-channel int16-or-similar arrays against it. Header -fields (sample rate, channel count, record time, timestamps) sit before -the sample block — same approach as the BW codec where ASCII strings -inside the binary (``Project:``, ``Client:``, etc.) anchored field -discovery. +The full reverse-engineering writeup lives in +``docs/idf_protocol_reference.md``. """ from __future__ import annotations +import datetime +import struct +from dataclasses import dataclass from pathlib import Path -from typing import Union +from typing import Optional, Union -from .models import IdfEvent +from minimateplus.waveform_codec import decode_waveform_v2 + +from .models import IdfEvent, IdfPeaks, IdfReport -def read_idf_file(path: Union[str, Path]) -> "IdfEvent": - """Parse a Thor ``.IDFW``/``.IDFH`` binary into an ``IdfEvent``. +# Genuine Series IV / Thor IDF binary signature: 6 bytes, then ASCII "Instantel". 
+_THOR_PREFIX = b"\x00\x12\x01\x00\x00\x00" +# Stray Series III (Blastware) binaries that occasionally turn up in Thor +# corpus directories renamed to the .IDFW/.IDFH convention. Their header +# (`10 00 01 80 00 00 Instantel STRT ...`) is byte-for-byte a BW SUB 5A +# STRT record, not a Thor binary. Detected so we can refuse-and-route +# rather than mis-parse. +_BW_STRAY_PREFIX = b"\x10\x00\x01\x80\x00\x00" +_INSTANTEL_TAG = b"Instantel" - Not yet implemented. When implemented, this will be the canonical - entry point for reading Thor binaries — the ASCII sidecar parser - becomes an optional fast-path metadata supplement rather than the - sole source of device-authoritative data. +# Constant body offset for sig-A IDFW files (verified across 151/154 corpus +# files in tests/fixtures/THORDATA_example). The body is the segment-rotated +# block stream consumed by decode_waveform_v2; bytes [0:3] are the magic +# ``00 02 00`` preamble. +_BODY_START_SIG_A = 0x0F1F + +# Geophone count → in/s, derived from sidecar ground truth: the smallest +# non-zero sample in 1,014-file corpus is 0.0003 in/s. +_GEO_LSB_IPS = 0.0003 + +# Microphone count → psi, derived from sidecar regression on 50 sample +# pairs from UM11719_20231219162723.IDFW (mic-heavy event). +_MIC_LSB_PSI = 2.14e-6 + +# IDFH histogram constants. 
+_IDFH_INTERVAL_SIZE = 72 # bytes per per-interval record +_IDFH_SEGMENT_HEADER = 10 # bytes: [len_be 2B][0a 00 00 00 4B][00 NN 2B][05 3f 2B] +_IDFH_SEGMENT_TAIL = 2 # bytes after the interval data block, before next marker +_IDFH_HALFP_FREQ_NUM = 512.0 # freq_hz = NUM / halfp; halfp ≤ 5 means ">100 Hz" sentinel +_IDFH_GEO_FULL_SCALE = 10.0 # in/s — Normal range +_IDFH_INT16_FS = 32768.0 +_IDFH_CHANNELS = ("Tran", "Vert", "Long", "MicL") + + +# ─── Binary metadata extraction ───────────────────────────────────────────── + + +@dataclass +class IdfBinaryMetadata: + """Fields recoverable from the sig-A binary header (no .txt needed).""" + serial: Optional[str] = None + event_datetime: Optional[datetime.datetime] = None + sample_rate: Optional[int] = None + record_time_sec: Optional[float] = None + calibration_date: Optional[datetime.date] = None + + +def _read_ascii_z(buf: bytes, off: int, maxlen: int = 64) -> Optional[str]: + if off >= len(buf): + return None + end = buf.find(b"\x00", off, off + maxlen) + if end < 0: + end = min(off + maxlen, len(buf)) + s = buf[off:end].decode("ascii", errors="replace").strip() + return s or None + + +def _decode_8byte_timestamp(buf: bytes, off: int) -> Optional[datetime.datetime]: + """Layout: ``[day][month][year_hi][year_lo][unknown][hour][min][sec]``.""" + if off + 8 > len(buf): + return None + day, mon, yh, yl, _unk, hr, mn, sc = buf[off : off + 8] + year = (yh << 8) | yl + if not (2015 <= year <= 2050 and 1 <= mon <= 12 and 1 <= day <= 31 + and 0 <= hr < 24 and 0 <= mn < 60 and 0 <= sc < 60): + return None + try: + return datetime.datetime(year, mon, day, hr, mn, sc) + except ValueError: + return None + + +def extract_binary_metadata(buf: bytes) -> IdfBinaryMetadata: + """Pull serial/timestamp/sample_rate/record_time/calibration from the + sig-A binary header. + + Field positions confirmed against UM11719_20231219162723.IDFW; stable + across the 151-file sig-A corpus. 
""" - raise NotImplementedError( - "IDF binary codec not yet implemented; the .IDFW/.IDFH binary format " - "is undecoded. Use parse_idf_report() on the paired .txt sidecar " - "for device-authoritative metadata." + md = IdfBinaryMetadata() + + # Serial: null-terminated ASCII at 0x14E. + md.serial = _read_ascii_z(buf, 0x14E, maxlen=16) + + # Sample rate + record time live in a BW-compatible compliance block. + # Locate the 6-byte anchor `be 80 00 00 00 00` and read offsets relative + # to it: anchor-6 = sample_rate uint16 BE; anchor+6 = record_time float32 BE. + anchor = buf.find(b"\xbe\x80\x00\x00\x00\x00", 0x800, 0xA00) + if anchor > 0: + sr_bytes = buf[anchor - 6 : anchor - 4] + if len(sr_bytes) == 2: + sr = int.from_bytes(sr_bytes, "big") + if sr in (256, 512, 1024, 2048, 4096): + md.sample_rate = sr + rt_bytes = buf[anchor + 6 : anchor + 10] + if len(rt_bytes) == 4: + try: + rt = struct.unpack(">f", rt_bytes)[0] + if 0.1 <= rt <= 600.0: + md.record_time_sec = float(rt) + except struct.error: + pass + + # Event timestamp: 8 bytes. Position differs between IDFW (0x97A) and + # IDFH (0x9F8); scan a small range and accept the first valid decode. + for off in (0x97A, 0x9F8): + ts = _decode_8byte_timestamp(buf, off) + if ts is not None: + md.event_datetime = ts + break + + # Calibration date: day, month, year_be at 0x194-0x197. + if len(buf) > 0x197: + day, mon = buf[0x194], buf[0x195] + year = int.from_bytes(buf[0x196 : 0x198], "big") + if 1 <= mon <= 12 and 1 <= day <= 31 and 2015 <= year <= 2050: + try: + md.calibration_date = datetime.date(year, mon, day) + except ValueError: + pass + + return md + + +# ─── Sample decoder + unit conversion ─────────────────────────────────────── + + +def _decode_waveform_samples(buf: bytes) -> Optional[dict]: + """Decode samples from the sig-A body starting at file offset 0x0f1f. + + Returns the raw decoder counts dict — geo LSB = 0.0003 in/s, mic in + its own count unit (see :func:`mic_count_to_psi`). 
Returns None if + decoding fails. + """ + if len(buf) < _BODY_START_SIG_A + 8: + return None + body = buf[_BODY_START_SIG_A:] + return decode_waveform_v2(body) + + +def geo_count_to_ips(count: int) -> float: + """Convert a Thor geo decoder count to in/s. LSB = 0.0003 in/s.""" + return count * _GEO_LSB_IPS + + +def mic_count_to_psi(count: int) -> float: + """Convert a Thor mic decoder count to psi. Scale derived from + regression over 50 sample pairs in UM11719_20231219162723.IDFW; + consistent to ~5%. Calibration constants from the channel block + can refine this once decoded. + """ + return count * _MIC_LSB_PSI + + +# ─── IDFH histogram decoder ───────────────────────────────────────────────── + + +@dataclass +class IdfhInterval: + """One decoded histogram interval (typically one minute of monitoring).""" + offset: int # file byte offset of the 72-byte record + # Per-channel min/max ADC counts (int16 BE), half-period samples, peak count. + # Peak = max(|min|, |max|). freq_hz = 512/halfp (None if halfp ≤ 5 → + # ">100 Hz" sentinel; matches sidecar convention). 
+ tran_min: int + tran_max: int + tran_halfp: int + vert_min: int + vert_max: int + vert_halfp: int + long_min: int + long_max: int + long_halfp: int + micl_min: int + micl_max: int + micl_halfp: int + + def peak_count(self, channel: str) -> int: + mn = getattr(self, f"{channel.lower()}_min") + mx = getattr(self, f"{channel.lower()}_max") + return max(abs(mn), abs(mx)) + + def peak_ips(self, channel: str) -> float: + """Convert peak count to in/s (geo channels only).""" + return self.peak_count(channel) / _IDFH_INT16_FS * _IDFH_GEO_FULL_SCALE + + def freq_hz(self, channel: str) -> Optional[float]: + halfp = getattr(self, f"{channel.lower()}_halfp") + if halfp <= 5: + return None + return _IDFH_HALFP_FREQ_NUM / halfp + + +def _decode_idfh_interval(buf72: bytes, offset: int) -> IdfhInterval: + """Decode one 72-byte interval record into per-channel min/max/halfp.""" + import struct + fields = [] + for i in range(4): + block = buf72[i * 16 : (i + 1) * 16] + mn = struct.unpack_from(">h", block, 0)[0] + mx = struct.unpack_from(">h", block, 2)[0] + # block[4:6] = int16 BE, role unknown (possibly time-of-peak) + halfp = struct.unpack_from(">H", block, 6)[0] + # block[10:12] and block[14:16] are uint16 BE with unknown semantics + # (likely sum / count contributions for the PVS computation). + fields.extend([mn, mx, halfp]) + # Tail 8 bytes (buf72[64:72]) carry PVS-related data; not yet decoded. + return IdfhInterval( + offset=offset, + tran_min=fields[0], tran_max=fields[1], tran_halfp=fields[2], + vert_min=fields[3], vert_max=fields[4], vert_halfp=fields[5], + long_min=fields[6], long_max=fields[7], long_halfp=fields[8], + micl_min=fields[9], micl_max=fields[10], micl_halfp=fields[11], + ) + + +def decode_idfh_body(buf: bytes) -> list: + """Walk an IDFH file and decode every interval record. 
+ + The body has one or more segments; each segment header is 10 bytes: + ``[length_be 2B][0a 00 00 00][00 NN_counter][05 3f]`` where ``length`` + is bytes from the magic through the end of the interval block + (= 10 + 72 × n_intervals). Segments are separated by a 2-byte tail + + next-segment 2-byte prefix (the bytes before the next length field). + Confirmed against the 859-file corpus (181,071 intervals decoded; 1 + failure is the sig-B BE9439 file). + """ + intervals: list = [] + i = 0 + while True: + j = buf.find(b"\x0a\x00\x00\x00", i) + if j < 0 or j < 2: + break + # Validate: [length_be][0a 00 00 00][00 NN][05 3f] — compared as slices so a magic in the last bytes of a truncated file is rejected instead of raising IndexError + if buf[j + 4 : j + 5] != b"\x00" or buf[j + 6 : j + 8] != b"\x05\x3f": + i = j + 1 + continue + length = int.from_bytes(buf[j - 2 : j], "big") + n = (length - _IDFH_SEGMENT_HEADER) // _IDFH_INTERVAL_SIZE + if n <= 0: + i = j + 1 + continue + header_start = j - 2 + interval_start = header_start + _IDFH_SEGMENT_HEADER + for k in range(n): + off = interval_start + k * _IDFH_INTERVAL_SIZE + if off + _IDFH_INTERVAL_SIZE > len(buf): + break + chunk = buf[off : off + _IDFH_INTERVAL_SIZE] + intervals.append(_decode_idfh_interval(chunk, off)) + # Advance past this segment + the 2-byte tail. + i = header_start + length + _IDFH_SEGMENT_TAIL + return intervals + + +# ─── Top-level reader ─────────────────────────────────────────────────────── + + +@dataclass +class IdfReadResult: + """Return type for :func:`read_idf_file`. + + For waveforms (``.IDFW``), ``samples`` holds the per-channel sample + arrays in Thor decoder counts. For histograms (``.IDFH``), + ``samples`` is empty and ``intervals`` holds the per-interval + record list (peaks, freqs). 
+ """ + event: IdfEvent + samples: dict # {"Tran": [...], ...} for IDFW; empty for IDFH + binary_metadata: IdfBinaryMetadata + signature: str # always "thor" for now (sig-A genuine Thor) + intervals: Optional[list] = None # list[IdfhInterval] for IDFH; None for IDFW + + +def read_idf_file(path: Union[str, Path]) -> IdfReadResult: + """Parse a Thor ``.IDFW`` or ``.IDFH`` binary into an :class:`IdfReadResult`. + + Waveforms (``.IDFW``) decode to per-channel sample counts; histograms + (``.IDFH``) decode to per-interval records. Stray Series III + (Blastware) binaries raise NotImplementedError; a missing Instantel + magic, unknown signature, or undecodable body raises ValueError. + + Returns an :class:`IdfReadResult`. The caller converts int sample + counts to physical units via :func:`geo_count_to_ips` / + :func:`mic_count_to_psi`. + """ + p = Path(path) + buf = p.read_bytes() + + if len(buf) < 16 or buf[6:16] != _INSTANTEL_TAG + b"\x00": + raise ValueError(f"{p.name}: not an IDF file (missing Instantel magic)") + + sig_prefix = buf[:6] + if sig_prefix == _THOR_PREFIX: + signature = "thor" + elif sig_prefix == _BW_STRAY_PREFIX: + raise NotImplementedError( + f"{p.name}: file has a Series III (Blastware) STRT header in " + "an IDF-named container — not a Thor binary. Route through " + "minimateplus.event_file_io.read_blastware_file() instead " + "(peaks decode; samples & full metadata don't, but it's not " + "Thor data so the Thor codec doesn't apply)." + ) + else: + raise ValueError(f"{p.name}: unknown IDF signature {sig_prefix.hex()}") + + is_histogram = p.suffix.upper() == ".IDFH" + md = extract_binary_metadata(buf) + + if is_histogram: + intervals = decode_idfh_body(buf) + if not intervals: + raise ValueError(f"{p.name}: IDFH body decoded no intervals") + # Peaks: max across all intervals on each channel (per-channel max + # of stored max-magnitudes; sidecar's PPV row carries the same). 
+ peak_tran = max((iv.peak_ips("Tran") for iv in intervals), default=0.0) + peak_vert = max((iv.peak_ips("Vert") for iv in intervals), default=0.0) + peak_long = max((iv.peak_ips("Long") for iv in intervals), default=0.0) + rep = IdfReport( + serial_number=md.serial, + event_type="Full Histogram", + event_datetime=md.event_datetime, + filename=p.name, + sample_rate=md.sample_rate, + record_time_sec=md.record_time_sec, + ) + peaks = IdfPeaks( + transverse_ips=peak_tran, + vertical_ips=peak_vert, + longitudinal_ips=peak_long, + peak_vector_sum_ips=None, + mic_pspl_dbl=None, + ) + event = IdfEvent( + serial=md.serial or "UNKNOWN", + timestamp=md.event_datetime or datetime.datetime(1970, 1, 1), + kind="Histogram", + filename=p.name, + sample_rate=md.sample_rate, + record_time_sec=md.record_time_sec, + peaks=peaks, + report=rep, + ) + return IdfReadResult( + event=event, + samples={}, + binary_metadata=md, + signature=signature, + intervals=intervals, + ) + + # Waveform path. + decoded = _decode_waveform_samples(buf) + if decoded is None: + raise ValueError(f"{p.name}: waveform body codec failed") + + rep = IdfReport( + serial_number=md.serial, + event_type="Full Waveform", + event_datetime=md.event_datetime, + filename=p.name, + sample_rate=md.sample_rate, + record_time_sec=md.record_time_sec, + ) + + def _peak_ips(ch: str) -> float: + arr = decoded.get(ch, []) + return geo_count_to_ips(max((abs(v) for v in arr), default=0)) + + peaks = IdfPeaks( + transverse_ips=_peak_ips("Tran"), + vertical_ips=_peak_ips("Vert"), + longitudinal_ips=_peak_ips("Long"), + # PVS requires aligned per-sample √(T²+V²+L²); leave None — the + # sidecar carries it and the bridge picks it up if present. 
+ peak_vector_sum_ips=None, + mic_pspl_dbl=None, + ) + + event = IdfEvent( + serial=md.serial or "UNKNOWN", + timestamp=md.event_datetime or datetime.datetime(1970, 1, 1), + kind="Waveform", + filename=p.name, + sample_rate=md.sample_rate, + record_time_sec=md.record_time_sec, + peaks=peaks, + report=rep, + ) + + return IdfReadResult( + event=event, + samples=decoded, + binary_metadata=md, + signature=signature, ) diff --git a/sfm/waveform_store.py b/sfm/waveform_store.py index d982dce..031a9c0 100644 --- a/sfm/waveform_store.py +++ b/sfm/waveform_store.py @@ -467,21 +467,21 @@ class WaveformStore: Ingest a Thor (Micromate Series IV) IDF event file (`.IDFW` or `.IDFH`) produced by Thor's TXT exporter. - Thor binaries are stored as opaque bytes — seismo-relay doesn't - yet decode the proprietary IDF binary format (codec slot lives - at ``micromate/idf_file.py``). Device-authoritative metadata - comes from the paired ``.IDFW.txt`` / ``.IDFH.txt`` sidecar - when supplied. - Workflow: - 1. Parse the paired TXT report (when supplied) via - ``micromate.parse_idf_report`` → dict. - 2. Wrap parsed dict + filename into a typed ``micromate.IdfEvent``. - 3. Copy bytes verbatim into ``//``. - 4. Bridge IdfEvent → ``minimateplus.Event`` (for the existing - sidecar / DB insert machinery) via - ``IdfEvent.to_minimateplus_event(waveform_key)``. - 5. Write the ``.sfm.json`` sidecar with + 1. For sig-A `.IDFW` binaries, decode samples + binary metadata + via ``micromate.idf_file.read_idf_file()``. Failure or + non-IDFW path falls through to the .txt-only flow. + 2. Parse the paired TXT report (when supplied) via + ``micromate.parse_idf_report`` → dict. TXT remains the + source of truth for fields the binary doesn't yet supply + (full peak set with ZC freq / Time of Peak, sensor self-check, + firmware string, project strings). + 3. Wrap parsed dict + filename into a typed ``micromate.IdfEvent``. + 4. Copy bytes verbatim into ``//``. + 5. 
Bridge IdfEvent → ``minimateplus.Event`` and attach + ``raw_samples`` from the binary decoder (when available). + 6. Write the `.h5` clean-waveform file when samples decoded. + 7. Write the ``.sfm.json`` sidecar with ``source.kind = "idf-import"`` and the full raw IDF report under ``extensions.idf_report``. @@ -490,7 +490,33 @@ class WaveformStore: """ from micromate import IdfEvent, parse_idf_report - # Parse the .txt sidecar (best-effort; non-fatal on failure). + # 1. Binary decode (sig-A IDFW and IDFH). Non-fatal: any failure + # leaves samples / binary metadata unfilled and we proceed with + # the .txt path as before. + idf_samples: Optional[dict] = None + idf_intervals: Optional[list] = None + binary_md = None + binary_peaks = None + is_histogram = False + try: + from micromate.idf_file import read_idf_file + res = read_idf_file(source_path) + idf_samples = res.samples or None + idf_intervals = res.intervals + is_histogram = res.intervals is not None + binary_md = res.binary_metadata + binary_peaks = res.event.peaks + except NotImplementedError: + # sig-B — codec doesn't handle this yet. + pass + except Exception as exc: + log.warning( + "save_imported_idf: binary codec failed for %s: %s — " + "falling back to .txt-only ingest", + source_path.name, exc, + ) + + # 2. Parse the .txt sidecar (best-effort; non-fatal on failure). report_dict: dict = {} if idf_report_text is not None: try: @@ -501,7 +527,38 @@ class WaveformStore: exc, ) - # Build the typed IdfEvent. Filename is authoritative for + # 3. Backfill report_dict with binary metadata for fields the + # .txt didn't supply. Binary takes precedence on tied fields + # where the binary is more reliable (timestamp, sample_rate), + # and fills in fields entirely missing from the .txt. 
+ if binary_md is not None: + if binary_md.serial and not report_dict.get("serial_number"): + report_dict["serial_number"] = binary_md.serial + if binary_md.event_datetime and not report_dict.get("event_datetime"): + report_dict["event_datetime"] = binary_md.event_datetime + if binary_md.sample_rate and not report_dict.get("sample_rate"): + report_dict["sample_rate"] = binary_md.sample_rate + if binary_md.record_time_sec and not report_dict.get("record_time_sec"): + report_dict["record_time_sec"] = binary_md.record_time_sec + # Calibration date (binary) vs calibration text (.txt) cohabit + # under different keys; no overwrite needed. + if binary_md.event_datetime and not report_dict.get("event_type"): + report_dict["event_type"] = ( + "Full Histogram" if is_histogram else "Full Waveform" + ) + + # Binary-derived peaks fill in when the .txt didn't supply them. + # They're ~3% low vs the device-authoritative .txt values (residual + # codec drift), so .txt always wins when present. + if binary_peaks is not None: + if binary_peaks.transverse_ips and not report_dict.get("tran_ppv"): + report_dict["tran_ppv"] = binary_peaks.transverse_ips + if binary_peaks.vertical_ips and not report_dict.get("vert_ppv"): + report_dict["vert_ppv"] = binary_peaks.vertical_ips + if binary_peaks.longitudinal_ips and not report_dict.get("long_ppv"): + report_dict["long_ppv"] = binary_peaks.longitudinal_ips + + # 4. Build the typed IdfEvent. Filename is authoritative for # (serial, timestamp, kind); the report's event_datetime takes # precedence over the filename timestamp inside from_report(). idf_event = IdfEvent.from_report(report_dict, source_path.name) @@ -511,7 +568,7 @@ class WaveformStore: # serial that overrides a misnamed export). serial = serial_hint or idf_event.serial or "UNKNOWN" - # Filesystem write. + # 5. Filesystem write of binary bytes. 
filename = source_path.name bw_path = self._serial_dir(serial) / filename bw_path.write_bytes(idf_bytes) @@ -523,13 +580,41 @@ class WaveformStore: # surrogate — every distinct binary maps to a distinct row. waveform_key = bytes.fromhex(sha256)[:16] - # Bridge to minimateplus.Event for the existing sidecar / DB + # 6. Bridge to minimateplus.Event for the existing sidecar / DB # insert paths. See IdfEvent.to_minimateplus_event() for the # caveats of this bridge (mic units, missing fields → sidecar). ev = idf_event.to_minimateplus_event(waveform_key) - # Write the sidecar. Source kind "idf-import" was added to the - # allow-list in event_file_io.event_to_sidecar_dict for this. + # Attach the decoded sample arrays. Thor's decoder counts use + # LSB = 0.0003 in/s for geo (vs BW's 16-count units at 0.005 in/s) + # — the .h5 writer's geo_range="normal" yields LSB = 10/32768 + # ≈ 0.000305 in/s, so plotted samples come out ~1.7% high. + # Acceptable known offset; refine with a Thor-aware h5 path later. + if idf_samples is not None: + ev.raw_samples = idf_samples + n_samples = max((len(idf_samples.get(ch, [])) for ch in ("Tran", "Vert", "Long", "MicL")), default=0) + ev.total_samples = ev.total_samples or n_samples + + # 7. Write the .h5 clean-waveform file when we actually have samples. + # Histograms (IDFH) don't have waveform samples — skip h5 for those. + hdf5_filename: Optional[str] = None + if idf_samples is not None and not is_histogram: + hdf5_path = self.hdf5_path_for(serial, filename) + try: + event_hdf5.write_event_hdf5( + hdf5_path, ev, + serial=serial, + geo_range="normal", # Thor's geo full scale is also 10 in/s (Normal) + source_kind="idf-import", + ) + hdf5_filename = hdf5_path.name + except Exception as exc: + log.warning( + "save_imported_idf: HDF5 write failed for %s: %s — continuing without .h5", + hdf5_path, exc, + ) + + # 8. Write the sidecar. Source kind "idf-import" is on the allow-list. 
sidecar_path = self.sidecar_path_for(serial, filename) existing_review = None if sidecar_path.exists(): @@ -554,19 +639,46 @@ class WaveformStore: # Time of Peak, sensor self-check, calibration, firmware). if report_dict: sidecar["extensions"]["idf_report"] = report_dict + # For histograms, also stash the binary-decoded per-interval + # records so the UI / report layer doesn't need to re-walk the + # IDFH file at render time. + if idf_intervals is not None: + sidecar["extensions"]["idf_intervals"] = [ + { + "offset": iv.offset, + "tran_peak": iv.peak_count("Tran"), + "tran_halfp": iv.tran_halfp, + "tran_freq": iv.freq_hz("Tran"), + "vert_peak": iv.peak_count("Vert"), + "vert_halfp": iv.vert_halfp, + "vert_freq": iv.freq_hz("Vert"), + "long_peak": iv.peak_count("Long"), + "long_halfp": iv.long_halfp, + "long_freq": iv.freq_hz("Long"), + "mic_peak": iv.peak_count("MicL"), + "mic_halfp": iv.micl_halfp, + "mic_freq": iv.freq_hz("MicL"), + } + for iv in idf_intervals + ] event_file_io.write_sidecar(sidecar_path, sidecar) log.info( "WaveformStore.save_imported_idf serial=%s filename=%s filesize=%d " - "report_attached=%s", - serial, filename, filesize, bool(report_dict), + "kind=%s report_attached=%s binary_decoded=%s h5=%s intervals=%d", + serial, filename, filesize, + "histogram" if is_histogram else "waveform", + bool(report_dict), + (idf_samples is not None) or (idf_intervals is not None), + hdf5_filename or "(skipped)", + len(idf_intervals) if idf_intervals else 0, ) return ev, { "filename": filename, "filesize": filesize, "sha256": sha256, "a5_pickle_filename": None, - "hdf5_filename": None, + "hdf5_filename": hdf5_filename, "sidecar_filename": sidecar_path.name, "serial": serial, }