"""Reconnaissance helpers for cracking the Thor IDFW binary.""" from __future__ import annotations import sys from pathlib import Path REPO = Path(__file__).resolve().parents[1] sys.path.insert(0, str(REPO)) TARGET = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW" TXT = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt" def hex_at(buf: bytes, off: int, n: int = 32) -> str: chunk = buf[off : off + n] hexs = " ".join(f"{b:02x}" for b in chunk) asc = "".join(chr(b) if 32 <= b < 127 else "." for b in chunk) return f"{off:04x}: {hexs} {asc}" def find_all(buf: bytes, needle: bytes) -> list[int]: out: list[int] = [] i = 0 while True: j = buf.find(needle, i) if j < 0: break out.append(j) i = j + 1 return out def load_sidecar_samples(path: Path) -> dict[str, list[float]]: """Parse the txt sample table — Tran/Vert/Long/MicL.""" out = {"Tran": [], "Vert": [], "Long": [], "MicL": []} in_block = False for line in path.read_text(errors="replace").splitlines(): if not in_block: if line.strip() == "Waveform Data Channels": in_block = True continue if line.startswith("Waveform Data USB Channels"): break parts = line.split("\t") # First row is the header "\tTran\tVert\tLong\tMicL" if len(parts) >= 5 and parts[1] == "Tran": continue if len(parts) < 5: continue try: out["Tran"].append(float(parts[1])) out["Vert"].append(float(parts[2])) out["Long"].append(float(parts[3])) out["MicL"].append(float(parts[4])) except ValueError: continue return out def main(): buf = TARGET.read_bytes() samples = load_sidecar_samples(TXT) print(f"file size: {len(buf)} bytes") print(f"sample rows: Tran={len(samples['Tran'])} Vert={len(samples['Vert'])} Long={len(samples['Long'])} MicL={len(samples['MicL'])}") print(f"first 6 Tran samples: {samples['Tran'][:6]}") print(f"first 6 Vert samples: {samples['Vert'][:6]}") print(f"first 6 Long samples: {samples['Long'][:6]}") print(f"first 6 MicL samples: {samples['MicL'][:6]}") print() print("=== BW magic '00 02 00' positions ===") hits = find_all(buf, b"\x00\x02\x00") print(f"{len(hits)} hits") for h in hits[:20]: print(hex_at(buf, h, 24)) print() print("=== '40 02' segment-header positions ===") hits = find_all(buf, b"\x40\x02") print(f"{len(hits)} hits") for h in hits: ctx_pre = buf[max(0, h - 4): h].hex() ctx_post = buf[h: h + 20].hex() # Show byte preceding to help identify real headers vs casual occurrences print(f" 0x{h:04x} pre={ctx_pre} post={ctx_post}") if __name__ == "__main__": main()