# seismo-relay/analysis_idf/recon.py

"""Reconnaissance helpers for cracking the Thor IDFW binary."""
from __future__ import annotations

import sys
from pathlib import Path

# Directory one level above the directory that contains this file.
REPO = Path(__file__).resolve().parents[1]
# Put REPO at the front of the import search path so modules located under it
# are found first when imported from this script.
sys.path.insert(0, str(REPO))

# Binary .IDFW fixture that main() reads and scans for byte patterns.
TARGET = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
# Text file from the TXT/ directory next to TARGET; main() passes it to
# load_sidecar_samples().
TXT = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt"


def hex_at(buf: bytes, off: int, n: int = 32) -> str:
    """Render up to ``n`` bytes of ``buf`` starting at ``off`` as one dump line.

    The line has the form ``<offset>: <hex bytes>  <ascii>``: the offset in
    lowercase hex padded to at least four digits, the bytes as two-digit hex
    separated by single spaces, and an ASCII column in which every byte
    outside the printable range 0x20..0x7e is shown as ``.``.
    """
    hex_cols = []
    ascii_cols = []
    # Build both columns in a single pass over the slice.
    for byte in buf[off : off + n]:
        hex_cols.append(format(byte, "02x"))
        printable = 0x20 <= byte <= 0x7E
        ascii_cols.append(chr(byte) if printable else ".")
    hex_part = " ".join(hex_cols)
    ascii_part = "".join(ascii_cols)
    return f"{off:04x}: {hex_part}  {ascii_part}"


def find_all(buf: bytes, needle: bytes) -> list[int]:
    """Return every offset at which ``needle`` occurs in ``buf``, ascending.

    Each search resumes one byte past the previous hit, so overlapping
    occurrences are all reported.
    """
    positions: list[int] = []
    pos = buf.find(needle)
    while pos != -1:
        positions.append(pos)
        # Step by one byte (not len(needle)) to keep overlapping matches.
        pos = buf.find(needle, pos + 1)
    return positions


def load_sidecar_samples(path: Path) -> dict[str, list[float]]:
    """Parse the txt sample table — Tran/Vert/Long/MicL.

    Scanning starts after the first line whose stripped text is exactly
    ``Waveform Data Channels`` and stops at the first subsequent line that
    starts with ``Waveform Data USB Channels`` (or at end of file). Inside
    that region each line is split on tabs; columns 1-4 are read as the
    Tran, Vert, Long and MicL values, and column 0 is ignored.

    Args:
        path: Text file to read. Undecodable bytes are replaced rather than
            raising.

    Returns:
        A dict with the keys ``Tran``, ``Vert``, ``Long`` and ``MicL``, each
        mapping to the list of parsed values in file order. The four lists
        always have the same length: a row contributes to all of them or to
        none.
    """
    channels = ("Tran", "Vert", "Long", "MicL")
    out: dict[str, list[float]] = {name: [] for name in channels}
    in_block = False
    for line in path.read_text(errors="replace").splitlines():
        if not in_block:
            if line.strip() == "Waveform Data Channels":
                in_block = True
            continue
        if line.startswith("Waveform Data USB Channels"):
            break
        parts = line.split("\t")
        # Skip short lines and the header row "\tTran\tVert\tLong\tMicL".
        if len(parts) < 5 or parts[1] == "Tran":
            continue
        # Convert the whole row before touching the output lists: if any
        # field is not numeric the row is dropped entirely, so the channel
        # lists never drift out of step with each other.
        try:
            row = [float(field) for field in parts[1:5]]
        except ValueError:
            continue
        for name, value in zip(channels, row):
            out[name].append(value)
    return out


def main():
    """Print a reconnaissance report for the TARGET/TXT fixture pair.

    Reads the bytes of TARGET and the sample table from TXT, prints the file
    size, per-channel row counts and the first six samples of each channel,
    then lists the offsets of two byte patterns found in the binary.
    """
    buf = TARGET.read_bytes()
    samples = load_sidecar_samples(TXT)
    channel_names = ("Tran", "Vert", "Long", "MicL")

    print(f"file size: {len(buf)} bytes")
    row_counts = " ".join(f"{name}={len(samples[name])}" for name in channel_names)
    print(f"sample rows: {row_counts}")
    for name in channel_names:
        print(f"first 6 {name} samples: {samples[name][:6]}")

    print()
    print("=== BW magic '00 02 00' positions ===")
    magic_hits = find_all(buf, b"\x00\x02\x00")
    print(f"{len(magic_hits)} hits")
    # Only the first 20 occurrences are dumped, 24 bytes each.
    for pos in magic_hits[:20]:
        print(hex_at(buf, pos, 24))

    print()
    print("=== '40 02' segment-header positions ===")
    segment_hits = find_all(buf, b"\x40\x02")
    print(f"{len(segment_hits)} hits")
    for pos in segment_hits:
        # Show up to four bytes before the hit and twenty from it onward, to
        # help tell real headers from casual occurrences.
        before = buf[max(0, pos - 4) : pos].hex()
        after = buf[pos : pos + 20].hex()
        print(f"  0x{pos:04x}  pre={before}  post={after}")


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()