series 4 codec work, initial decode success

This commit is contained in:
2026-05-29 06:33:06 +00:00
parent 1bccc44b88
commit 9b71ead44b
20 changed files with 1578 additions and 76 deletions
+89
View File
@@ -0,0 +1,89 @@
"""Reconnaissance helpers for cracking the Thor IDFW binary."""
from __future__ import annotations
import sys
from pathlib import Path
# Directory two levels above this file (the parent of the directory that
# contains it) — presumably the repository root, going by the name.
REPO = Path(__file__).resolve().parents[1]
# Put REPO first on the import search path. NOTE(review): nothing visible in
# this script imports a project package; presumably kept so that it can.
sys.path.insert(0, str(REPO))
# Binary .IDFW fixture that main() reads as raw bytes.
TARGET = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
# Text sidecar with the same base name (under TXT/), parsed by
# load_sidecar_samples() from main().
TXT = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt"
def hex_at(buf: bytes, off: int, n: int = 32) -> str:
    """Render up to ``n`` bytes of ``buf`` starting at ``off`` as one dump line.

    The line reads ``"<offset>: <hex bytes> <ascii>"``: the offset as at least
    four lowercase hex digits, every byte as two lowercase hex digits joined
    by single spaces, then an ASCII column in which any byte outside 32..126
    is shown as ``"."``.  A slice that runs past the end of ``buf`` simply
    produces a shorter (possibly empty) dump.
    """
    hex_cells = []
    ascii_cells = []
    for byte in buf[off : off + n]:
        hex_cells.append(format(byte, "02x"))
        # Only printable 7-bit ASCII is shown literally.
        ascii_cells.append(chr(byte) if 32 <= byte < 127 else ".")
    hexs = " ".join(hex_cells)
    asc = "".join(ascii_cells)
    return f"{off:04x}: {hexs} {asc}"
def find_all(buf: bytes, needle: bytes) -> list[int]:
    """Return every offset in ``buf`` at which ``needle`` occurs, ascending.

    The search resumes one byte after each hit, so overlapping occurrences
    are all reported.  An empty list means ``needle`` never occurs.
    """
    positions: list[int] = []
    pos = buf.find(needle)
    while pos >= 0:
        positions.append(pos)
        # Advance by a single byte (not len(needle)) to keep overlapping hits.
        pos = buf.find(needle, pos + 1)
    return positions
def load_sidecar_samples(path: Path) -> dict[str, list[float]]:
    """Parse the txt sample table — Tran/Vert/Long/MicL.

    Lines are ignored until one that equals ``"Waveform Data Channels"`` after
    stripping whitespace; parsing then continues until a line starting with
    ``"Waveform Data USB Channels"`` or the end of the file.  Inside that
    block each line is split on tabs and fields 1..4 are read as the Tran,
    Vert, Long and MicL values (field 0 is ignored).

    Rows with fewer than five fields, the header row, and rows where any of
    the four values is not a valid float are skipped as a whole, so the four
    returned lists always have the same length and stay index-aligned.

    Args:
        path: Location of the text sidecar file.

    Returns:
        A dict with keys ``"Tran"``, ``"Vert"``, ``"Long"`` and ``"MicL"``,
        each mapping to that channel's samples in file order.
    """
    channels = ("Tran", "Vert", "Long", "MicL")
    out: dict[str, list[float]] = {name: [] for name in channels}
    in_block = False
    for line in path.read_text(errors="replace").splitlines():
        if not in_block:
            if line.strip() == "Waveform Data Channels":
                in_block = True
            continue
        if line.startswith("Waveform Data USB Channels"):
            break
        parts = line.split("\t")
        if len(parts) < 5:
            continue
        # First row is the header "\tTran\tVert\tLong\tMicL"
        if parts[1] == "Tran":
            continue
        try:
            # Convert the whole row before touching ``out``: appending value
            # by value would leave the channels misaligned if a later field
            # in the same row failed to parse.
            row = [float(cell) for cell in parts[1:5]]
        except ValueError:
            continue
        for name, value in zip(channels, row):
            out[name].append(value)
    return out
def main():
    """Print a reconnaissance report for the fixture file and its txt sidecar.

    Reads ``TARGET`` as raw bytes and ``TXT`` through ``load_sidecar_samples``,
    then prints: the file size, the per-channel sample counts, the first six
    samples of each channel, a dump of the first 20 occurrences of the bytes
    ``00 02 00``, and every occurrence of the bytes ``40 02`` together with
    the bytes immediately around it.
    """
    buf = TARGET.read_bytes()
    samples = load_sidecar_samples(TXT)

    print(f"file size: {len(buf)} bytes")
    print(f"sample rows: Tran={len(samples['Tran'])} Vert={len(samples['Vert'])} Long={len(samples['Long'])} MicL={len(samples['MicL'])}")
    print(f"first 6 Tran samples: {samples['Tran'][:6]}")
    print(f"first 6 Vert samples: {samples['Vert'][:6]}")
    print(f"first 6 Long samples: {samples['Long'][:6]}")
    print(f"first 6 MicL samples: {samples['MicL'][:6]}")
    print()

    print("=== BW magic '00 02 00' positions ===")
    magic_hits = find_all(buf, b"\x00\x02\x00")
    print(f"{len(magic_hits)} hits")
    # Only the first 20 hits are dumped; the count above covers all of them.
    for pos in magic_hits[:20]:
        print(hex_at(buf, pos, 24))
    print()

    print("=== '40 02' segment-header positions ===")
    header_hits = find_all(buf, b"\x40\x02")
    print(f"{len(header_hits)} hits")
    for h in header_hits:
        # Up to four bytes before the hit are shown to help tell real headers
        # from chance occurrences; the start is clamped at the file beginning.
        window_start = max(0, h - 4)
        ctx_pre = buf[window_start:h].hex()
        ctx_post = buf[h : h + 20].hex()
        print(f" 0x{h:04x} pre={ctx_pre} post={ctx_post}")


if __name__ == "__main__":
    main()