series 4 codec work, initial decode success
This commit is contained in:
@@ -73,6 +73,28 @@ should not import from `sfm/`, must not touch a DB, and have no I/O
|
|||||||
beyond reading files passed as arguments. Keep them pure — both
|
beyond reading files passed as arguments. Keep them pure — both
|
||||||
tiers can then depend on them without circularity.
|
tiers can then depend on them without circularity.
|
||||||
|
|
||||||
|
#### Thor IDF binary codec (2026-05-28)
|
||||||
|
|
||||||
|
`micromate/idf_file.read_idf_file()` decodes both Thor IDFW
|
||||||
|
(waveform) and IDFH (histogram) binaries.
|
||||||
|
|
||||||
|
- **IDFW** reuses `decode_waveform_v2()` on the body at fixed file
|
||||||
|
offset `0x0f1f`. Sample fidelity is 87–99% byte-exact on quiet
|
||||||
|
events; loud events hit the BW codec's known walker-stops-early
|
||||||
|
limitation.
|
||||||
|
- **IDFH** has its own segment-based decoder: `[len_be][0a 00 00 00]
|
||||||
|
[00 NN][05 3f]` + N × 72-byte interval records (4 × 16-byte
|
||||||
|
per-channel min/max/halfp). All 859 Thor IDFH corpus files
|
||||||
|
decode (181,071 intervals); peak matches sidecar within ~1.8%
|
||||||
|
(ADC quantization).
|
||||||
|
|
||||||
|
The two outlier `BE9439_*` files in the Thor example corpus are
|
||||||
|
actually Series III Blastware binaries that share the `.IDFW`/`.IDFH`
|
||||||
|
filename convention by accident. `read_idf_file()` detects them by
|
||||||
|
their BW STRT signature and raises NotImplementedError pointing
|
||||||
|
callers at `read_blastware_file()`. See
|
||||||
|
`docs/idf_protocol_reference.md` for full field layouts.
|
||||||
|
|
||||||
### Practical consequences
|
### Practical consequences
|
||||||
|
|
||||||
When deciding where new code goes, ask:
|
When deciding where new code goes, ask:
|
||||||
|
|||||||
@@ -0,0 +1,65 @@
|
|||||||
|
"""Run read_idf_file across the corpus and report per-channel accuracy vs sidecars."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from micromate.idf_file import read_idf_file
|
||||||
|
from analysis_idf.recon import load_sidecar_samples
|
||||||
|
|
||||||
|
|
||||||
|
def sidecar_path(idfw: Path) -> Path:
    """Return the expected text-sidecar path for an IDF binary.

    The sidecar is looked up in a ``TXT`` directory next to the binary and is
    named after the binary's full file name with ``.txt`` appended.
    """
    sidecar_name = idfw.name + ".txt"
    return idfw.parent.joinpath("TXT", sidecar_name)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Decode every ``*.IDFW`` fixture and report exact-match rates per channel.

    A file is skipped (and counted as skipped) when decoding raises, when its
    text sidecar is missing, or when the sidecar cannot be parsed.  For the
    remaining files the sidecar values are converted to integer counts with
    ``GEO_LSB`` and compared position-by-position with the decoded samples.
    """
    root = REPO / "tests/fixtures/THORDATA_example"
    files = sorted(f for f in root.rglob("*.IDFW") if not str(f).endswith(".CDB"))
    GEO_LSB = 0.0003
    channels = ("Tran", "Vert", "Long")

    n_ok = 0
    n_skip = 0
    overall = {name: [] for name in channels}

    for f in files:
        try:
            res = read_idf_file(f)
        except Exception:
            n_skip += 1
            continue

        sc_path = sidecar_path(f)
        if not sc_path.exists():
            n_skip += 1
            continue

        try:
            sc = load_sidecar_samples(sc_path)
        except Exception:
            n_skip += 1
            continue

        for ch in channels:
            expected = [int(round(v / GEO_LSB)) for v in sc[ch]]
            decoded = res.samples.get(ch, [])
            compared = min(len(expected), len(decoded))
            if compared == 0:
                # Nothing to compare for this channel; it does not feed the stats.
                continue
            hits = sum(1 for want, got in zip(expected, decoded) if want == got)
            overall[ch].append(100.0 * hits / compared)
        n_ok += 1

    print(f"Processed {n_ok} files (skipped {n_skip})")
    print("Per-channel exact-match % (mean / min / max):")
    for ch, vals in overall.items():
        if not vals:
            continue
        avg = sum(vals) / len(vals)
        print(f" {ch}: mean={avg:.2f}% min={min(vals):.2f}% max={max(vals):.2f}% n={len(vals)}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
"""Find where decoded-vs-sidecar diverges for each channel."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from minimateplus.waveform_codec import decode_waveform_v2
|
||||||
|
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Show where the decoded waveform first disagrees with the sidecar.

    For each geophone channel this prints the index of the first mismatch, a
    short window of samples around it (3 before, 7 after), the total number
    of mismatching samples and the longest run of consecutive matches.

    Only the overlapping prefix of the two series is compared, so a decoder
    that yields more samples than the sidecar lists no longer raises
    ``IndexError``.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    decoded = decode_waveform_v2(buf[0x0f1f:])
    GEO_LSB = 0.0003

    for ch in ("Tran", "Vert", "Long"):
        sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
        dec = decoded[ch]
        # Compare only indices that exist in both series.
        n = min(len(dec), len(sc_counts))

        first_diff = next((i for i in range(n) if dec[i] != sc_counts[i]), None)
        if first_diff is None:
            print(f"{ch}: NO MISMATCHES")
            continue
        print(f"{ch}: first diff at idx {first_diff}")

        # Context window around the first mismatch.
        for i in range(max(0, first_diff - 3), min(n, first_diff + 8)):
            mark = " " if dec[i] == sc_counts[i] else "**"
            print(f" {mark} idx {i:4d}: sc={sc_counts[i]:6d} dec={dec[i]:6d} diff={dec[i]-sc_counts[i]:+d}")

        # Count mismatches and track the longest streak of exact matches.
        cum_match_run = 0
        max_match_run = 0
        diff_count = 0
        for i in range(n):
            if dec[i] == sc_counts[i]:
                cum_match_run += 1
                max_match_run = max(max_match_run, cum_match_run)
            else:
                cum_match_run = 0
                diff_count += 1
        print(f" total mismatches: {diff_count}/{n}, longest run of matches: {max_match_run}")
        print()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
"""End-to-end IDFH ingest verification."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from sfm.waveform_store import WaveformStore
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Ingest one IDFH fixture into a temporary store and print what was saved.

    Calls ``WaveformStore.save_imported_idf`` with the binary and its text
    report, prints selected fields of the returned record and event, then
    reads the written ``.sfm.json`` sidecar and reports the
    ``extensions.idf_intervals`` entries found in it.
    """
    idfh = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM13981/UM13981_20220805075441.IDFH"
    txt = idfh.parent / "TXT" / f"{idfh.name}.txt"

    with tempfile.TemporaryDirectory() as td:
        store_root = Path(td)
        store = WaveformStore(store_root)
        report_text = txt.read_text(errors="replace")
        ev, rec = store.save_imported_idf(
            idfh.read_bytes(),
            idfh,
            idf_report_text=report_text,
        )

        print("=== save_imported_idf (IDFH) ===")
        record_fields = (
            ("serial", "serial"),
            ("filename", "filename"),
            ("filesize", "filesize"),
            ("h5", "hdf5_filename"),  # expect None for histogram
            ("sidecar", "sidecar_filename"),
        )
        for label, key in record_fields:
            print(f" {label}: {rec[key]}")
        print()

        print("=== Event ===")
        print(f" timestamp: {ev.timestamp}")
        print(f" record_type: {ev.record_type}")
        print(f" sample_rate: {ev.sample_rate}")
        print()

        # Inspect the sidecar to confirm the intervals were stashed.
        sc_path = store_root / "UM13981" / f"{idfh.name}.sfm.json"
        sc = json.loads(sc_path.read_text())
        intervals = sc.get("extensions", {}).get("idf_intervals", [])
        print(f" sidecar intervals: {len(intervals)}")
        if intervals:
            print(f" first interval: {intervals[0]}")
            print(f" last interval: {intervals[-1]}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
"""Verify the had_report=False path: ingest IDFW with no .txt."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from sfm.waveform_store import WaveformStore
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Ingest one IDFW fixture with no text report and print the result.

    Passes ``idf_report_text=None`` and ``serial_hint=None`` to
    ``WaveformStore.save_imported_idf`` and prints the fields of the returned
    event and record, including per-channel raw sample counts.
    """
    idfw = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    with tempfile.TemporaryDirectory() as td:
        store = WaveformStore(Path(td))
        ev, rec = store.save_imported_idf(
            idfw.read_bytes(),
            idfw,
            serial_hint=None,
            idf_report_text=None,  # deliberately no .txt report
        )

        print("=== IDFW without .txt ingest ===")
        print(f" serial: {rec['serial']}")
        print(f" timestamp: {ev.timestamp}")
        print(f" sample_rate: {ev.sample_rate}")
        print(f" record_type: {ev.record_type}")
        print(f" rectime_sec: {ev.rectime_seconds}")

        # A missing/empty raw_samples mapping counts as zero samples everywhere.
        raw = ev.raw_samples if ev.raw_samples else {}
        nT, nV, nL, nM = (len(raw.get(name, [])) for name in ("Tran", "Vert", "Long", "MicL"))
        print(f" raw_samples: Tran={nT} Vert={nV} Long={nL} MicL={nM}")

        peaks = ev.peak_values
        if peaks:
            print(f" peak_values: tran={peaks.tran} vert={peaks.vert} long={peaks.long}")
        print(f" h5 written: {rec['hdf5_filename']}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
"""End-to-end ingest test: feed an IDFW + .txt to save_imported_idf in a tmp store."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from sfm.waveform_store import WaveformStore
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Ingest one IDFW fixture plus its text report and print the result.

    Feeds the binary and report text to ``WaveformStore.save_imported_idf``
    in a temporary store, prints selected record/event fields, and checks
    whether the ``.h5`` and ``.sfm.json`` files exist under the store.
    """
    idfw = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    txt = idfw.parent / "TXT" / f"{idfw.name}.txt"

    with tempfile.TemporaryDirectory() as td:
        store = WaveformStore(Path(td))
        ev, rec = store.save_imported_idf(
            idfw.read_bytes(),
            idfw,
            serial_hint=None,
            idf_report_text=txt.read_text(errors="replace"),
        )

        print("=== Save result ===")
        for label, key in (
            ("serial", "serial"),
            ("filename", "filename"),
            ("filesize", "filesize"),
            ("h5", "hdf5_filename"),
            ("sidecar", "sidecar_filename"),
        ):
            print(f" {label}: {rec[key]}")
        print()

        print("=== Event ===")
        print(f" serial: {getattr(ev, 'serial', '(n/a)')}")
        print(f" timestamp: {ev.timestamp}")
        print(f" sample_rate: {ev.sample_rate}")
        print(f" record_type: {ev.record_type}")
        print(f" rectime_sec: {ev.rectime_seconds}")

        # A missing/empty raw_samples mapping counts as zero samples everywhere.
        raw = ev.raw_samples if ev.raw_samples else {}
        n_tran = len(raw.get('Tran', []))
        n_vert = len(raw.get('Vert', []))
        n_long = len(raw.get('Long', []))
        n_mic = len(raw.get('MicL', []))
        print(f" raw_samples: Tran={n_tran}, Vert={n_vert}, Long={n_long}, MicL={n_mic}")

        peaks = ev.peak_values
        if peaks:
            print(f" peaks (txt): Tran={peaks.tran} Vert={peaks.vert} Long={peaks.long}")
        print()

        # Verify the output files actually got written.
        serial_dir = Path(td) / "UM11719"
        h5path = serial_dir / f"{idfw.name}.h5"
        h5_size = h5path.stat().st_size if h5path.exists() else 0
        print(f" h5 exists: {h5path.exists()} size={h5_size}")

        sidecar = serial_dir / f"{idfw.name}.sfm.json"
        sidecar_size = sidecar.stat().st_size if sidecar.exists() else 0
        print(f" sidecar exists:{sidecar.exists()} size={sidecar_size}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,137 @@
|
|||||||
|
"""Decode IDFH histogram intervals + verify against sidecar."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
import struct
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
|
||||||
|
SEGMENT_MAGIC = b"\x02\xda\x0a\x00\x00\x00"
|
||||||
|
SEGMENT_SIZE = 732 # = 10-byte header + 10 × 72-byte intervals + 2-byte tail
|
||||||
|
INTERVAL_SIZE = 72
|
||||||
|
CHANNELS = ("Tran", "Vert", "Long", "MicL")
|
||||||
|
|
||||||
|
|
||||||
|
def decode_interval(buf72: bytes) -> dict:
    """Decode one interval record into per-channel fields.

    The first 64 bytes are read as four consecutive 16-byte big-endian
    blocks, one per entry of ``CHANNELS``.  Within a block: signed 16-bit
    values at offsets 0, 2 and 4 (``min``, ``max``, ``field4``) and unsigned
    16-bit values at offsets 6, 10 and 14 (``halfp``, ``field10``,
    ``field14``); bytes 8-9 and 12-13 are not read.

    Returns a dict keyed by channel name.  Each channel dict also carries
    ``peak`` (the larger of ``|min|`` and ``|max|``) and ``freq_hz``
    (``512.0 / halfp``, or ``None`` when ``halfp`` is 5 or less).  The extra
    ``"_tail"`` key holds everything from byte 64 onward as a space-separated
    hex string.

    Raises:
        struct.error: if a channel block is shorter than 16 bytes.
    """
    # h h h H, 2 pad bytes, H, 2 pad bytes, H  -> exactly 16 bytes.
    layout = ">hhhH2xH2xH"
    decoded = {}
    for index, name in enumerate(CHANNELS):
        lo, hi, field4, halfp, field10, field14 = struct.unpack_from(
            layout, buf72[index * 16:(index + 1) * 16], 0
        )
        if halfp > 5:
            freq_hz = 512.0 / halfp
        else:
            freq_hz = None
        decoded[name] = {
            "min": lo,
            "max": hi,
            "field4": field4,
            "halfp": halfp,
            "field10": field10,
            "field14": field14,
            "peak": max(abs(lo), abs(hi)),
            "freq_hz": freq_hz,
        }
    decoded["_tail"] = buf72[64:].hex(" ")
    return decoded
|
||||||
|
|
||||||
|
|
||||||
|
def walk_idfh(buf: bytes) -> list:
    """Collect every interval record found in an IDFH buffer.

    Scans ``buf`` for 10-byte segment headers shaped like
    ``[length_be 2B][0a 00 00 00][00 NN][05 3f]``.  For each header the
    interval count is ``(length - 10) // INTERVAL_SIZE`` and the records
    start immediately after the header.  Records that would run past the end
    of the buffer are dropped.  After a segment the scan resumes at
    ``header_start + length + 2`` (the segment is followed by 2 tail bytes).

    Returns:
        A list of dicts, one per interval: the ``decode_interval`` result
        plus an ``"offset"`` key holding the record's offset in ``buf``.
    """
    marker = b"\x0a\x00\x00\x00"
    intervals = []
    i = 0
    while True:
        j = buf.find(marker, i)
        if j < 0:
            break
        if j < 2:
            # No room for the 2-byte length that must precede the marker.
            i = j + 1
            continue
        if j + 10 > len(buf):
            break
        # Validate the rest of the header shape: [00 NN][05 3f].
        if buf[j + 4] != 0x00 or buf[j + 6:j + 8] != b"\x05\x3f":
            i = j + 1
            continue

        header_start = j - 2
        length = int.from_bytes(buf[header_start:j], "big")
        n_intervals = (length - 10) // INTERVAL_SIZE
        interval_start = header_start + 10
        for k in range(n_intervals):
            off = interval_start + k * INTERVAL_SIZE
            if off + INTERVAL_SIZE > len(buf):
                break
            chunk = buf[off:off + INTERVAL_SIZE]
            intervals.append({"offset": off, **decode_interval(chunk)})

        # Always move past this marker.  With length == 0 the computed resume
        # point equals j, which would re-find the same marker forever.
        i = max(j + 1, header_start + length + 2)
    return intervals
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Decode two IDFH fixtures and print intervals next to sidecar rows.

    The first file is walked and intervals 0, 1, 78, 79 and 80 (where
    present) are printed together with the matching sidecar row.  For the
    second file only the first interval and first sidecar row are printed.
    """

    def show_channels(iv):
        # One line per channel: raw peak count, scaled peak, half-period, frequency.
        for ch in CHANNELS:
            d = iv[ch]
            peak_ips = d["peak"] / 32768 * 10.0
            print(f" {ch}: peak={d['peak']:5d} ({peak_ips:.4f} in/s) halfp={d['halfp']:5d} freq={d['freq_hz']}")

    # Multi-segment IDFH.
    target = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM13981/UM13981_20220805075441.IDFH"
    sc_path = target.parent / "TXT" / f"{target.name}.txt"
    buf = target.read_bytes()
    intervals = walk_idfh(buf)
    print(f"=== {target.name} ===")
    print(f" file size: {len(buf)}")
    print(f" decoded intervals: {len(intervals)}")

    # Sidecar data rows are the lines that begin with a 2022/2023 date.
    sc_rows = [
        line
        for line in sc_path.read_text(errors="replace").splitlines()
        if line.startswith(("2022-", "2023-"))
    ]
    print(f" sidecar rows: {len(sc_rows)}")

    print()
    for k in (0, 1, 78, 79, 80):
        if k >= len(intervals):
            continue
        iv = intervals[k]
        print(f"--- interval {k} @0x{iv['offset']:04x} ---")
        show_channels(iv)
        if k < len(sc_rows):
            print(f" SC: {sc_rows[k]}")

    # Single-interval IDFH.
    print()
    target2 = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162648.IDFH"
    sc2 = target2.parent / "TXT" / f"{target2.name}.txt"
    buf2 = target2.read_bytes()
    intervals2 = walk_idfh(buf2)
    print(f"=== {target2.name} ===")
    print(f" file size: {len(buf2)}, decoded intervals: {len(intervals2)}")
    if intervals2:
        show_channels(intervals2[0])
        sc_rows2 = [l for l in sc2.read_text(errors='replace').splitlines() if l.startswith("2023-")]
        if sc_rows2:
            print(f" SC: {sc_rows2[0]}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
"""Find IDFH interval period via auto-correlation of structural patterns."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Score candidate interval sizes by how many byte columns are mostly zero.

    Takes the fixture bytes between offsets 0xF96 and 0x270C, and for every
    candidate record size from 16 to 99 that yields at least 30 whole
    records, counts the column positions whose byte is zero in at least 90%
    of the records.  The ten best-scoring sizes are printed.
    """
    target = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM13981/UM13981_20220805075441.IDFH"
    buf = target.read_bytes()
    body = buf[0xF96:0x270C]
    print(f"body size: {len(body)} bytes (file {len(buf)} bytes)")

    print()
    print("=== zero-column score by interval size (higher = more likely) ===")
    ranked = []
    for size in range(16, 100):
        count = len(body) // size
        if count < 30:
            continue
        threshold = count * 0.9
        zero_cols = 0
        for col in range(size):
            # Strided slice picks byte `col` of each of the `count` whole records.
            if body[col:count * size:size].count(0) >= threshold:
                zero_cols += 1
        ranked.append((zero_cols, size, count))
    ranked.sort(reverse=True)
    for score, sz, n in ranked[:10]:
        print(f" size={sz:3d} n_intervals={n} consistently-zero-cols={score}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
"""Per-file accuracy + sample-count details."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from micromate.idf_file import read_idf_file
|
||||||
|
from analysis_idf.recon import load_sidecar_samples
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Print Tran-channel accuracy and sample counts for individual files.

    Walks the ``*.IDFW`` fixtures in sorted order, silently skipping files
    that fail to decode or have no text sidecar, and prints one line per
    remaining file.  Output stops after 20 files have been shown.
    """
    root = REPO / "tests/fixtures/THORDATA_example"
    files = sorted(f for f in root.rglob("*.IDFW") if not str(f).endswith(".CDB"))
    GEO_LSB = 0.0003
    max_shown = 20
    shown = 0
    for f in files:
        try:
            res = read_idf_file(f)
        except Exception:
            continue
        sc_path = f.parent / "TXT" / f"{f.name}.txt"
        if not sc_path.exists():
            continue
        sc = load_sidecar_samples(sc_path)
        sc_tran = [int(round(v / GEO_LSB)) for v in sc["Tran"]]
        dec = res.samples.get("Tran", [])
        n = min(len(sc_tran), len(dec))
        if n:
            exact = sum(1 for want, got in zip(sc_tran, dec) if want == got)
            pct = 100.0 * exact / n
        else:
            pct = 0.0
        print(f"{f.name:40s} size={f.stat().st_size:6d} sc_n={len(sc_tran):4d} dec_n={len(dec):4d} exact={pct:.1f}%")
        shown += 1
        if shown >= max_shown:
            break
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
"""Look at what's at the divergence boundary."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from minimateplus.waveform_codec import walk_body, find_data_start, parse_segment_header
|
||||||
|
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Dump the first 30 codec blocks and a running per-channel sample tally.

    The waveform body is taken from file offset 0x0f1f.  The first pass
    prints each block (segment headers get their parsed fields); the second
    pass accumulates the sample counts implied by each block's tag and
    switches channel at every ``0x40`` segment header.
    """
    body_base = 0x0f1f
    buf = TARGET.read_bytes()
    body = buf[body_base:]
    start = find_data_start(body)
    print(f"data_start: {start} (= file offset 0x{body_base + start:04x})")

    blocks = walk_body(body, start)
    print(f"{len(blocks)} blocks total")
    print()

    head = blocks[:30]

    print("=== first 30 blocks ===")
    for i, b in enumerate(head):
        body_off = body_base + b.offset
        if b.tag_hi != 0x40:
            print(f" [{i:3d}] @0x{body_off:04x} {b.kind} len={b.length} data={b.data[:16].hex()}")
            continue
        hdr = parse_segment_header(b)
        if hdr:
            counter = hdr['counter']
            field2 = hdr['field2'].hex()
            anchor = hdr['anchor_bytes'].hex()
            tail = hdr['tail'].hex()
        else:
            counter = field2 = anchor = tail = '?'
        print(f" [{i:3d}] @0x{body_off:04x} {b.kind} (segment header) counter={counter} field2={field2} anchor={anchor} tail={tail}")
    print()

    # Running sample count per segment, to locate the block holding a given sample.
    print("=== cumulative samples through blocks ===")
    cur_ch = "Tran"
    rotation = ["Vert", "Long", "MicL", "Tran"]
    seg_count = 0
    samples_in_curseg = 2  # preamble Tran[0], Tran[1]
    for i, b in enumerate(head):
        tag_class = b.tag_hi & 0xF0
        if b.tag_hi == 0x40:
            seg_count += 1
            prev_ch = cur_ch
            cur_ch = rotation[(seg_count - 1) % 4]
            print(f" [{i:3d}] 40 02 -> end of {prev_ch} segment, start {cur_ch} (segment {seg_count})")
            samples_in_curseg = 2  # anchors
        elif tag_class == 0x10 or tag_class == 0x20:
            # 12-bit count: low nibble of tag_hi is the high part, tag_lo the low part.
            nn = ((b.tag_hi & 0x0F) << 8) | b.tag_lo
            samples_in_curseg += nn
            if tag_class == 0x10:
                print(f" [{i:3d}] {b.kind} nibble: +{nn} samples, ch={cur_ch}, ch_total~{samples_in_curseg}")
            else:
                print(f" [{i:3d}] {b.kind} int8: +{nn} samples, ch={cur_ch}, ch_total~{samples_in_curseg}")
        elif b.tag_hi == 0x00:
            samples_in_curseg += b.tag_lo
            print(f" [{i:3d}] {b.kind} RLE: +{b.tag_lo}, ch={cur_ch}, ch_total~{samples_in_curseg}")
        elif b.tag_hi == 0x30:
            samples_in_curseg += b.tag_lo
            print(f" [{i:3d}] {b.kind} packed12: +{b.tag_lo} samples, ch={cur_ch}, ch_total~{samples_in_curseg}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
"""Reconnaissance helpers for cracking the Thor IDFW binary."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
TARGET = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
|
||||||
|
TXT = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt"
|
||||||
|
|
||||||
|
|
||||||
|
def hex_at(buf: bytes, off: int, n: int = 32) -> str:
    """Format up to ``n`` bytes of ``buf`` starting at ``off`` as a dump line.

    The line holds the offset as at least four hex digits, the bytes as
    space-separated two-digit hex, and an ASCII rendering in which every
    byte outside the printable range 32..126 is shown as ``.``.
    """
    window = buf[off:off + n]
    hex_part = window.hex(" ")
    ascii_part = "".join(chr(b) if 32 <= b < 127 else "." for b in window)
    return f"{off:04x}: {hex_part} {ascii_part}"
|
||||||
|
|
||||||
|
|
||||||
|
def find_all(buf: bytes, needle: bytes) -> list[int]:
    """Return every offset at which ``needle`` occurs in ``buf``.

    Matches may overlap: the search resumes one byte after each hit rather
    than after the end of the match.
    """
    positions: list[int] = []
    pos = buf.find(needle)
    while pos >= 0:
        positions.append(pos)
        pos = buf.find(needle, pos + 1)
    return positions
|
||||||
|
|
||||||
|
|
||||||
|
def load_sidecar_samples(path: Path) -> dict[str, list[float]]:
    """Parse the Tran/Vert/Long/MicL sample table from a text sidecar.

    Lines are ignored until one that, stripped, equals
    ``Waveform Data Channels``.  Parsing then stops at the first line that
    starts with ``Waveform Data USB Channels``.  In between, each
    tab-separated row with at least five fields contributes fields 1-4 as
    floats; the column-header row and rows that fail to parse are skipped.

    A row is parsed completely before anything is stored, so a row with one
    bad column is dropped as a whole and the four channel lists always stay
    the same length.

    Returns:
        Dict with keys ``"Tran"``, ``"Vert"``, ``"Long"``, ``"MicL"``, each
        mapping to the list of values in file order.
    """
    channels = ("Tran", "Vert", "Long", "MicL")
    out: dict[str, list[float]] = {ch: [] for ch in channels}
    in_block = False
    for line in path.read_text(errors="replace").splitlines():
        if not in_block:
            if line.strip() == "Waveform Data Channels":
                in_block = True
            continue
        if line.startswith("Waveform Data USB Channels"):
            break
        parts = line.split("\t")
        # Skip short rows and the header row "\tTran\tVert\tLong\tMicL".
        if len(parts) < 5 or parts[1] == "Tran":
            continue
        try:
            values = [float(text) for text in parts[1:5]]
        except ValueError:
            continue
        for ch, value in zip(channels, values):
            out[ch].append(value)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Print a first-look summary of the target IDFW file and its sidecar.

    Shows the file size, sidecar row counts and leading samples, then lists
    where the byte patterns ``00 02 00`` and ``40 02`` occur in the binary.
    """
    buf = TARGET.read_bytes()
    samples = load_sidecar_samples(TXT)
    print(f"file size: {len(buf)} bytes")
    print(f"sample rows: Tran={len(samples['Tran'])} Vert={len(samples['Vert'])} Long={len(samples['Long'])} MicL={len(samples['MicL'])}")
    for ch in ("Tran", "Vert", "Long", "MicL"):
        print(f"first 6 {ch} samples: {samples[ch][:6]}")

    print()
    print("=== BW magic '00 02 00' positions ===")
    magic_hits = find_all(buf, b"\x00\x02\x00")
    print(f"{len(magic_hits)} hits")
    for pos in magic_hits[:20]:
        print(hex_at(buf, pos, 24))

    print()
    print("=== '40 02' segment-header positions ===")
    header_hits = find_all(buf, b"\x40\x02")
    print(f"{len(header_hits)} hits")
    for h in header_hits:
        # The preceding bytes help tell real headers from chance occurrences.
        ctx_pre = buf[max(0, h - 4):h].hex()
        ctx_post = buf[h:h + 20].hex()
        print(f" 0x{h:04x} pre={ctx_pre} post={ctx_post}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
"""Find each segment boundary in the channel and check if errors reset there."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from minimateplus.waveform_codec import decode_waveform_v2
|
||||||
|
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """List where decoded samples switch between matching and mismatching.

    For each geophone channel the decoded series is compared with the
    sidecar counts over their common length.  Every index at which the
    match state flips is recorded as ``DIVERGE`` (match -> mismatch) or
    ``RESYNC`` (mismatch -> match); the first 20 transitions are printed.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    decoded = decode_waveform_v2(buf[0x0f1f:])
    GEO_LSB = 0.0003

    for ch in ("Tran", "Vert", "Long"):
        sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
        dec = decoded[ch]
        transitions = []
        was_match = True  # the series is treated as in sync before index 0
        for idx, (want, got) in enumerate(zip(sc_counts, dec)):
            is_match = want == got
            if is_match != was_match:
                label = "RESYNC" if is_match else "DIVERGE"
                transitions.append((idx, label, want, got))
                was_match = is_match
        print(f"{ch}: {len(transitions)} transitions")
        for i, kind, sc_v, dec_v in transitions[:20]:
            print(f" idx {i:4d} {kind:8s} sc={sc_v:6d} dec={dec_v:6d} diff={dec_v-sc_v:+d}")
        print()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
"""Smoke-test read_idf_file on IDFH across the corpus."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from micromate.idf_file import read_idf_file
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Decode one IDFH fixture in detail, then sweep every IDFH fixture.

    The sweep tallies files that decode, files that raise
    ``NotImplementedError`` and files that raise anything else (the first
    three such failures are printed), plus the total interval count.
    """
    target = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162648.IDFH"
    result = read_idf_file(target)
    ev = result.event
    peaks = ev.peaks
    print(f"=== {target.name} ===")
    print(f" signature: {result.signature}")
    print(f" serial: {ev.serial}")
    print(f" timestamp: {ev.timestamp}")
    print(f" sample_rate: {ev.sample_rate}")
    print(f" kind: {ev.kind}")
    print(f" intervals: {len(result.intervals or [])}")
    print(f" peaks: T={peaks.transverse_ips:.4f} V={peaks.vertical_ips:.4f} L={peaks.longitudinal_ips:.4f}")
    print()

    # Corpus sweep.
    root = REPO / "tests/fixtures/THORDATA_example"
    files = list(root.rglob("*.IDFH"))
    ok = 0
    fail = 0
    nyi = 0
    total_intervals = 0
    for f in files:
        try:
            decoded = read_idf_file(f)
            ok += 1
            total_intervals += len(decoded.intervals or [])
        except NotImplementedError:
            nyi += 1
        except Exception as exc:
            fail += 1
            if fail > 3:
                continue
            print(f" FAIL: {f.name}: {type(exc).__name__}: {exc}")
    print(f"Corpus: {len(files)} IDFH files | ok={ok} fail={fail} nyi={nyi}")
    print(f"Total intervals decoded: {total_intervals}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
"""Smoke-test read_idf_file across the sample corpus."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from micromate.idf_file import read_idf_file, geo_count_to_ips, mic_count_to_psi
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Decode one reference IDFW file in detail, then sweep the corpus.

    Prints header fields, per-channel sample counts and geo peaks for a
    single known waveform file, then runs ``read_idf_file`` over every
    ``*.IDFW`` under the fixture tree and reports how many decoded,
    failed, or raised ``NotImplementedError``.
    """
    target = REPO / "tests/fixtures/THORDATA_example/THORDATA_example/UPMC Presby/UM11719/UM11719_20231219162723.IDFW"
    result = read_idf_file(target)
    ev = result.event
    print(f"=== {target.name} ===")
    print(f" signature: {result.signature}")
    print(f" serial: {ev.serial}")
    print(f" timestamp: {ev.timestamp}")
    print(f" sample_rate: {ev.sample_rate}")
    print(f" record_time: {ev.record_time_sec}")
    print(f" calibration: {result.binary_metadata.calibration_date}")
    print(f" Tran samples: {len(result.samples['Tran'])}, peak_ips={ev.peaks.transverse_ips:.4f}")
    print(f" Vert samples: {len(result.samples['Vert'])}, peak_ips={ev.peaks.vertical_ips:.4f}")
    print(f" Long samples: {len(result.samples['Long'])}, peak_ips={ev.peaks.longitudinal_ips:.4f}")
    print(f" MicL samples: {len(result.samples['MicL'])}")
    print()

    # Corpus sweep. The glob already restricts matches to names ending in
    # ".IDFW", so no further suffix filtering is needed.
    root = REPO / "tests/fixtures/THORDATA_example"
    files = list(root.rglob("*.IDFW"))
    ok = fail = nyi = 0
    for f in files:
        try:
            read_idf_file(f)
            ok += 1
        except NotImplementedError:
            nyi += 1
        except Exception as exc:
            fail += 1
            # Only the first five failures are reported, to keep output short.
            if fail <= 5:
                print(f" FAIL: {f.name}: {type(exc).__name__}: {exc}")
    print()
    print(f"Corpus: {len(files)} IDFW files | ok={ok} fail={fail} not-implemented={nyi}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
"""Trace Tran sample-by-sample to find exactly where the codec drifts."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
|
||||||
|
|
||||||
|
|
||||||
|
def s4(n: int) -> int:
    """Interpret a nibble value as a signed 4-bit integer (8..15 map to -8..-1)."""
    if n >= 8:
        return n - 16
    return n
|
||||||
|
|
||||||
|
|
||||||
|
def i8(b: int) -> int:
    """Interpret a byte value as a signed 8-bit integer (128..255 map to -128..-1)."""
    if b >= 128:
        return b - 256
    return b
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Walk the first two Tran blocks by hand and compare with the sidecar.

    Reads the two preamble samples from the body at file offset 0x0f1f,
    applies block 0 as signed-nibble deltas and block 1 as signed-int8
    deltas, and prints a per-sample comparison against the sidecar counts
    (sidecar in/s divided by the 0.0003 geo LSB) for selected index windows.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    GEO_LSB = 0.0003
    sc_tran = [int(round(v / GEO_LSB)) for v in sc["Tran"]]

    body = buf[0x0f1f:]
    # Tran[0], Tran[1] from preamble
    t0 = int.from_bytes(body[3:5], "big", signed=True)
    t1 = int.from_bytes(body[5:7], "big", signed=True)
    print(f"preamble Tran[0]={t0} Tran[1]={t1} (sidecar: {sc_tran[0]}, {sc_tran[1]})")

    # Block 0: 10 f8 at body[7:9]
    print(f"block 0: tag {body[7]:02x} {body[8]:02x}")
    print(f" block 0 first 10 data bytes: {body[9:19].hex()}")

    # Walk block 0 manually, comparing each sample
    cur = t1
    samples = [t0, t1]
    nn = body[8]
    print(f" NN = {nn}")
    # NN counts nibbles, two per data byte.
    for byte in body[9 : 9 + nn // 2]:
        for nib in ((byte >> 4) & 0xF, byte & 0xF):
            delta = s4(nib)
            cur += delta
            samples.append(cur)
            idx = len(samples) - 1
            # Only the head of the block and the block boundary are printed.
            if idx < len(sc_tran) and (idx < 12 or 240 <= idx <= 260):
                sc_v = sc_tran[idx]
                match = "✓" if sc_v == cur else "✗"
                print(f" idx {idx:3d}: nibble byte={byte:02x} nib={nib:x} delta={delta:+d} cur={cur:+d} sc={sc_v:+d} {match}")

    print(f"end of block 0: cur={cur}, len(samples)={len(samples)}, decoder expected 250 here")
    # Block 1 starts right after block 0's data bytes.
    block1_off = 9 + nn // 2
    print(f"block 1: tag {body[block1_off]:02x} {body[block1_off+1]:02x} (expecting 20 28)")
    nn1 = body[block1_off + 1]
    print(f" block 1 NN = {nn1}")
    # In an int8 block NN counts bytes, one delta per byte.
    for byte in body[block1_off + 2 : block1_off + 2 + nn1]:
        delta = i8(byte)
        cur += delta
        samples.append(cur)
        idx = len(samples) - 1
        if idx < len(sc_tran) and 248 <= idx <= 295:
            sc_v = sc_tran[idx]
            match = "✓" if sc_v == cur else "✗"
            print(f" idx {idx:3d}: int8 byte={byte:02x} delta={delta:+d} cur={cur:+d} sc={sc_v:+d} {match}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
"""Feed candidate body offsets to the BW codec and compare with sidecar."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from minimateplus.waveform_codec import decode_waveform_v2, walk_body, find_data_start
|
||||||
|
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Try several candidate body offsets with the BW codec.

    Prints the first 30 sidecar samples per channel as integer counts,
    then, for each candidate offset, the first 20 decoded samples per
    channel so the two can be compared by eye.
    """
    raw = TARGET.read_bytes()
    sidecar = load_sidecar_samples(TXT)

    def head_counts(channel, lsb):
        # First 30 sidecar values of one channel, rounded to integer counts.
        return [int(round(value / lsb)) for value in sidecar[channel][:30]]

    # Sidecar samples in 0.0003 counts (Thor geo LSB).
    tran_counts = head_counts("Tran", 0.0003)
    vert_counts = head_counts("Vert", 0.0003)
    long_counts = head_counts("Long", 0.0003)
    # Mic unit is a provisional 1e-6 scale, still being worked out.
    micl_counts = head_counts("MicL", 1e-6)
    print(f"sidecar Tran (counts): {tran_counts}")
    print(f"sidecar Vert (counts): {vert_counts}")
    print(f"sidecar Long (counts): {long_counts}")
    print(f"sidecar MicL (×1e-6): {micl_counts}")
    print()

    # Try candidate body start offsets.
    for candidate in (0x0f1f, 0x1057, 0x11f1, 0x1333, 0x1bde, 0x0d30):
        print(f"=== body @ 0x{candidate:04x} ===")
        channels = decode_waveform_v2(raw[candidate:])
        if channels:
            for name in ("Tran", "Vert", "Long", "MicL"):
                values = channels.get(name, [])
                print(f" {name}[{len(values)}]: {values[:20]}")
            print()
        else:
            print(" decode_waveform_v2 returned None")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
"""Verify decode_waveform_v2 against sidecar across all 2304 samples per channel."""
|
||||||
|
from __future__ import annotations
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO = Path(__file__).resolve().parents[1]
|
||||||
|
sys.path.insert(0, str(REPO))
|
||||||
|
|
||||||
|
from minimateplus.waveform_codec import decode_waveform_v2
|
||||||
|
from analysis_idf.recon import TARGET, TXT, load_sidecar_samples
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Compare ``decode_waveform_v2`` output with the sidecar, sample by sample.

    For each geo channel, prints the exact-match percentage between decoded
    counts and sidecar counts (sidecar in/s divided by the 0.0003 LSB) and
    the context around the first mismatch. Then estimates the MicL scale
    factor from the ratio sidecar/decoded over the first 50 samples.
    """
    buf = TARGET.read_bytes()
    sc = load_sidecar_samples(TXT)
    body = buf[0x0f1f:]
    decoded = decode_waveform_v2(body)
    if not decoded:
        # The decoder yields a falsy result on failure; bail out instead
        # of crashing on the subscripting below.
        print("decode_waveform_v2 returned None")
        return

    print(f"Sidecar lengths: Tran={len(sc['Tran'])} Vert={len(sc['Vert'])} Long={len(sc['Long'])} MicL={len(sc['MicL'])}")
    print(f"Decoded lengths: Tran={len(decoded['Tran'])} Vert={len(decoded['Vert'])} Long={len(decoded['Long'])} MicL={len(decoded['MicL'])}")
    print()

    GEO_LSB = 0.0003  # in/s per count
    for ch in ("Tran", "Vert", "Long"):
        sc_counts = [int(round(v / GEO_LSB)) for v in sc[ch]]
        dec = decoded[ch]
        n = min(len(sc_counts), len(dec))
        if n == 0:
            # Avoid dividing by zero when either side has no samples.
            print(f"{ch}: compared 0, nothing to compare")
            continue
        matches = sum(1 for s, d in zip(sc_counts, dec) if s == d)
        first_mismatch = next((i for i in range(n) if sc_counts[i] != dec[i]), None)
        print(f"{ch}: compared {n}, exact matches {matches} ({100*matches/n:.2f}%)")
        if first_mismatch is not None:
            i = first_mismatch
            # Clamp the window start so the printed label matches the slice.
            lo, hi = max(0, i - 2), i + 5
            print(f" first mismatch at idx {i}: sidecar={sc_counts[i]} ({sc[ch][i]}), decoded={dec[i]}")
            print(f" context sidecar[{lo}..{hi - 1}]: {sc_counts[lo:hi]}")
            print(f" context decoded[{lo}..{hi - 1}]: {dec[lo:hi]}")

    # MicL: find the multiplicative factor that fits
    print()
    print("=== MicL scale analysis ===")
    sc_micl = sc["MicL"]
    dec_micl = decoded["MicL"]
    # Look at the first 50 sample pairs and skip those whose decoded value
    # is zero (the ratio is undefined there).
    ratios = [s / d for s, d in zip(sc_micl[:50], dec_micl[:50]) if d != 0]
    if ratios:
        avg = sum(ratios) / len(ratios)
        print(f" avg ratio sidecar/decoded over first 50 nonzero: {avg:.4e} (n={len(ratios)})")
        print(f" ratios sample: {[f'{r:.4e}' for r in ratios[:6]]}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -6,11 +6,68 @@ Series IV event-file format. Sibling to
|
|||||||
Series III "Rosetta Stone") — this doc holds what we know so far and
|
Series III "Rosetta Stone") — this doc holds what we know so far and
|
||||||
the open questions still to crack.
|
the open questions still to crack.
|
||||||
|
|
||||||
**Status (2026-05-20):** ASCII text sidecar fully decoded (1,014
|
**Status (2026-05-28):** ASCII text sidecar fully decoded (1,014
|
||||||
sample files round-trip). Binary `.IDFH` / `.IDFW` codec
|
sample files round-trip). **Thor IDFW** binary now decodes via
|
||||||
**not yet implemented** — binaries are stored opaquely by
|
`micromate.idf_file.read_idf_file()` — reuses the BW segment-rotated
|
||||||
`WaveformStore.save_imported_idf`, with metadata sourced from the
|
block codec verbatim at fixed body offset `0x0f1f`; metadata (serial,
|
||||||
paired `.txt` sidecar.
|
timestamp, sample_rate, record_time, calibration_date) extracted from
|
||||||
|
the binary header. Sample fidelity is 87–99% byte-exact on quiet
|
||||||
|
events; loud events hit the BW codec's known walker-stops-early
|
||||||
|
limitation. Residual ~3% drift on per-sample deltas (likely a
|
||||||
|
Thor-specific 12-bit delta refinement not yet modelled).
|
||||||
|
|
||||||
|
**Thor IDFH histograms also decoded.** Body has one or more segments;
|
||||||
|
each 10-byte segment header `[length_be 2B][0a 00 00 00][00 NN][05 3f]`
|
||||||
|
introduces `N = (length - 10) // 72` interval records of 72 bytes
|
||||||
|
each. Each interval = 4 × 16-byte per-channel records:
|
||||||
|
`[int16 min][int16 max][int16 ??][uint16 halfp][2B 00][uint16 ??][2B 00][uint16 ??]`.
|
||||||
|
Geo peak `= max(|min|, |max|) / 32768 × 10` in/s (matches sidecar
|
||||||
|
~1.8%); freq `= 512 / halfp` Hz (None for halfp ≤ 5 → ">100"
|
||||||
|
sentinel). Corpus: **all 859 Thor IDFH files decode, 181,071
|
||||||
|
intervals**. Wired through `read_idf_file()` →
|
||||||
|
`save_imported_idf()` → sidecar's `extensions.idf_intervals`.
|
||||||
|
|
||||||
|
**Note on the BE9439 outliers in the example corpus:** Two files
|
||||||
|
(`BE9439_20200713131747.IDFW` and `BE9439_20200713124251.IDFH`) are
|
||||||
|
**Series III Blastware** binaries, not Thor. Provenance: TMI tried
|
||||||
|
to use Thor to manage auto-call-homes for Series III units; the
|
||||||
|
experiment didn't work out, but it did leave a few BW event files
|
||||||
|
in Thor's per-serial directory structure with `.IDFW`/`.IDFH`
|
||||||
|
extensions — Thor's forwarder applied its own naming convention to
|
||||||
|
the BW bodies it was relaying. Their header `10 00 01 80 00 00
|
||||||
|
Instantel STRT ff fe <end_key> <start_key>` is the BW SUB 5A STRT
|
||||||
|
record, not a Thor body preamble. The reader detects them by
|
||||||
|
signature and raises `NotImplementedError` pointing callers at
|
||||||
|
`read_blastware_file()`, which extracts BW-format peaks from them.
|
||||||
|
|
||||||
|
**Still NYI for Thor IDFH:** per-channel `int16 field4` (possibly
|
||||||
|
time-of-peak); the two uint16 fields (probably PVS contributions);
|
||||||
|
8-byte interval tail (PVS data); mic dB(L) exact conversion constant.
|
||||||
|
|
||||||
|
### Codec breakthroughs (2026-05-28)
|
||||||
|
|
||||||
|
- **Body offset is a fixed `0x0f1f`** across 151/154 corpus IDFW
|
||||||
|
files. Preceded by a 4-byte record-type marker (`46 00 00 00`)
|
||||||
|
+ magic preamble `00 02 00 [Tran[0] BE] [Tran[1] BE]`.
|
||||||
|
- **Sample stream is BW's segment-rotated block codec verbatim.**
|
||||||
|
Thor reuses `10 NN` (nibble), `20 NN` (int8), `00 NN` (RLE),
|
||||||
|
`30 NN` (packed12), `40 02` (segment header) tags with the same
|
||||||
|
semantics. Channel rotation Tran→Vert→Long→MicL.
|
||||||
|
- **Geo LSB = 0.0003 in/s** (not BW's 0.005), because Thor's 16-bit
|
||||||
|
ADC range maps to 10 in/s without the 16-count BW quantization step.
|
||||||
|
- **Mic ≈ 2.14×10⁻⁶ psi/count** (rough scale; refine after channel
|
||||||
|
block calibration constants are decoded).
|
||||||
|
- **BW compliance anchor `\xbe\x80\x00\x00\x00\x00` reappears at
|
||||||
|
IDFW offset 0x952** — sample_rate at anchor−6 (uint16 BE),
|
||||||
|
record_time at anchor+6 (float32 BE), same layout as BW.
|
||||||
|
- **Event timestamp at offset 0x97A** — 8 bytes `[day][month]
|
||||||
|
[year_be][unk][hour][min][sec]`. Stop-time mirrors at 0x982.
|
||||||
|
- **Serial as null-terminated ASCII at 0x14E**.
|
||||||
|
- **Calibration date** at 0x194–0x197 (day, month, year_be).
|
||||||
|
- Per-sample residual drift of ~3% suggests Thor encodes int8/nibble
|
||||||
|
deltas with an extra refinement bit that BW doesn't carry —
|
||||||
|
unsolved; errors resync within a few samples so cumulative impact
|
||||||
|
is small.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
+434
-48
@@ -1,64 +1,450 @@
|
|||||||
"""
|
"""
|
||||||
micromate/idf_file.py — placeholder for the Thor IDF binary codec.
|
micromate/idf_file.py — Thor IDF binary codec.
|
||||||
|
|
||||||
Thor's ``.IDFH`` (histogram) and ``.IDFW`` (waveform) event files are an
|
Decodes the Instantel Micromate Series IV ``.IDFW`` (waveform) and
|
||||||
Instantel proprietary binary format that has not yet been reverse-
|
``.IDFH`` (histogram) binary on-disk format. Sister module to
|
||||||
engineered. Today seismo-relay treats them as opaque blobs:
|
``minimateplus/event_file_io.py``.
|
||||||
``WaveformStore.save_imported_idf`` stores the bytes verbatim and reads
|
|
||||||
all device-authoritative metadata from the paired ``.IDFW.txt`` /
|
|
||||||
``.IDFH.txt`` ASCII sidecar (parsed by ``idf_ascii_report.py``).
|
|
||||||
|
|
||||||
When we crack the binary codec — same reverse-engineering playbook we
|
Status (2026-05-28):
|
||||||
used to byte-perfect-parse Series III BW files (see
|
|
||||||
``docs/instantel_protocol_reference.md`` and ``minimateplus/event_file_io.py``)
|
|
||||||
— this module will grow:
|
|
||||||
|
|
||||||
- ``read_idf_file(path) -> IdfEvent``
|
- **Genuine Series IV / Thor binaries** are all signed
|
||||||
Parse a ``.IDFW``/``.IDFH`` binary and return a fully populated
|
``00 12 01 00 00 00 Instantel\\0`` (sig-A in earlier notes). Two
|
||||||
``IdfEvent`` whose waveform-sample arrays come from the binary
|
Series III (Blastware) binaries appear in the example corpus
|
||||||
(the .txt sidecar's tabular sample block being a best-effort
|
(``BE9439_*``) — they share the ``.IDFW``/``.IDFH`` extension by
|
||||||
check). Lets us ingest Thor events even when the operator
|
filing convention but carry a BW STRT header (``10 00 01 80 00 00
|
||||||
hasn't enabled the .txt exporter — closing the
|
Instantel STRT...``) and are NOT Thor data. The reader detects
|
||||||
``had_report=False`` gap that the thor-watcher forwarder
|
them by signature and raises NotImplementedError pointing callers
|
||||||
currently tolerates as a known limitation.
|
at ``minimateplus.event_file_io.read_blastware_file()``.
|
||||||
|
- **IDFW waveform body** reuses the BW segment-rotated block codec
|
||||||
|
verbatim. Body always starts at file offset ``0x0f1f``. Samples
|
||||||
|
decoded via ``minimateplus.waveform_codec.decode_waveform_v2``
|
||||||
|
with 87–99% byte-exact match against ``.IDFW.txt`` sidecar (quiet
|
||||||
|
events). Loud events hit the BW codec's known walker-stops-early
|
||||||
|
limit. Residual ~3% drift on per-sample deltas — likely a
|
||||||
|
Thor-specific 12-bit delta refinement that BW's codec doesn't
|
||||||
|
model. Geo LSB = 0.0003 in/s; mic factor ~2.14e-6 psi/count.
|
||||||
|
- **IDFH histogram body**: 10-byte segment header
|
||||||
|
``[len_be 2B] 0a 00 00 00 [00 NN_counter] 05 3f`` introduces a
|
||||||
|
segment of ``N`` 72-byte interval records (``N = (len - 10) // 72``).
|
||||||
|
Each record holds 4 × 16-byte per-channel min/max/halfp + 8-byte
|
||||||
|
tail. Geo peaks via ``max(|min|, |max|) / 32768 × 10`` in/s
|
||||||
|
(matches sidecar within ~1.8%), freq via ``512 / halfp`` Hz.
|
||||||
|
**All 859 Thor IDFH files in the corpus decode (181,071 intervals).**
|
||||||
|
- Binary metadata directly extracted: serial, timestamp, sample_rate,
|
||||||
|
record_time, calibration_date. Other fields fall back to the paired
|
||||||
|
``.IDFW.txt`` / ``.IDFH.txt`` sidecar (consumed by
|
||||||
|
``WaveformStore.save_imported_idf``).
|
||||||
|
|
||||||
- ``write_idf_file(path, event)`` (eventually)
|
The full reverse-engineering writeup lives in
|
||||||
Round-trip event reconstruction, used for verifying the codec
|
``docs/idf_protocol_reference.md``.
|
||||||
against captured device files the way ``write_blastware_file``
|
|
||||||
verifies the Series III codec.
|
|
||||||
|
|
||||||
- Helpers for decoding the binary's per-channel sample arrays into
|
|
||||||
physical units, the per-event flash buffer's monitor-log records,
|
|
||||||
etc.
|
|
||||||
|
|
||||||
The reverse-engineering path: pair every ``.IDFW`` binary in
|
|
||||||
``thor-watcher/example-data/`` with its sibling ``.IDFW.txt``, treating
|
|
||||||
the txt's "Waveform Data Channels" block as ground-truth, and align
|
|
||||||
the binary's per-channel int16-or-similar arrays against it. Header
|
|
||||||
fields (sample rate, channel count, record time, timestamps) sit before
|
|
||||||
the sample block — same approach as the BW codec where ASCII strings
|
|
||||||
inside the binary (``Project:``, ``Client:``, etc.) anchored field
|
|
||||||
discovery.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import struct
|
||||||
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
from .models import IdfEvent
|
from minimateplus.waveform_codec import decode_waveform_v2
|
||||||
|
|
||||||
|
from .models import IdfEvent, IdfPeaks, IdfReport
|
||||||
|
|
||||||
|
|
||||||
def read_idf_file(path: Union[str, Path]) -> "IdfEvent":
|
# Genuine Series IV / Thor IDF binary signature: 6 bytes, then ASCII "Instantel".
|
||||||
"""Parse a Thor ``.IDFW``/``.IDFH`` binary into an ``IdfEvent``.
|
_THOR_PREFIX = b"\x00\x12\x01\x00\x00\x00"
|
||||||
|
# Stray Series III (Blastware) binaries that occasionally turn up in Thor
|
||||||
|
# corpus directories renamed to the .IDFW/.IDFH convention. Their header
|
||||||
|
# (`10 00 01 80 00 00 Instantel STRT ...`) is byte-for-byte a BW SUB 5A
|
||||||
|
# STRT record, not a Thor binary. Detected so we can refuse-and-route
|
||||||
|
# rather than mis-parse.
|
||||||
|
_BW_STRAY_PREFIX = b"\x10\x00\x01\x80\x00\x00"
|
||||||
|
_INSTANTEL_TAG = b"Instantel"
|
||||||
|
|
||||||
Not yet implemented. When implemented, this will be the canonical
|
# Constant body offset for sig-A IDFW files (verified across 151/154 corpus
|
||||||
entry point for reading Thor binaries — the ASCII sidecar parser
|
# files in tests/fixtures/THORDATA_example). The body is the segment-rotated
|
||||||
becomes an optional fast-path metadata supplement rather than the
|
# block stream consumed by decode_waveform_v2; bytes [0:3] are the magic
|
||||||
sole source of device-authoritative data.
|
# ``00 02 00`` preamble.
|
||||||
|
_BODY_START_SIG_A = 0x0F1F
|
||||||
|
|
||||||
|
# Geophone count → in/s, derived from sidecar ground truth: the smallest
|
||||||
|
# non-zero sample in the 1,014-file corpus is 0.0003 in/s.
|
||||||
|
_GEO_LSB_IPS = 0.0003
|
||||||
|
|
||||||
|
# Microphone count → psi, derived from sidecar regression on 50 sample
|
||||||
|
# pairs from UM11719_20231219162723.IDFW (mic-heavy event).
|
||||||
|
_MIC_LSB_PSI = 2.14e-6
|
||||||
|
|
||||||
|
# IDFH histogram constants.
|
||||||
|
_IDFH_INTERVAL_SIZE = 72 # bytes per per-interval record
|
||||||
|
_IDFH_SEGMENT_HEADER = 10 # bytes: [len_be 2B][0a 00 00 00 4B][00 NN 2B][05 3f 2B]
|
||||||
|
_IDFH_SEGMENT_TAIL = 2 # bytes after the interval data block, before next marker
|
||||||
|
_IDFH_HALFP_FREQ_NUM = 512.0 # freq_hz = NUM / halfp; halfp ≤ 5 means ">100 Hz" sentinel
|
||||||
|
_IDFH_GEO_FULL_SCALE = 10.0 # in/s — Normal range
|
||||||
|
_IDFH_INT16_FS = 32768.0
|
||||||
|
_IDFH_CHANNELS = ("Tran", "Vert", "Long", "MicL")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Binary metadata extraction ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class IdfBinaryMetadata:
    """Header fields read straight from a sig-A binary, without the .txt sidecar.

    Every field defaults to ``None`` and stays ``None`` when the value
    could not be extracted.
    """

    # Unit serial number as ASCII text.
    serial: Optional[str] = None
    # Event timestamp decoded from the header.
    event_datetime: Optional[datetime.datetime] = None
    # Sample rate as stored in the header.
    sample_rate: Optional[int] = None
    # Record time in seconds.
    record_time_sec: Optional[float] = None
    # Calibration date decoded from the header.
    calibration_date: Optional[datetime.date] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _read_ascii_z(buf: bytes, off: int, maxlen: int = 64) -> Optional[str]:
    """Read a NUL-terminated ASCII string starting at ``off``.

    At most ``maxlen`` bytes are considered; if no NUL is found in that
    window the string ends at the window (or buffer) end. Non-ASCII bytes
    are replaced, surrounding whitespace is stripped, and an empty result
    or an offset at/after the end of ``buf`` yields ``None``.
    """
    if off >= len(buf):
        return None
    limit = off + maxlen
    nul = buf.find(b"\x00", off, limit)
    stop = nul if nul >= 0 else min(limit, len(buf))
    text = buf[off:stop].decode("ascii", errors="replace").strip()
    return text if text else None
|
||||||
|
|
||||||
|
|
||||||
|
def _decode_8byte_timestamp(buf: bytes, off: int) -> Optional[datetime.datetime]:
    """Decode an 8-byte timestamp at ``off``; return ``None`` if implausible.

    Layout: ``[day][month][year_hi][year_lo][unknown][hour][min][sec]``.
    The year must lie in 2015..2050 and every other field in its calendar
    or clock range; otherwise, or if fewer than 8 bytes remain, the result
    is ``None``.
    """
    if off + 8 > len(buf):
        return None
    raw = buf[off : off + 8]
    day, mon, yh, yl, _unk, hr, mn, sc = raw
    year = (yh << 8) | yl
    date_ok = 2015 <= year <= 2050 and 1 <= mon <= 12 and 1 <= day <= 31
    time_ok = 0 <= hr < 24 and 0 <= mn < 60 and 0 <= sc < 60
    if not (date_ok and time_ok):
        return None
    try:
        stamp = datetime.datetime(year, mon, day, hr, mn, sc)
    except ValueError:
        # e.g. day 31 in a 30-day month passes the range check above
        return None
    return stamp
|
||||||
|
|
||||||
|
|
||||||
|
def extract_binary_metadata(buf: bytes) -> IdfBinaryMetadata:
    """Pull serial/timestamp/sample_rate/record_time/calibration from the
    sig-A binary header.

    Field positions confirmed against UM11719_20231219162723.IDFW; stable
    across the 151-file sig-A corpus.

    Each field is validated independently. A field that is missing,
    truncated or out of its plausible range is left as ``None`` in the
    returned :class:`IdfBinaryMetadata`; this function does not raise for
    short buffers.
    """
    md = IdfBinaryMetadata()

    # Serial: null-terminated ASCII at 0x14E.
    md.serial = _read_ascii_z(buf, 0x14E, maxlen=16)

    # Sample rate + record time live in a BW-compatible compliance block.
    # Locate the 6-byte anchor `be 80 00 00 00 00` and read offsets relative
    # to it: anchor-6 = sample_rate uint16 BE; anchor+6 = record_time float32 BE.
    anchor = buf.find(b"\xbe\x80\x00\x00\x00\x00", 0x800, 0xA00)
    if anchor >= 0:
        # The search window starts at 0x800, so anchor - 6 is a valid
        # non-negative offset and the two bytes there always exist.
        (sr,) = struct.unpack_from(">H", buf, anchor - 6)
        if sr in (256, 512, 1024, 2048, 4096):
            md.sample_rate = sr
        # The float follows the anchor and may be cut off in a truncated file.
        if anchor + 10 <= len(buf):
            (rt,) = struct.unpack_from(">f", buf, anchor + 6)
            # NaN fails this comparison and is rejected along with
            # out-of-range values.
            if 0.1 <= rt <= 600.0:
                md.record_time_sec = float(rt)

    # Event timestamp: 8 bytes. Position differs between IDFW (0x97A) and
    # IDFH (0x9F8); try both fixed offsets in that order and accept the
    # first one that decodes to a plausible date/time.
    for off in (0x97A, 0x9F8):
        ts = _decode_8byte_timestamp(buf, off)
        if ts is not None:
            md.event_datetime = ts
            break

    # Calibration date: day, month, year_be at 0x194-0x197.
    if len(buf) > 0x197:
        day, mon = buf[0x194], buf[0x195]
        year = int.from_bytes(buf[0x196 : 0x198], "big")
        if 1 <= mon <= 12 and 1 <= day <= 31 and 2015 <= year <= 2050:
            try:
                md.calibration_date = datetime.date(year, mon, day)
            except ValueError:
                # Range-valid but impossible date (e.g. 31 February).
                pass

    return md
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Sample decoder + unit conversion ───────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _decode_waveform_samples(buf: bytes) -> Optional[dict]:
    """Run the BW block codec over the sig-A body at file offset 0x0f1f.

    The result of ``decode_waveform_v2`` is passed through unchanged: raw
    decoder counts per channel (geo LSB = 0.0003 in/s; mic counts use
    their own unit, see :func:`mic_count_to_psi`). ``None`` is returned
    when the buffer is too short to hold a body or the decoder fails.
    """
    minimum_size = _BODY_START_SIG_A + 8
    if len(buf) >= minimum_size:
        return decode_waveform_v2(buf[_BODY_START_SIG_A:])
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def geo_count_to_ips(count: int) -> float:
    """Scale a Thor geophone decoder count to inches per second.

    One count equals ``_GEO_LSB_IPS`` (0.0003) in/s.
    """
    ips = count * _GEO_LSB_IPS
    return ips
|
||||||
|
|
||||||
|
|
||||||
|
def mic_count_to_psi(count: int) -> float:
    """Scale a Thor microphone decoder count to psi.

    The factor ``_MIC_LSB_PSI`` comes from a regression over 50 sample
    pairs in UM11719_20231219162723.IDFW and is consistent to about 5%.
    It can be refined once the calibration constants in the channel block
    are decoded.
    """
    psi = count * _MIC_LSB_PSI
    return psi
|
||||||
|
|
||||||
|
|
||||||
|
# ─── IDFH histogram decoder ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class IdfhInterval:
    """A single decoded histogram interval (typically one minute of monitoring).

    For each of the four channels the record keeps the minimum and maximum
    ADC counts (int16 BE in the file) and the half-period sample count.
    """

    # File byte offset of the 72-byte record this interval was decoded from.
    offset: int
    # Transverse channel
    tran_min: int
    tran_max: int
    tran_halfp: int
    # Vertical channel
    vert_min: int
    vert_max: int
    vert_halfp: int
    # Longitudinal channel
    long_min: int
    long_max: int
    long_halfp: int
    # Microphone channel
    micl_min: int
    micl_max: int
    micl_halfp: int

    def peak_count(self, channel: str) -> int:
        """Return the peak magnitude in counts: ``max(|min|, |max|)``.

        ``channel`` is matched case-insensitively against the field
        prefixes (``"Tran"``, ``"Vert"``, ``"Long"``, ``"MicL"``).
        """
        prefix = channel.lower()
        low = getattr(self, prefix + "_min")
        high = getattr(self, prefix + "_max")
        return max(abs(low), abs(high))

    def peak_ips(self, channel: str) -> float:
        """Convert peak count to in/s (geo channels only)."""
        counts = self.peak_count(channel)
        return counts / _IDFH_INT16_FS * _IDFH_GEO_FULL_SCALE

    def freq_hz(self, channel: str) -> Optional[float]:
        """Return the frequency in Hz as 512 / half-period.

        A half-period of 5 or less is the ">100 Hz" sentinel (matching the
        sidecar convention) and yields ``None``.
        """
        half_period = getattr(self, channel.lower() + "_halfp")
        if half_period > 5:
            return _IDFH_HALFP_FREQ_NUM / half_period
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _decode_idfh_interval(buf72: bytes, offset: int) -> IdfhInterval:
    """Decode one 72-byte interval record into per-channel min/max/halfp.

    Args:
        buf72: The 72-byte record: four 16-byte per-channel blocks in the
            order Tran, Vert, Long, MicL, followed by an 8-byte tail.
        offset: File offset of the record, stored on the result unchanged.

    Returns:
        An :class:`IdfhInterval` with min, max and half-period per channel.

    Raises:
        struct.error: If ``buf72`` is too short to hold the fields read.
    """
    # Uses the module-level ``struct`` import; no local re-import needed.
    fields = {}
    for index, prefix in enumerate(("tran", "vert", "long", "micl")):
        # First 8 bytes of each channel block: int16 min, int16 max, an
        # int16 of unknown role (possibly time-of-peak), uint16 half-period.
        mn, mx, _unknown, halfp = struct.unpack_from(">hhhH", buf72, index * 16)
        # block[10:12] and block[14:16] are uint16 BE with unknown semantics
        # (likely sum / count contributions for the PVS computation).
        fields[f"{prefix}_min"] = mn
        fields[f"{prefix}_max"] = mx
        fields[f"{prefix}_halfp"] = halfp
    # Tail 8 bytes (buf72[64:72]) carry PVS-related data; not yet decoded.
    return IdfhInterval(offset=offset, **fields)
|
||||||
|
|
||||||
|
|
||||||
|
def decode_idfh_body(buf: bytes) -> list:
    """Walk an IDFH file and decode every interval record.

    The body has one or more segments. Each segment starts with a 10-byte
    header ``[length_be 2B][0a 00 00 00][00 NN_counter][05 3f]`` followed
    by ``n`` 72-byte interval records. ``length`` is counted from the
    start of the length field through the end of the interval block
    (= 10 + 72 × n_intervals). After a decoded segment the scan resumes
    2 bytes past its end.

    Candidate magics that fail header validation, sit too close to either
    end of the buffer to carry a full header, or declare no intervals are
    skipped and the scan continues. A segment whose interval block is cut
    off by the end of the buffer contributes only its complete records.

    Confirmed against the 859-file corpus (181,071 intervals decoded; 1
    failure is the sig-B BE9439 file).

    Returns:
        A list of :class:`IdfhInterval` in file order (possibly empty).
    """
    magic = b"\x0a\x00\x00\x00"
    intervals: list = []
    size = len(buf)
    i = 0
    while True:
        j = buf.find(magic, i)
        if j < 0:
            break
        # Validate: [length_be][0a 00 00 00][00 NN][05 3f]. The header needs
        # 2 bytes before the magic and 8 bytes from it; checking that first
        # avoids indexing past the end on a truncated file.
        if (
            j < 2
            or j + 8 > size
            or buf[j + 4] != 0x00
            or buf[j + 6 : j + 8] != b"\x05\x3f"
        ):
            i = j + 1
            continue
        length = int.from_bytes(buf[j - 2 : j], "big")
        n = (length - _IDFH_SEGMENT_HEADER) // _IDFH_INTERVAL_SIZE
        if n <= 0:
            i = j + 1
            continue
        header_start = j - 2
        interval_start = header_start + _IDFH_SEGMENT_HEADER
        for k in range(n):
            off = interval_start + k * _IDFH_INTERVAL_SIZE
            if off + _IDFH_INTERVAL_SIZE > size:
                # Truncated segment: keep what was decoded so far.
                break
            chunk = buf[off : off + _IDFH_INTERVAL_SIZE]
            intervals.append(_decode_idfh_interval(chunk, off))
        # Advance past this segment + the 2-byte tail. Because n > 0 the
        # length is at least 82, so the scan position always moves forward.
        i = header_start + length + _IDFH_SEGMENT_TAIL
    return intervals
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Top-level reader ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class IdfReadResult:
    """What :func:`read_idf_file` hands back to its caller.

    Waveform files (``.IDFW``) fill ``samples`` with per-channel arrays in
    Thor decoder counts and leave ``intervals`` as ``None``. Histogram
    files (``.IDFH``) leave ``samples`` empty and fill ``intervals`` with
    the per-interval records (peaks, freqs).
    """

    # Event record built for the file.
    event: IdfEvent
    # {"Tran": [...], ...} for IDFW; empty for IDFH.
    samples: dict
    # Header fields extracted from the binary itself.
    binary_metadata: IdfBinaryMetadata
    # Always "thor" for now (sig-A genuine Thor).
    signature: str
    # list[IdfhInterval] for IDFH; None for IDFW.
    intervals: Optional[list] = None
|
||||||
|
|
||||||
|
|
||||||
|
def _assemble_idf_event(
    p: Path,
    md: IdfBinaryMetadata,
    kind: str,
    event_type: str,
    peaks: "IdfPeaks",
) -> IdfEvent:
    """Build the ``IdfReport`` + ``IdfEvent`` pair shared by both read paths.

    ``kind`` is the ``IdfEvent.kind`` value ("Waveform" / "Histogram") and
    ``event_type`` the matching ``IdfReport.event_type`` string.
    """
    rep = IdfReport(
        serial_number=md.serial,
        event_type=event_type,
        event_datetime=md.event_datetime,
        filename=p.name,
        sample_rate=md.sample_rate,
        record_time_sec=md.record_time_sec,
    )
    return IdfEvent(
        # Placeholders keep the event constructible when the binary header
        # yields no serial / timestamp.
        serial=md.serial or "UNKNOWN",
        timestamp=md.event_datetime or datetime.datetime(1970, 1, 1),
        kind=kind,
        filename=p.name,
        sample_rate=md.sample_rate,
        record_time_sec=md.record_time_sec,
        peaks=peaks,
        report=rep,
    )


def read_idf_file(path: Union[str, Path]) -> IdfReadResult:
    """Parse a Thor ``.IDFW`` or ``.IDFH`` binary into an :class:`IdfReadResult`.

    The file type is chosen from the filename suffix (case-insensitive):
    ``.IDFH`` is decoded as a histogram via ``decode_idfh_body()``; every
    other suffix is decoded as a waveform.

    * Waveform: ``samples`` holds the decoded per-channel arrays and the
      geophone peaks are the largest absolute sample count per channel,
      converted with :func:`geo_count_to_ips`.
    * Histogram: ``samples`` is empty, ``intervals`` holds the decoded
      interval records, and each geophone peak is the maximum of
      ``peak_ips()`` across all intervals.

    In both cases ``peak_vector_sum_ips`` and ``mic_pspl_dbl`` are left as
    ``None``; the paired ``.txt`` sidecar carries those values.  The caller
    converts int sample counts to physical units via
    :func:`geo_count_to_ips` / :func:`mic_count_to_psi`.

    Args:
        path: Location of the IDF binary on disk.

    Returns:
        An :class:`IdfReadResult` with ``signature == "thor"``.

    Raises:
        ValueError: The file is shorter than 16 bytes or lacks the
            Instantel tag, the 6-byte signature prefix is unknown, the
            histogram body yields no intervals, or the waveform codec
            returns ``None``.
        NotImplementedError: The file carries a Series III (Blastware)
            STRT header inside an IDF-named container.
    """
    p = Path(path)
    buf = p.read_bytes()

    if len(buf) < 16 or buf[6:16] != _INSTANTEL_TAG + b"\x00":
        raise ValueError(f"{p.name}: not an IDF file (missing Instantel magic)")

    sig_prefix = buf[:6]
    if sig_prefix == _THOR_PREFIX:
        signature = "thor"
    elif sig_prefix == _BW_STRAY_PREFIX:
        raise NotImplementedError(
            f"{p.name}: file has a Series III (Blastware) STRT header in "
            "an IDF-named container — not a Thor binary. Route through "
            "minimateplus.event_file_io.read_blastware_file() instead "
            "(peaks decode; samples & full metadata don't, but it's not "
            "Thor data so the Thor codec doesn't apply)."
        )
    else:
        raise ValueError(f"{p.name}: unknown IDF signature {sig_prefix.hex()}")

    is_histogram = p.suffix.upper() == ".IDFH"
    md = extract_binary_metadata(buf)

    if is_histogram:
        intervals = decode_idfh_body(buf)
        if not intervals:
            raise ValueError(f"{p.name}: IDFH body decoded no intervals")

        # Peaks: max across all intervals on each channel (per-channel max
        # of stored max-magnitudes; sidecar's PPV row carries the same).
        peaks = IdfPeaks(
            transverse_ips=max(
                (iv.peak_ips("Tran") for iv in intervals), default=0.0
            ),
            vertical_ips=max(
                (iv.peak_ips("Vert") for iv in intervals), default=0.0
            ),
            longitudinal_ips=max(
                (iv.peak_ips("Long") for iv in intervals), default=0.0
            ),
            peak_vector_sum_ips=None,
            mic_pspl_dbl=None,
        )
        event = _assemble_idf_event(p, md, "Histogram", "Full Histogram", peaks)
        return IdfReadResult(
            event=event,
            samples={},
            binary_metadata=md,
            signature=signature,
            intervals=intervals,
        )

    # Waveform path.
    decoded = _decode_waveform_samples(buf)
    if decoded is None:
        raise ValueError(f"{p.name}: waveform body codec failed")

    def _peak_ips(ch: str) -> float:
        # A missing or empty channel yields a zero-count peak.
        arr = decoded.get(ch, [])
        return geo_count_to_ips(max((abs(v) for v in arr), default=0))

    peaks = IdfPeaks(
        transverse_ips=_peak_ips("Tran"),
        vertical_ips=_peak_ips("Vert"),
        longitudinal_ips=_peak_ips("Long"),
        # PVS requires aligned per-sample √(T²+V²+L²); leave None — the
        # sidecar carries it and the bridge picks it up if present.
        peak_vector_sum_ips=None,
        mic_pspl_dbl=None,
    )
    event = _assemble_idf_event(p, md, "Waveform", "Full Waveform", peaks)

    return IdfReadResult(
        event=event,
        samples=decoded,
        binary_metadata=md,
        signature=signature,
    )
|
)
|
||||||
|
|||||||
+135
-23
@@ -467,21 +467,21 @@ class WaveformStore:
|
|||||||
Ingest a Thor (Micromate Series IV) IDF event file (`.IDFW` or
|
Ingest a Thor (Micromate Series IV) IDF event file (`.IDFW` or
|
||||||
`.IDFH`) produced by Thor's TXT exporter.
|
`.IDFH`) produced by Thor's TXT exporter.
|
||||||
|
|
||||||
Thor binaries are stored as opaque bytes — seismo-relay doesn't
|
|
||||||
yet decode the proprietary IDF binary format (codec slot lives
|
|
||||||
at ``micromate/idf_file.py``). Device-authoritative metadata
|
|
||||||
comes from the paired ``.IDFW.txt`` / ``.IDFH.txt`` sidecar
|
|
||||||
when supplied.
|
|
||||||
|
|
||||||
Workflow:
|
Workflow:
|
||||||
1. Parse the paired TXT report (when supplied) via
|
1. For sig-A `.IDFW` binaries, decode samples + binary metadata
|
||||||
``micromate.parse_idf_report`` → dict.
|
via ``micromate.idf_file.read_idf_file()``. Failure or
|
||||||
2. Wrap parsed dict + filename into a typed ``micromate.IdfEvent``.
|
non-IDFW path falls through to the .txt-only flow.
|
||||||
3. Copy bytes verbatim into ``<root>/<serial>/<filename>``.
|
2. Parse the paired TXT report (when supplied) via
|
||||||
4. Bridge IdfEvent → ``minimateplus.Event`` (for the existing
|
``micromate.parse_idf_report`` → dict. TXT remains the
|
||||||
sidecar / DB insert machinery) via
|
source of truth for fields the binary doesn't yet supply
|
||||||
``IdfEvent.to_minimateplus_event(waveform_key)``.
|
(full peak set with ZC freq / Time of Peak, sensor self-check,
|
||||||
5. Write the ``.sfm.json`` sidecar with
|
firmware string, project strings).
|
||||||
|
3. Wrap parsed dict + filename into a typed ``micromate.IdfEvent``.
|
||||||
|
4. Copy bytes verbatim into ``<root>/<serial>/<filename>``.
|
||||||
|
5. Bridge IdfEvent → ``minimateplus.Event`` and attach
|
||||||
|
``raw_samples`` from the binary decoder (when available).
|
||||||
|
6. Write the `.h5` clean-waveform file when samples decoded.
|
||||||
|
7. Write the ``.sfm.json`` sidecar with
|
||||||
``source.kind = "idf-import"`` and the full raw IDF report
|
``source.kind = "idf-import"`` and the full raw IDF report
|
||||||
under ``extensions.idf_report``.
|
under ``extensions.idf_report``.
|
||||||
|
|
||||||
@@ -490,7 +490,33 @@ class WaveformStore:
|
|||||||
"""
|
"""
|
||||||
from micromate import IdfEvent, parse_idf_report
|
from micromate import IdfEvent, parse_idf_report
|
||||||
|
|
||||||
# Parse the .txt sidecar (best-effort; non-fatal on failure).
|
# 1. Binary decode (sig-A IDFW and IDFH). Non-fatal: any failure
|
||||||
|
# leaves samples / binary metadata unfilled and we proceed with
|
||||||
|
# the .txt path as before.
|
||||||
|
idf_samples: Optional[dict] = None
|
||||||
|
idf_intervals: Optional[list] = None
|
||||||
|
binary_md = None
|
||||||
|
binary_peaks = None
|
||||||
|
is_histogram = False
|
||||||
|
try:
|
||||||
|
from micromate.idf_file import read_idf_file
|
||||||
|
res = read_idf_file(source_path)
|
||||||
|
idf_samples = res.samples or None
|
||||||
|
idf_intervals = res.intervals
|
||||||
|
is_histogram = res.intervals is not None
|
||||||
|
binary_md = res.binary_metadata
|
||||||
|
binary_peaks = res.event.peaks
|
||||||
|
except NotImplementedError:
|
||||||
|
# sig-B — codec doesn't handle this yet.
|
||||||
|
pass
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning(
|
||||||
|
"save_imported_idf: binary codec failed for %s: %s — "
|
||||||
|
"falling back to .txt-only ingest",
|
||||||
|
source_path.name, exc,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 2. Parse the .txt sidecar (best-effort; non-fatal on failure).
|
||||||
report_dict: dict = {}
|
report_dict: dict = {}
|
||||||
if idf_report_text is not None:
|
if idf_report_text is not None:
|
||||||
try:
|
try:
|
||||||
@@ -501,7 +527,38 @@ class WaveformStore:
|
|||||||
exc,
|
exc,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build the typed IdfEvent. Filename is authoritative for
|
# 3. Backfill report_dict with binary metadata for fields the
|
||||||
|
# .txt didn't supply. The .txt value always wins when present;
|
||||||
|
# binary metadata (serial, timestamp, sample_rate, record time,
|
||||||
|
# event type) is used only where the .txt field is missing or empty.
|
||||||
|
if binary_md is not None:
|
||||||
|
if binary_md.serial and not report_dict.get("serial_number"):
|
||||||
|
report_dict["serial_number"] = binary_md.serial
|
||||||
|
if binary_md.event_datetime and not report_dict.get("event_datetime"):
|
||||||
|
report_dict["event_datetime"] = binary_md.event_datetime
|
||||||
|
if binary_md.sample_rate and not report_dict.get("sample_rate"):
|
||||||
|
report_dict["sample_rate"] = binary_md.sample_rate
|
||||||
|
if binary_md.record_time_sec and not report_dict.get("record_time_sec"):
|
||||||
|
report_dict["record_time_sec"] = binary_md.record_time_sec
|
||||||
|
# Calibration date (binary) vs calibration text (.txt) cohabit
|
||||||
|
# under different keys; no overwrite needed.
|
||||||
|
if binary_md.event_datetime and not report_dict.get("event_type"):
|
||||||
|
report_dict["event_type"] = (
|
||||||
|
"Full Histogram" if is_histogram else "Full Waveform"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Binary-derived peaks fill in when the .txt didn't supply them.
|
||||||
|
# They're ~3% low vs the device-authoritative .txt values (residual
|
||||||
|
# codec drift), so .txt always wins when present.
|
||||||
|
if binary_peaks is not None:
|
||||||
|
if binary_peaks.transverse_ips and not report_dict.get("tran_ppv"):
|
||||||
|
report_dict["tran_ppv"] = binary_peaks.transverse_ips
|
||||||
|
if binary_peaks.vertical_ips and not report_dict.get("vert_ppv"):
|
||||||
|
report_dict["vert_ppv"] = binary_peaks.vertical_ips
|
||||||
|
if binary_peaks.longitudinal_ips and not report_dict.get("long_ppv"):
|
||||||
|
report_dict["long_ppv"] = binary_peaks.longitudinal_ips
|
||||||
|
|
||||||
|
# 4. Build the typed IdfEvent. Filename is authoritative for
|
||||||
# (serial, timestamp, kind); the report's event_datetime takes
|
# (serial, timestamp, kind); the report's event_datetime takes
|
||||||
# precedence over the filename timestamp inside from_report().
|
# precedence over the filename timestamp inside from_report().
|
||||||
idf_event = IdfEvent.from_report(report_dict, source_path.name)
|
idf_event = IdfEvent.from_report(report_dict, source_path.name)
|
||||||
@@ -511,7 +568,7 @@ class WaveformStore:
|
|||||||
# serial that overrides a misnamed export).
|
# serial that overrides a misnamed export).
|
||||||
serial = serial_hint or idf_event.serial or "UNKNOWN"
|
serial = serial_hint or idf_event.serial or "UNKNOWN"
|
||||||
|
|
||||||
# Filesystem write.
|
# 5. Filesystem write of binary bytes.
|
||||||
filename = source_path.name
|
filename = source_path.name
|
||||||
bw_path = self._serial_dir(serial) / filename
|
bw_path = self._serial_dir(serial) / filename
|
||||||
bw_path.write_bytes(idf_bytes)
|
bw_path.write_bytes(idf_bytes)
|
||||||
@@ -523,13 +580,41 @@ class WaveformStore:
|
|||||||
# surrogate — every distinct binary maps to a distinct row.
|
# surrogate — every distinct binary maps to a distinct row.
|
||||||
waveform_key = bytes.fromhex(sha256)[:16]
|
waveform_key = bytes.fromhex(sha256)[:16]
|
||||||
|
|
||||||
# Bridge to minimateplus.Event for the existing sidecar / DB
|
# 6. Bridge to minimateplus.Event for the existing sidecar / DB
|
||||||
# insert paths. See IdfEvent.to_minimateplus_event() for the
|
# insert paths. See IdfEvent.to_minimateplus_event() for the
|
||||||
# caveats of this bridge (mic units, missing fields → sidecar).
|
# caveats of this bridge (mic units, missing fields → sidecar).
|
||||||
ev = idf_event.to_minimateplus_event(waveform_key)
|
ev = idf_event.to_minimateplus_event(waveform_key)
|
||||||
|
|
||||||
# Write the sidecar. Source kind "idf-import" was added to the
|
# Attach the decoded sample arrays. Thor's decoder counts use
|
||||||
# allow-list in event_file_io.event_to_sidecar_dict for this.
|
# LSB = 0.0003 in/s for geo (vs BW's 16-count units at 0.005 in/s)
|
||||||
|
# — the .h5 writer's geo_range="normal" yields LSB = 10/32768
|
||||||
|
# ≈ 0.000305 in/s, so plotted samples come out ~1.7% high.
|
||||||
|
# Acceptable known offset; refine with a Thor-aware h5 path later.
|
||||||
|
if idf_samples is not None:
|
||||||
|
ev.raw_samples = idf_samples
|
||||||
|
n_samples = max((len(idf_samples.get(ch, [])) for ch in ("Tran", "Vert", "Long", "MicL")), default=0)
|
||||||
|
ev.total_samples = ev.total_samples or n_samples
|
||||||
|
|
||||||
|
# 7. Write the .h5 clean-waveform file when we actually have samples.
|
||||||
|
# Histograms (IDFH) don't have waveform samples — skip h5 for those.
|
||||||
|
hdf5_filename: Optional[str] = None
|
||||||
|
if idf_samples is not None and not is_histogram:
|
||||||
|
hdf5_path = self.hdf5_path_for(serial, filename)
|
||||||
|
try:
|
||||||
|
event_hdf5.write_event_hdf5(
|
||||||
|
hdf5_path, ev,
|
||||||
|
serial=serial,
|
||||||
|
geo_range="normal", # Thor's geo full scale is also 10 in/s (Normal)
|
||||||
|
source_kind="idf-import",
|
||||||
|
)
|
||||||
|
hdf5_filename = hdf5_path.name
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning(
|
||||||
|
"save_imported_idf: HDF5 write failed for %s: %s — continuing without .h5",
|
||||||
|
hdf5_path, exc,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 8. Write the sidecar. Source kind "idf-import" is on the allow-list.
|
||||||
sidecar_path = self.sidecar_path_for(serial, filename)
|
sidecar_path = self.sidecar_path_for(serial, filename)
|
||||||
existing_review = None
|
existing_review = None
|
||||||
if sidecar_path.exists():
|
if sidecar_path.exists():
|
||||||
@@ -554,19 +639,46 @@ class WaveformStore:
|
|||||||
# Time of Peak, sensor self-check, calibration, firmware).
|
# Time of Peak, sensor self-check, calibration, firmware).
|
||||||
if report_dict:
|
if report_dict:
|
||||||
sidecar["extensions"]["idf_report"] = report_dict
|
sidecar["extensions"]["idf_report"] = report_dict
|
||||||
|
# For histograms, also stash the binary-decoded per-interval
|
||||||
|
# records so the UI / report layer doesn't need to re-walk the
|
||||||
|
# IDFH file at render time.
|
||||||
|
if idf_intervals is not None:
|
||||||
|
sidecar["extensions"]["idf_intervals"] = [
|
||||||
|
{
|
||||||
|
"offset": iv.offset,
|
||||||
|
"tran_peak": iv.peak_count("Tran"),
|
||||||
|
"tran_halfp": iv.tran_halfp,
|
||||||
|
"tran_freq": iv.freq_hz("Tran"),
|
||||||
|
"vert_peak": iv.peak_count("Vert"),
|
||||||
|
"vert_halfp": iv.vert_halfp,
|
||||||
|
"vert_freq": iv.freq_hz("Vert"),
|
||||||
|
"long_peak": iv.peak_count("Long"),
|
||||||
|
"long_halfp": iv.long_halfp,
|
||||||
|
"long_freq": iv.freq_hz("Long"),
|
||||||
|
"mic_peak": iv.peak_count("MicL"),
|
||||||
|
"mic_halfp": iv.micl_halfp,
|
||||||
|
"mic_freq": iv.freq_hz("MicL"),
|
||||||
|
}
|
||||||
|
for iv in idf_intervals
|
||||||
|
]
|
||||||
event_file_io.write_sidecar(sidecar_path, sidecar)
|
event_file_io.write_sidecar(sidecar_path, sidecar)
|
||||||
|
|
||||||
log.info(
|
log.info(
|
||||||
"WaveformStore.save_imported_idf serial=%s filename=%s filesize=%d "
|
"WaveformStore.save_imported_idf serial=%s filename=%s filesize=%d "
|
||||||
"report_attached=%s",
|
"kind=%s report_attached=%s binary_decoded=%s h5=%s intervals=%d",
|
||||||
serial, filename, filesize, bool(report_dict),
|
serial, filename, filesize,
|
||||||
|
"histogram" if is_histogram else "waveform",
|
||||||
|
bool(report_dict),
|
||||||
|
(idf_samples is not None) or (idf_intervals is not None),
|
||||||
|
hdf5_filename or "(skipped)",
|
||||||
|
len(idf_intervals) if idf_intervals else 0,
|
||||||
)
|
)
|
||||||
return ev, {
|
return ev, {
|
||||||
"filename": filename,
|
"filename": filename,
|
||||||
"filesize": filesize,
|
"filesize": filesize,
|
||||||
"sha256": sha256,
|
"sha256": sha256,
|
||||||
"a5_pickle_filename": None,
|
"a5_pickle_filename": None,
|
||||||
"hdf5_filename": None,
|
"hdf5_filename": hdf5_filename,
|
||||||
"sidecar_filename": sidecar_path.name,
|
"sidecar_filename": sidecar_path.name,
|
||||||
"serial": serial,
|
"serial": serial,
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user