From bee118506b9b0df5fd18d10670f9cb4ac99d43a4 Mon Sep 17 00:00:00 2001
From: serversdown
Date: Fri, 29 May 2026 20:09:54 +0000
Subject: [PATCH] fix(idf): decode from in-memory bytes during ingest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug shipped in v0.21.0: save_imported_idf called read_idf_file() with
`source_path` (a bare filename like "UM12947_….IDFW") BEFORE writing
the binary to disk. The codec did Path(path).read_bytes(), which
resolved relative to /app and hit FileNotFoundError. The error was
caught + logged as a warning, and ingest fell back to .txt-only —
events still landed in the DB but lost the bw_report block + .h5
waveform that the codec was supposed to produce.

Observed during a full re-forward from thor-watcher on 2026-05-29:
every Thor event logged "binary codec failed for X: [Errno 2] No such
file or directory" and got binary_decoded=False.

Fix:
- read_idf_file() gains a `data: Optional[bytes]` kwarg. When
  supplied, skips the disk read and decodes the provided bytes
  directly. `path` stays required (used for the filename in error
  messages + .IDFH vs .IDFW suffix detection); only the read is
  conditional. Backward compatible — existing positional callers
  (CLI scripts, tests) continue to work unchanged.
- save_imported_idf passes `data=idf_bytes` since the bytes are
  already in memory from the multipart upload. Filesystem write still
  happens at step 5 of the existing flow; codec just no longer depends
  on it.

Verified end-to-end against UM11719_20231219162723.IDFW from the
example-data corpus: ingest endpoint returns inserted=1, log line
shows binary_decoded=True + h5=...IDFW.h5, no warnings.

Re-forward existing Thor events from thor-watcher after deploy to
backfill the bw_report block — UPSERT preserves review state.

Co-Authored-By: Claude Opus 4.7 (1M context) --- micromate/idf_file.py | 13 +++++++++++-- sfm/waveform_store.py | 7 ++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/micromate/idf_file.py b/micromate/idf_file.py index bee7555..203a26e 100644 --- a/micromate/idf_file.py +++ b/micromate/idf_file.py @@ -326,7 +326,11 @@ class IdfReadResult: intervals: Optional[list] = None # list[IdfhInterval] for IDFH; None for IDFW -def read_idf_file(path: Union[str, Path]) -> IdfReadResult: +def read_idf_file( + path: Union[str, Path], + *, + data: Optional[bytes] = None, +) -> IdfReadResult: """Parse a Thor ``.IDFW`` binary into an ``IdfEvent`` + decoded samples. Currently implements signature-A waveforms only. Signature-B @@ -337,9 +341,14 @@ def read_idf_file(path: Union[str, Path]) -> IdfReadResult: Returns an :class:`IdfReadResult`. The caller converts int sample counts to physical units via :func:`geo_count_to_ips` / :func:`mic_count_to_psi`. + + ``path`` is used for filename in error messages and ``.IDFH`` vs + ``.IDFW`` suffix detection. When ``data`` is supplied the disk + read is skipped — useful for ingest paths that already have the + bytes in memory and where the file may not exist on disk yet. """ p = Path(path) - buf = p.read_bytes() + buf = data if data is not None else p.read_bytes() if len(buf) < 16 or buf[6:16] != _INSTANTEL_TAG + b"\x00": raise ValueError(f"{p.name}: not an IDF file (missing Instantel magic)") diff --git a/sfm/waveform_store.py b/sfm/waveform_store.py index c4861a1..3b2ba42 100644 --- a/sfm/waveform_store.py +++ b/sfm/waveform_store.py @@ -500,7 +500,12 @@ class WaveformStore: is_histogram = False try: from micromate.idf_file import read_idf_file - res = read_idf_file(source_path) + # Pass idf_bytes through `data=` — at this point in the flow + # the binary hasn't been written to disk yet, so the codec + # can't read from source_path. 
We still pass source_path so + # the codec has the filename for error messages + .IDFH + # suffix detection. + res = read_idf_file(source_path, data=idf_bytes) idf_samples = res.samples or None idf_intervals = res.intervals is_histogram = res.intervals is not None