minimateplus: wire read_blastware_file to verified body codec
`read_blastware_file()` was still calling `_decode_samples_4ch_int16_le`
(the retracted int16-LE-interleaved hypothesis) on the body bytes,
producing ±32K noise on every channel of every BW file read from disk.
This was the path watcher-forwarded events take into the system
(via the import endpoint → save_imported_bw → read_blastware_file,
since the watcher doesn't ship A5 frames), so every .h5 sidecar
generated for a forwarded event has been wrong since the feature
shipped.
The fix is mechanical: pass the body bytes straight to
`waveform_codec.decode_waveform_v2()` and run the result through
`decoded_to_adc_counts()` for the 16x geo scaling. The body already
starts with the codec's exact 7-byte preamble `00 02 00 [Tran[0] BE]
[Tran[1] BE]` — confirmed by `body[:3].hex()` across all 9 fixture
events. No body-slice adjustment needed.
If the codec returns None (truncated/malformed file, synthetic test
input with no real waveform), fall back to empty channels with a log
warning. The rest of the event (timestamp, waveform_key, project
strings, sensor_location, peaks-from-samples=0) is still recoverable.
Verified against the bundled fixture corpus:
V70 Tran/Vert/Long 3328/3328 sample-sets match .TXT ground truth
within the 0.005 in/s display quantum, every row
6S0/RG0/AB0/470 (5-8-26) 3328/2304/1280/1280 samples; Vert PPVs
match BW's own report within 0.02 in/s
JQ0 3328 samples, Vert PPV 3.384 vs BW 3.465
SP0/SS0/SV0 (loud events) 3072–3328 samples; known walker
tail-truncation 1–7 samples per channel, samples reached are
byte-exact
Existing `test_read_blastware_file_round_trip` (synthetic empty event)
continues to pass thanks to the None-fallback. Codec verify scripts
(`analysis/verify_quiet_bundle.py`, `analysis/verify_full_decode.py`)
re-run unchanged.
Added two regression-lock tests in tests/test_event_file_io.py:
- test_read_blastware_file_decodes_via_codec[6 fixtures]
— verifies sample count + Vert PPV per fixture
- test_read_blastware_file_v70_samples_match_txt_truth
— verifies every one of V70's 3328 sample-sets across Tran/Vert/Long
matches the .TXT ground truth row-by-row within 0.003 in/s
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -294,6 +294,97 @@ def test_read_blastware_file_round_trip(tmp_path: Path):
|
||||
assert parsed.peak_values.peak_vector_sum == 0.0
|
||||
|
||||
|
||||
# Real Blastware event files that pin down the waveform-body decoder.
# Each entry is a 3-tuple:
#   [0] fixture path, relative to the repository root
#   [1] number of samples expected on each channel
#   [2] Vert PPV in in/s as reported by BW itself (loose sanity bound only)
_BW_CODEC_FIXTURES = [
    # decode-re-5-8-26 bundle
    ("tests/fixtures/decode-re-5-8-26/event-a/M529LKVQ.6S0", 3328, 0.780),
    ("tests/fixtures/decode-re-5-8-26/event-b/M529LK5Q.RG0", 2304, 0.505),
    ("tests/fixtures/decode-re-5-8-26/event-c/M529LK44.AB0", 1280, 0.610),
    ("tests/fixtures/decode-re-5-8-26/event-d/M529LK2V.470", 1280, 0.565),
    # 5-11-26 bundle
    ("tests/fixtures/5-11-26/M529LL1L.V70", 3328, 0.010),
    ("tests/fixtures/5-11-26/M529LL1L.JQ0", 3328, 3.465),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path,expected_n,expected_ppv", _BW_CODEC_FIXTURES)
def test_read_blastware_file_decodes_via_codec(path: str, expected_n: int, expected_ppv: float):
    """Keep ``read_blastware_file()`` on the verified waveform-body codec.

    Runs once per ``_BW_CODEC_FIXTURES`` entry. For each real BW event
    file it checks two things:

    * Tran, Vert and Long each decode to exactly ``expected_n`` samples.
    * The computed Vert PPV lies within 0.2 in/s of ``expected_ppv``,
      the value BW itself reported.

    Both checks are meant to trip if the body decoder ever slips back to
    the retracted ``_decode_samples_4ch_int16_le`` path, which yielded
    ±32K noise on every event and therefore wildly wrong PPVs.

    The test is skipped, not failed, when the fixture file is absent.
    """
    # Fixture paths are repo-relative; this file sits one level below the root.
    fixture_file = Path(__file__).resolve().parents[1] / path
    if not fixture_file.exists():
        pytest.skip(f"fixture missing: {fixture_file}")

    ev = event_file_io.read_blastware_file(fixture_file)

    samples = ev.raw_samples
    assert samples is not None
    for ch in ("Tran", "Vert", "Long"):
        n_decoded = len(samples[ch])
        assert n_decoded == expected_n, (
            f"{ch}: expected {expected_n} samples, got {n_decoded}"
        )

    # The codec emits samples in 1-count ADC units and _peaks_from_samples
    # scales them by GEO_NORMAL_FS_INS / 32767. BW derives its PPV with
    # slightly different precision/interpolation, hence the generous
    # 0.2 in/s window; a broken decoder would show up as roughly 10 in/s
    # saturation, far outside it.
    peaks = ev.peak_values
    assert peaks is not None
    vert_ppv = peaks.vert
    assert abs(vert_ppv - expected_ppv) < 0.2, (
        f"Vert PPV {vert_ppv:.3f} differs from BW's "
        f"{expected_ppv:.3f} by >0.2 in/s — codec regression?"
    )
|
||||
|
||||
|
||||
def test_read_blastware_file_v70_samples_match_txt_truth():
    """Strongest regression lock: compare V70 row-by-row with its .TXT table.

    Decodes the V70 fixture through ``read_blastware_file()`` and parses
    the sample table out of the paired ``.TXT`` file. Every Tran/Vert/Long
    value of every one of the 3328 rows must agree within 0.003 in/s,
    which is deliberately tighter than the 0.005 in/s display quantum of
    the .TXT table. The fourth .TXT column is parsed but not compared.

    The test is skipped when either fixture file is missing. A missing
    table header, a wrong row count, or a decode that is shorter than the
    truth table each fail with an explicit assertion message.
    """
    # Kept function-local, as in the original, so the test does not depend
    # on a module-level import of ``re``.
    import re

    repo_root = Path(__file__).resolve().parent.parent
    bw_path = repo_root / "tests/fixtures/5-11-26/M529LL1L.V70"
    txt_path = repo_root / "tests/fixtures/5-11-26/M529LL1L.V70.TXT"
    if not bw_path.exists() or not txt_path.exists():
        pytest.skip("V70 fixture missing")

    ev = event_file_io.read_blastware_file(bw_path)

    # Locate the column-header row of the .TXT ground-truth sample table.
    lines = txt_path.read_text().splitlines()
    header_re = re.compile(r"^Tran\s+Vert\s+Long\s+MicL?")
    hdr_idx = next(
        (i for i, line in enumerate(lines) if header_re.match(line.strip())),
        None,
    )
    # Without the default above, a missing header would surface as a bare
    # StopIteration instead of a readable failure.
    assert hdr_idx is not None, (
        f"no 'Tran Vert Long Mic' sample-table header found in {txt_path}"
    )

    # Every line after the header made of exactly four floats is a sample
    # row; anything else (blank lines, trailing text) is ignored.
    truth = []
    for line in lines[hdr_idx + 1:]:
        parts = line.split()
        if len(parts) != 4:
            continue
        try:
            truth.append([float(x) for x in parts])
        except ValueError:
            continue
    assert len(truth) == 3328, f"expected 3328 truth rows, got {len(truth)}"

    # Guard the indexing below: a missing or short decode should fail with
    # a clear message rather than a TypeError/IndexError mid-loop.
    channels = ("Tran", "Vert", "Long")
    assert ev.raw_samples is not None, "decoder returned no raw samples"
    for ch_name in channels:
        n_decoded = len(ev.raw_samples[ch_name])
        assert n_decoded >= len(truth), (
            f"{ch_name}: decoded only {n_decoded} samples for "
            f"{len(truth)} truth rows"
        )

    def adc_to_ins(count):
        """Convert an ADC count to in/s, mapping 32767 counts to 10.0."""
        # NOTE(review): 10.0 is presumably the same full-scale value as
        # GEO_NORMAL_FS_INS used by the production peak code — confirm.
        return count / 32767.0 * 10.0

    for i, truth_row in enumerate(truth):
        for ch_idx, ch_name in enumerate(channels):
            decoded_ips = adc_to_ins(ev.raw_samples[ch_name][i])
            truth_ips = truth_row[ch_idx]
            # 0.003 in/s tolerance: <0.005 quantum + small float precision room
            assert abs(decoded_ips - truth_ips) < 0.003, (
                f"row {i} {ch_name}: decoded {decoded_ips:+.4f} vs "
                f"truth {truth_ips:+.4f} (delta {decoded_ips - truth_ips:+.4f})"
            )
|
||||
|
||||
|
||||
def test_save_imported_bw_with_paired_report(tmp_path: Path):
|
||||
"""save_imported_bw + a paired BW ASCII report fold the report's
|
||||
rich derived fields into the sidecar. This is the daemon-forwarded
|
||||
|
||||
Reference in New Issue
Block a user