minimateplus: wire read_blastware_file to verified body codec #24
@@ -27,6 +27,7 @@ from typing import Optional, Union
|
|||||||
from .models import Event, PeakValues, ProjectInfo, Timestamp
|
from .models import Event, PeakValues, ProjectInfo, Timestamp
|
||||||
from . import blastware_file as _bw # avoid circular reference at module load
|
from . import blastware_file as _bw # avoid circular reference at module load
|
||||||
from .bw_ascii_report import BwAsciiReport
|
from .bw_ascii_report import BwAsciiReport
|
||||||
|
from .waveform_codec import decode_waveform_v2, decoded_to_adc_counts
|
||||||
|
|
||||||
# Reference pressure for dB(L) → psi conversion (20 µPa expressed in psi).
|
# Reference pressure for dB(L) → psi conversion (20 µPa expressed in psi).
|
||||||
# Same constant as sfm/sfm_webapp.html so server-side and browser-side
|
# Same constant as sfm/sfm_webapp.html so server-side and browser-side
|
||||||
@@ -47,7 +48,7 @@ SIDECAR_KIND = "sfm.event"
|
|||||||
# bumped without a `pip install` re-run — leading to confusing stale
|
# bumped without a `pip install` re-run — leading to confusing stale
|
||||||
# version stamps in sidecars. Bump this constant and CHANGELOG.md
|
# version stamps in sidecars. Bump this constant and CHANGELOG.md
|
||||||
# together at release time.
|
# together at release time.
|
||||||
TOOL_VERSION = "0.16.1"
|
TOOL_VERSION = "0.20.0"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Best-effort: prefer the installed metadata when it's NEWER than the
|
# Best-effort: prefer the installed metadata when it's NEWER than the
|
||||||
@@ -755,11 +756,28 @@ def read_blastware_file(path: Union[str, Path]) -> Event:
|
|||||||
ts1 = _bw._decode_ts_be(footer[2:10])
|
ts1 = _bw._decode_ts_be(footer[2:10])
|
||||||
ts2 = _bw._decode_ts_be(footer[10:18])
|
ts2 = _bw._decode_ts_be(footer[10:18])
|
||||||
|
|
||||||
# Body: first 6 bytes are the preamble (00 00 ff ff ff ff). Strip
|
# Body: decode via the verified BW waveform-body codec. The body
|
||||||
# them before decoding samples. Any trailing tail past the last
|
# starts with the codec's 7-byte preamble ``00 02 00 [Tran[0] BE]
|
||||||
# full sample-set is silently truncated by _decode_samples_4ch.
|
# [Tran[1] BE]`` and continues with the tagged-block stream the codec
|
||||||
sample_bytes = body[6:] if body[:6].hex() in ("0000ffffffff", "0000FFFFFFFF") else body
|
# walks. See ``minimateplus/waveform_codec.py`` + ``docs/waveform_codec_re_status.md``
|
||||||
samples = _decode_samples_4ch_int16_le(sample_bytes)
|
# for the full format spec; the historical int16-LE assumption that
|
||||||
|
# ``_decode_samples_4ch_int16_le`` implements was retracted 2026-05-08
|
||||||
|
# (see ``docs/instantel_protocol_reference.md`` §7.6.1).
|
||||||
|
#
|
||||||
|
# If decode fails (malformed file, truncated body, synthetic test
|
||||||
|
# input), fall back to empty channels — the rest of the event
|
||||||
|
# (timestamp, waveform_key, project strings) is still recoverable
|
||||||
|
# and useful. The peaks-from-samples helper handles empty input
|
||||||
|
# gracefully.
|
||||||
|
decoded = decode_waveform_v2(body)
|
||||||
|
if decoded is None:
|
||||||
|
log.warning(
|
||||||
|
"%s: waveform body codec failed to decode (body starts %s) — "
|
||||||
|
"raw_samples will be empty", path, body[:8].hex(" "),
|
||||||
|
)
|
||||||
|
samples = {"Tran": [], "Vert": [], "Long": [], "MicL": []}
|
||||||
|
else:
|
||||||
|
samples = decoded_to_adc_counts(decoded)
|
||||||
|
|
||||||
# Metadata strings (label-anchored search across the body).
|
# Metadata strings (label-anchored search across the body).
|
||||||
project = _find_first_string(body, b"Project:")
|
project = _find_first_string(body, b"Project:")
|
||||||
|
|||||||
@@ -12,8 +12,20 @@ Walks `<store_root>/<serial>/<filename>` and for each BW event file:
|
|||||||
parsing the BW binary directly (peaks computed from samples).
|
parsing the BW binary directly (peaks computed from samples).
|
||||||
|
|
||||||
Clean waveform (.h5):
|
Clean waveform (.h5):
|
||||||
- Skip when <filename>.h5 already exists (idempotent).
|
- Regenerated whenever the sidecar is regenerated (sha mismatch
|
||||||
- Else write from .a5.pkl (preferred) or BW binary parse (fallback).
|
OR sidecar.source.tool_version < current TOOL_VERSION OR --force).
|
||||||
|
The .h5 and the sidecar both come from the same decoder output,
|
||||||
|
so if the sidecar is stale the .h5 is too.
|
||||||
|
- Written when missing.
|
||||||
|
- --skip-hdf5 turns off all .h5 writes.
|
||||||
|
|
||||||
|
Typical use after a decoder upgrade:
|
||||||
|
1. Pull the new seismo-relay code (which bumped TOOL_VERSION).
|
||||||
|
2. Run this script — every sidecar with an older tool_version
|
||||||
|
stamp regenerates, and the associated .h5 cascade-regenerates.
|
||||||
|
3. Operator review state (review.false_trigger, notes, reviewer)
|
||||||
|
and the sidecar's extensions block are preserved across the
|
||||||
|
regen.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python scripts/backfill_sidecars.py [--store-root PATH]
|
python scripts/backfill_sidecars.py [--store-root PATH]
|
||||||
@@ -123,6 +135,12 @@ def main(argv=None) -> int:
|
|||||||
# the sidecar was written by a build that includes any
|
# the sidecar was written by a build that includes any
|
||||||
# decoder fixes shipped since).
|
# decoder fixes shipped since).
|
||||||
# Either part failing → regenerate. --force bypasses both.
|
# Either part failing → regenerate. --force bypasses both.
|
||||||
|
#
|
||||||
|
# Tracks whether we're regenerating the sidecar this iteration
|
||||||
|
# so the .h5 logic below knows to refresh that too — staleness
|
||||||
|
# of the sidecar implies staleness of the derived .h5 (both
|
||||||
|
# come out of the same decoder).
|
||||||
|
sidecar_stale = True
|
||||||
if sidecar_path.exists() and not args.force:
|
if sidecar_path.exists() and not args.force:
|
||||||
try:
|
try:
|
||||||
existing = event_file_io.read_sidecar(sidecar_path)
|
existing = event_file_io.read_sidecar(sidecar_path)
|
||||||
@@ -136,6 +154,7 @@ def main(argv=None) -> int:
|
|||||||
ver_ok = _vt(src_ver) >= _vt(event_file_io.TOOL_VERSION)
|
ver_ok = _vt(src_ver) >= _vt(event_file_io.TOOL_VERSION)
|
||||||
if sha_ok and ver_ok:
|
if sha_ok and ver_ok:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
|
sidecar_stale = False
|
||||||
continue
|
continue
|
||||||
if sha_ok and not ver_ok:
|
if sha_ok and not ver_ok:
|
||||||
log.info(
|
log.info(
|
||||||
@@ -281,12 +300,23 @@ def main(argv=None) -> int:
|
|||||||
extensions=preserved_ext,
|
extensions=preserved_ext,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Also emit the .h5 clean-waveform file when missing OR when
|
# Also emit the .h5 clean-waveform file when:
|
||||||
# --force was passed (so a re-backfill picks up decoder fixes).
|
# - it's missing, OR
|
||||||
|
# - --force was passed, OR
|
||||||
|
# - the sidecar is being regenerated this iteration
|
||||||
|
# (sha mismatch / tool_version too old). The .h5 and
|
||||||
|
# the sidecar are both derived from the same decoder
|
||||||
|
# output, so if the sidecar is stale, so is the .h5.
|
||||||
|
# This is the path that recovers from the broken-
|
||||||
|
# int16-LE codec era — bumping TOOL_VERSION to 0.20.0+
|
||||||
|
# marks every pre-codec sidecar stale, which now
|
||||||
|
# correctly cascades to .h5 regeneration too.
|
||||||
hdf5_path = store.hdf5_path_for(serial, path.name)
|
hdf5_path = store.hdf5_path_for(serial, path.name)
|
||||||
hdf5_filename = hdf5_path.name if hdf5_path.exists() else None
|
hdf5_filename = hdf5_path.name if hdf5_path.exists() else None
|
||||||
hdf5_action = "kept"
|
hdf5_action = "kept"
|
||||||
need_h5 = not args.skip_hdf5 and (args.force or not hdf5_path.exists())
|
need_h5 = not args.skip_hdf5 and (
|
||||||
|
args.force or not hdf5_path.exists() or sidecar_stale
|
||||||
|
)
|
||||||
if need_h5:
|
if need_h5:
|
||||||
if args.dry_run:
|
if args.dry_run:
|
||||||
hdf5_action = "would (re)write"
|
hdf5_action = "would (re)write"
|
||||||
|
|||||||
@@ -294,6 +294,97 @@ def test_read_blastware_file_round_trip(tmp_path: Path):
|
|||||||
assert parsed.peak_values.peak_vector_sum == 0.0
|
assert parsed.peak_values.peak_vector_sum == 0.0
|
||||||
|
|
||||||
|
|
||||||
|
_BW_CODEC_FIXTURES = [
|
||||||
|
# (path, expected_n_samples_per_channel, BW-reported Vert PPV in/s for sanity)
|
||||||
|
("tests/fixtures/decode-re-5-8-26/event-a/M529LKVQ.6S0", 3328, 0.780),
|
||||||
|
("tests/fixtures/decode-re-5-8-26/event-b/M529LK5Q.RG0", 2304, 0.505),
|
||||||
|
("tests/fixtures/decode-re-5-8-26/event-c/M529LK44.AB0", 1280, 0.610),
|
||||||
|
("tests/fixtures/decode-re-5-8-26/event-d/M529LK2V.470", 1280, 0.565),
|
||||||
|
("tests/fixtures/5-11-26/M529LL1L.V70", 3328, 0.010),
|
||||||
|
("tests/fixtures/5-11-26/M529LL1L.JQ0", 3328, 3.465),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("path,expected_n,expected_ppv", _BW_CODEC_FIXTURES)
|
||||||
|
def test_read_blastware_file_decodes_via_codec(path: str, expected_n: int, expected_ppv: float):
|
||||||
|
"""Regression lock: ``read_blastware_file()`` must use the verified
|
||||||
|
waveform-body codec (``minimateplus.waveform_codec``), not the
|
||||||
|
retracted int16-LE assumption.
|
||||||
|
|
||||||
|
Verifies against the real BW fixture corpus: every event in the
|
||||||
|
bundled fixtures must produce the expected per-channel sample count
|
||||||
|
and a Vert PPV close to BW's own reported value. Catches any
|
||||||
|
accidental regression of the body decoder back to the old
|
||||||
|
``_decode_samples_4ch_int16_le`` path (which produced ±32K noise
|
||||||
|
on every event, giving wildly wrong PPVs).
|
||||||
|
"""
|
||||||
|
repo_root = Path(__file__).resolve().parent.parent
|
||||||
|
full_path = repo_root / path
|
||||||
|
if not full_path.exists():
|
||||||
|
pytest.skip(f"fixture missing: {full_path}")
|
||||||
|
|
||||||
|
ev = event_file_io.read_blastware_file(full_path)
|
||||||
|
assert ev.raw_samples is not None
|
||||||
|
for ch in ("Tran", "Vert", "Long"):
|
||||||
|
assert len(ev.raw_samples[ch]) == expected_n, (
|
||||||
|
f"{ch}: expected {expected_n} samples, got {len(ev.raw_samples[ch])}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# PPV check: the codec produces decoded samples in 1-count ADC units;
|
||||||
|
# _peaks_from_samples scales by GEO_NORMAL_FS_INS / 32767. BW's own
|
||||||
|
# PPV is computed at slightly different precision/interpolation, so
|
||||||
|
# we allow a 0.2 in/s tolerance — well under the broken-decoder
|
||||||
|
# signature (which would produce ~10 in/s saturation).
|
||||||
|
assert ev.peak_values is not None
|
||||||
|
assert abs(ev.peak_values.vert - expected_ppv) < 0.2, (
|
||||||
|
f"Vert PPV {ev.peak_values.vert:.3f} differs from BW's "
|
||||||
|
f"{expected_ppv:.3f} by >0.2 in/s — codec regression?"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_blastware_file_v70_samples_match_txt_truth():
|
||||||
|
"""Strongest regression lock: every one of V70's 3328 decoded
|
||||||
|
sample-sets must match the .TXT ground truth table to within
|
||||||
|
0.003 in/s (just over half the 0.005 in/s display quantum)."""
|
||||||
|
repo_root = Path(__file__).resolve().parent.parent
|
||||||
|
bw_path = repo_root / "tests/fixtures/5-11-26/M529LL1L.V70"
|
||||||
|
txt_path = repo_root / "tests/fixtures/5-11-26/M529LL1L.V70.TXT"
|
||||||
|
if not bw_path.exists() or not txt_path.exists():
|
||||||
|
pytest.skip(f"V70 fixture missing")
|
||||||
|
|
||||||
|
import re
|
||||||
|
ev = event_file_io.read_blastware_file(bw_path)
|
||||||
|
|
||||||
|
# Parse .TXT ground truth sample table
|
||||||
|
text = txt_path.read_text()
|
||||||
|
lines = text.splitlines()
|
||||||
|
hdr_idx = next(i for i, line in enumerate(lines)
|
||||||
|
if re.match(r"^Tran\s+Vert\s+Long\s+MicL?", line.strip()))
|
||||||
|
truth = []
|
||||||
|
for line in lines[hdr_idx + 1:]:
|
||||||
|
parts = line.strip().split()
|
||||||
|
if len(parts) != 4:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
truth.append([float(x) for x in parts])
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
assert len(truth) == 3328, f"expected 3328 truth rows, got {len(truth)}"
|
||||||
|
|
||||||
|
def adc_to_ins(count):
|
||||||
|
return count / 32767.0 * 10.0
|
||||||
|
|
||||||
|
for i, truth_row in enumerate(truth):
|
||||||
|
for ch_idx, ch_name in enumerate(("Tran", "Vert", "Long")):
|
||||||
|
decoded_ips = adc_to_ins(ev.raw_samples[ch_name][i])
|
||||||
|
truth_ips = truth_row[ch_idx]
|
||||||
|
# 0.003 in/s tolerance: ~half the 0.005 display quantum (0.0025) + small float precision room
|
||||||
|
assert abs(decoded_ips - truth_ips) < 0.003, (
|
||||||
|
f"row {i} {ch_name}: decoded {decoded_ips:+.4f} vs "
|
||||||
|
f"truth {truth_ips:+.4f} (delta {decoded_ips - truth_ips:+.4f})"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_save_imported_bw_with_paired_report(tmp_path: Path):
|
def test_save_imported_bw_with_paired_report(tmp_path: Path):
|
||||||
"""save_imported_bw + a paired BW ASCII report fold the report's
|
"""save_imported_bw + a paired BW ASCII report fold the report's
|
||||||
rich derived fields into the sidecar. This is the daemon-forwarded
|
rich derived fields into the sidecar. This is the daemon-forwarded
|
||||||
|
|||||||
Reference in New Issue
Block a user