Merge pull request 'feat: add waveform store handling' (#16) from sfm-waveform-store into main

Reviewed-on: #16
This commit was merged in pull request #16.
This commit is contained in:
2026-05-08 15:03:32 -04:00
19 changed files with 5188 additions and 462 deletions
+244 -129
View File
@@ -449,7 +449,7 @@ class MiniMateClient:
proto.confirm_erase_all()
log.info("delete_all_events: erase confirmed — device memory cleared")
def get_events(self, full_waveform: bool = False, debug: bool = False, stop_after_index: Optional[int] = None, skip_waveform_for_keys: Optional[set] = None, extra_chunks_after_metadata: int = 1) -> list[Event]:
def get_events(self, full_waveform: bool = False, debug: bool = False, stop_after_index: Optional[int] = None, skip_waveform_for_keys: Optional[set] = None, skip_waveform_for_events: Optional[dict] = None, extra_chunks_after_metadata: int = 1) -> list[Event]:
"""
Download all stored events from the device using the confirmed
1E → 0A → 0C → 5A → 1F event-iterator protocol.
@@ -497,37 +497,24 @@ class MiniMateClient:
events: list[Event] = []
idx = 0
# Legacy bare-key skip set is deprecated: the device's key counter
# resets to 0x01110000 after every memory erase, so a key in this set
# cannot be trusted to identify the same physical event across erases.
# If a caller still passes it, log a warning and ignore — full
# downloads will run for every event so the bug never silently bites.
if skip_waveform_for_keys:
log.warning(
"get_events: skip_waveform_for_keys is deprecated and unsafe "
"(post-erase key reuse); ignoring %d entries. Use "
"skip_waveform_for_events={key: timestamp_iso} instead.",
len(skip_waveform_for_keys),
)
skip_evts: dict[str, str] = dict(skip_waveform_for_events or {})
while data8[4:8] != b"\x00\x00\x00\x00":
cur_key = key4 # key for this event's 0A/1E-arm/0C/5A calls
log.info("get_events: record %d key=%s", idx, cur_key.hex())
# Fast-advance path: if this key is already downloaded, skip
# 1E-arm/0C/POLL/5A entirely. Only 0A + 1F(browse) are needed
# to advance the device's internal pointer to the next event.
# This is identical to the browse-mode walk in count_events().
if skip_waveform_for_keys and cur_key.hex() in skip_waveform_for_keys:
log.debug("get_events: key=%s already seen -- fast-advance only", cur_key.hex())
try:
proto.read_waveform_header(cur_key)
except ProtocolError as exc:
log.warning(
"get_events: 0A failed for key=%s (skip path): %s -- stopping",
cur_key.hex(), exc,
)
break
try:
key4, data8 = proto.advance_event(browse=True)
except ProtocolError as exc:
log.warning(
"get_events: 1F failed for key=%s (skip path): %s -- stopping",
cur_key.hex(), exc,
)
break
idx += 1
if stop_after_index is not None and idx > stop_after_index:
break
continue
ev = Event(index=idx)
ev._waveform_key = cur_key
@@ -574,72 +561,96 @@ class MiniMateClient:
"get_events: 0C failed for key=%s: %s", cur_key.hex(), exc
)
# SUB 1F (download-arm) — send token=0xFE BEFORE POLL+5A to arm the
# device's bulk stream state machine. Cache the returned key as a
# fallback for loop iteration when 5A fails (see iteration block below).
# Confirmed from 4-2-26 capture frames 66-67 (1F before frames 68-73 POLL).
arm_key4: Optional[bytes] = None
try:
arm_key4, _ = proto.advance_event(browse=False) # arm 5A
log.info("get_events: 1F(download) — 5A armed, arm_key=%s", arm_key4.hex())
except ProtocolError as exc:
log.warning("get_events: 1F(download) arm failed: %s", exc)
# ── Skip-5A decision based on (key, timestamp) match ──────
# If skip_waveform_for_events maps cur_key.hex() to a non-empty
# ISO timestamp matching what we just read from 0C, this is
# the same physical event we already have on disk — bypass
# the 1F(arm)+POLL+5A bulk download. Otherwise (no entry, or
# timestamp mismatch indicating post-erase reuse) fall through
# to the full download.
expected_ts = skip_evts.get(cur_key.hex(), "")
actual_ts = _event_timestamp_iso(ev)
skip_5a = bool(expected_ts and actual_ts and expected_ts == actual_ts)
if skip_5a:
log.info(
"get_events: key=%s (key, ts=%s) match — skipping 5A bulk download",
cur_key.hex(), actual_ts,
)
# POLL × 3 — BW sends 3 full POLL cycles between 1F and 5A.
# Confirmed from 4-2-26 BW TX capture (frames 68-73 before 5A at 74).
log.info("get_events: POLL × 3 before 5A")
for _p in range(3):
arm_key4: Optional[bytes] = None
a5_ok = False
if not skip_5a:
# SUB 1F (download-arm) — send token=0xFE BEFORE POLL+5A to arm the
# device's bulk stream state machine. Cache the returned key as a
# fallback for loop iteration when 5A fails (see iteration block below).
# Confirmed from 4-2-26 capture frames 66-67 (1F before frames 68-73 POLL).
try:
proto.poll()
arm_key4, _ = proto.advance_event(browse=False) # arm 5A
log.info("get_events: 1F(download) — 5A armed, arm_key=%s", arm_key4.hex())
except ProtocolError as exc:
log.warning("get_events: POLL %d failed: %s", _p, exc)
log.warning("get_events: 1F(download) arm failed: %s", exc)
# POLL × 3 — BW sends 3 full POLL cycles between 1F and 5A.
# Confirmed from 4-2-26 BW TX capture (frames 68-73 before 5A at 74).
log.info("get_events: POLL × 3 before 5A")
for _p in range(3):
try:
proto.poll()
except ProtocolError as exc:
log.warning("get_events: POLL %d failed: %s", _p, exc)
# SUB 5A — bulk waveform stream (uses cur_key, the event set up by 0A+1E+0C).
# By default (full_waveform=False): stop after frame 7 for metadata only.
# When full_waveform=True: fetch all chunks and decode raw ADC samples.
a5_ok = False
try:
if full_waveform:
log.info(
"get_events: 5A full waveform download for key=%s", cur_key.hex()
)
a5_frames = proto.read_bulk_waveform_stream(
cur_key, stop_after_metadata=False, max_chunks=128,
include_terminator=True,
)
if a5_frames:
a5_ok = True
ev._a5_frames = a5_frames # store for write_blastware_file
_decode_a5_metadata_into(a5_frames, ev)
_decode_a5_waveform(a5_frames, ev)
#
# Bypassed when skip_5a is True — the event is left with
# _a5_frames=None, which signals to the caller (e.g.
# ach_server.py) that this event was matched by (key, ts) and
# already has a stored .file in the persistent waveform store.
if not skip_5a:
try:
if full_waveform:
log.info(
"get_events: 5A decoded %d sample-sets",
len((ev.raw_samples or {}).get("Tran", [])),
"get_events: 5A full waveform download for key=%s", cur_key.hex()
)
else:
log.info(
"get_events: 5A metadata-only download for key=%s", cur_key.hex()
)
a5_frames = proto.read_bulk_waveform_stream(
cur_key, stop_after_metadata=True,
include_terminator=True,
extra_chunks_after_metadata=extra_chunks_after_metadata,
max_chunks=128,
)
if a5_frames:
a5_ok = True
ev._a5_frames = a5_frames # store for write_blastware_file
_decode_a5_metadata_into(a5_frames, ev)
log.debug(
"get_events: 5A metadata client=%r operator=%r",
ev.project_info.client if ev.project_info else None,
ev.project_info.operator if ev.project_info else None,
a5_frames = proto.read_bulk_waveform_stream(
cur_key, stop_after_metadata=False, max_chunks=128,
include_terminator=True,
)
except ProtocolError as exc:
log.warning(
"get_events: 5A failed for key=%s: %s — metadata unavailable",
cur_key.hex(), exc,
)
if a5_frames:
a5_ok = True
ev._a5_frames = a5_frames # store for write_blastware_file
_decode_a5_metadata_into(a5_frames, ev)
_decode_a5_waveform(a5_frames, ev)
log.info(
"get_events: 5A decoded %d sample-sets",
len((ev.raw_samples or {}).get("Tran", [])),
)
else:
log.info(
"get_events: 5A metadata-only download for key=%s", cur_key.hex()
)
a5_frames = proto.read_bulk_waveform_stream(
cur_key, stop_after_metadata=True,
include_terminator=True,
extra_chunks_after_metadata=extra_chunks_after_metadata,
max_chunks=128,
)
if a5_frames:
a5_ok = True
ev._a5_frames = a5_frames # store for write_blastware_file
_decode_a5_metadata_into(a5_frames, ev)
log.debug(
"get_events: 5A metadata client=%r operator=%r",
ev.project_info.client if ev.project_info else None,
ev.project_info.operator if ev.project_info else None,
)
except ProtocolError as exc:
log.warning(
"get_events: 5A failed for key=%s: %s — metadata unavailable",
cur_key.hex(), exc,
)
# SUB 1F — loop iteration.
#
@@ -652,7 +663,14 @@ class MiniMateClient:
# Confirmed from 4-3-26 browse-mode captures: browse=True params
# are correct for multi-event iteration. Conditional logic added
# 2026-04-06 to avoid post-failure state disruption.
if a5_ok:
#
# NEW 2026-05-06: when skip_5a=True we never entered the 5A
# state at all (we read 0A+1E(arm)+0C and chose to bypass).
# 1F(browse) is safe in this scenario — the device's iteration
# pointer is independent of the bulk-stream state machine, and
# we never put it into the half-attempted 5A state that the
# earlier "post-failure 1F disruption" warning is about.
if skip_5a or a5_ok:
# 5A succeeded — use browse 1F for reliable key advancement.
try:
key4, data8 = proto.advance_event(browse=True)
@@ -1174,6 +1192,27 @@ class MiniMateClient:
# Pure functions: bytes → model field population.
# Kept here (not in models.py) to isolate protocol knowledge from data shapes.
def _event_timestamp_iso(event: Event) -> str:
"""
Return a stable ISO-8601 string for the event's 0C-derived timestamp,
or "" if the event has no timestamp populated.
The format intentionally matches what `bridges/ach_server.py` writes
into `ach_state.json:downloaded_events[*]` so the (key, ts) compare
in get_events()'s skip path is a simple string equality.
"""
ts = getattr(event, "timestamp", None)
if ts is None:
return ""
try:
return datetime.datetime(
ts.year, ts.month, ts.day,
ts.hour or 0, ts.minute or 0, ts.second or 0,
).isoformat()
except Exception:
return str(ts)
def _decode_serial_number(data: bytes) -> DeviceInfo:
"""
Decode SUB EA (SERIAL_NUMBER_RESPONSE) payload into a new DeviceInfo.
@@ -1323,29 +1362,36 @@ def _decode_waveform_record_into(data: bytes, event: Event) -> None:
Modifies event in-place.
"""
# ── Record type ───────────────────────────────────────────────────────────
# Decoded from byte[1] (sub_code) first so we can gate timestamp parsing.
# ── Record type + format detection ────────────────────────────────────────
# `record_type` is the user-facing label ("Waveform" for any triggered
# event regardless of timestamp-header layout). `fmt` is the internal
# format code used to pick the right Timestamp parser; it stays
# internal and doesn't leak to the API / sidecar / UI.
try:
event.record_type = _extract_record_type(data)
except Exception as exc:
log.warning("waveform record type decode failed: %s", exc)
fmt = _detect_record_format(data)
# ── Timestamp ─────────────────────────────────────────────────────────────
# 9-byte format for sub_code=0x10 Waveform records:
# [day][sub_code][month][year:2 BE][unknown][hour][min][sec]
# sub_code=0x10 and sub_code=0x03 have different timestamp byte layouts.
# Both confirmed against Blastware event reports (BE11529, 2026-04-01 and 2026-04-03).
if event.record_type == "Waveform":
# Three timestamp-header layouts have been observed across BE11529
# firmware S338.17 — each picks a different Timestamp parser:
# "single_shot": 9-byte [day][0x10][month][year:2][unk][h][m][s]
# "continuous": 10-byte [0x10][day][0x10][month][year:2][unk][h][m][s]
# "short": 8-byte [day][month][year:2][unk][h][m][s]
# All decoded into the same Timestamp dataclass — only the byte
# offsets differ.
if fmt == "single_shot":
try:
event.timestamp = Timestamp.from_waveform_record(data)
except Exception as exc:
log.warning("waveform record timestamp decode failed: %s", exc)
elif event.record_type == "Waveform (Continuous)":
log.warning("single_shot record timestamp decode failed: %s", exc)
elif fmt == "continuous":
try:
event.timestamp = Timestamp.from_continuous_record(data)
except Exception as exc:
log.warning("continuous record timestamp decode failed: %s", exc)
elif event.record_type == "Waveform (Short)":
elif fmt == "short":
try:
event.timestamp = Timestamp.from_short_record(data)
except Exception as exc:
@@ -1523,46 +1569,109 @@ def _decode_a5_waveform(
log.warning("_decode_a5_waveform: STRT record truncated (%dB)", len(strt))
return
total_samples = struct.unpack_from(">H", strt, 8)[0]
pretrig_samples = struct.unpack_from(">H", strt, 16)[0]
rectime_seconds = strt[18]
# STRT byte layout (21 bytes; verified against M529LIY6 reference files
# and re-confirmed against live BE11529 captures, 2026-05-08):
# [0:4] b'STRT'
# [4:6] 0xff 0xfe sentinel
# [6:10] end_key 4-byte BE flash address where event ends
# [10:14] start_key 4-byte BE flash address where event starts
# [14:18] device-specific (semantics not pinned; values vary across events
# and don't hold authoritative total_samples / pretrig)
# [18] 0x46 record-type marker (NOT rectime)
# [19] device-specific
# [20] sometimes rectime, sometimes 0 — not reliable
#
# AUTHORITATIVE values must come from compliance_config (sample_rate,
# record_time) and from end_offset - start_offset arithmetic (event size).
# Earlier code claimed STRT[8:10]=total_samples and STRT[16:18]=pretrig;
# those positions actually overlap end_key low-word and dev-specific bytes
# respectively. We surface the address-derived event size so consumers
# can sanity-check chunk-loop bounds, but `total_samples` per channel must
# be derived externally (sample_rate × record_time, or computed from the
# decoded sample count below).
end_key = strt[6:10]
start_key = strt[10:14]
end_offset_in_strt = (end_key[2] << 8) | end_key[3]
start_offset_in_strt = (start_key[2] << 8) | start_key[3]
is_event_1 = (start_offset_in_strt == 0x0000)
event.total_samples = total_samples
event.pretrig_samples = pretrig_samples
event.rectime_seconds = rectime_seconds
# Don't trust STRT for these — leave them as None so the caller can
# backfill from compliance_config (the authoritative source).
event.total_samples = None
event.pretrig_samples = None
event.rectime_seconds = None
log.debug(
"_decode_a5_waveform: STRT total_samples=%d pretrig=%d rectime=%ds",
total_samples, pretrig_samples, rectime_seconds,
"_decode_a5_waveform: STRT start_key=%s end_key=%s "
"start_off=0x%04X end_off=0x%04X is_event_1=%s "
"dev-specific[14:18]=%s strt[20]=0x%02X",
start_key.hex(), end_key.hex(),
start_offset_in_strt, end_offset_in_strt, is_event_1,
strt[14:18].hex(), strt[20],
)
# ── Collect per-frame waveform bytes with global offset tracking ─────────
# global_offset is the cumulative byte count across all frames, used to
# compute the channel alignment at each frame boundary.
#
# Frame layout under the v0.14.0+ walk:
# frames_data[0] = probe response (page_addr 0x0000;
# contains STRT + post-STRT data)
# frames_data[1..2] = (event 1 only) metadata pages
# page_addr = 0x1002 / 0x1004
# frames_data[mid] = sample chunks at flash addresses
# 0x0600, 0x0800, … (page_addr in
# {0x0600..0x1FFE})
# frames_data[last] = TERM response (page_key=0x0000)
#
# We identify metadata pages by their PAGE ADDRESS at db.data[4:6] (the
# 2-byte counter the device echoes back), NOT by content scan. An earlier
# needle-based detection (b"Project:", b"Client:", etc.) was the wrong
# layer of abstraction:
# • The actual metadata pages 0x1002 / 0x1004 do NOT contain ASCII
# project strings on this firmware (S338.17 / BE11529).
# • The strings physically live at flash address 0x1600 — which falls
# inside the sample-chunk address range. Skipping that frame would
# drop a real sample chunk.
# BW handles the "samples region happens to contain string bytes" case
# by just rendering the bytes verbatim; we do the same.
_METADATA_PAGES = (b"\x10\x02", b"\x10\x04")
chunks: list[tuple[int, bytes]] = [] # (frame_idx, waveform_bytes)
global_offset = 0
for fi, db in enumerate(frames_data):
page_addr = db.data[4:6] if len(db.data) >= 6 else b""
w = db.data[7:] # frame.data[7:]
# A5[0]: waveform begins after the 21-byte STRT record and 6-byte preamble.
# Layout: STRT(21B) + null-pad(2B) + 0xFF sentinel(4B) = 27 bytes total.
# A5[0]: probe response. Two cases:
# - Event 1 (start_offset_in_strt == 0x0000): the bytes after STRT
# are the device's *pre-event reserved area* (flash 0x0046 to
# 0x0600), NOT samples. We must skip them; samples begin at
# the first dedicated chunk frame at counter=0x0600.
# - Event N (continuation, start_offset != 0x0000): the bytes after
# the STRT record ARE the first slice of real samples for the
# event (BW's chunk loop addresses the probe as a sample chunk).
if fi == 0:
sp = w.find(b"STRT")
if sp < 0:
continue
if is_event_1:
# No usable samples in the probe — pre-event reserved bytes.
continue
# Layout: STRT(21B) + null-pad(2B) + 0xFF sentinel(4B) = 27 bytes total.
wave = w[sp + 27 :]
# Frame 7 carries event-time metadata strings ("Project:", "Client:", …)
# and no waveform ADC data.
elif fi == 7:
# Skip the dedicated metadata pages (event 1 only): page_addr 0x1002 / 0x1004.
elif page_addr in _METADATA_PAGES:
log.debug(
"_decode_a5_waveform: skipping metadata page fi=%d page_addr=%s",
fi, page_addr.hex(),
)
continue
# Terminator frames have page_key=0x0000 and are excluded upstream
# (read_bulk_waveform_stream returns early on page_key==0).
# No hardcoded frame-index skip here — all non-metadata frames are data.
# Sample chunk (or TERM): strip the 8-byte per-frame header.
else:
# Strip the 8-byte per-frame header (ctr + 6 zero bytes)
if len(w) < 8:
continue
wave = w[8:]
@@ -1576,10 +1685,8 @@ def _decode_a5_waveform(
total_bytes = global_offset
n_sets = total_bytes // 8
log.debug(
"_decode_a5_waveform: %d chunks, %dB total → %d complete sample-sets "
"(%d of %d expected; %.0f%%)",
len(chunks), total_bytes, n_sets, n_sets, total_samples,
100.0 * n_sets / total_samples if total_samples else 0,
"_decode_a5_waveform: %d chunks, %dB total → %d complete sample-sets",
len(chunks), total_bytes, n_sets,
)
if n_sets == 0:
@@ -1637,7 +1744,7 @@ def _decode_a5_waveform(
"Tran": tran,
"Vert": vert,
"Long": long_,
"Mic": mic,
"MicL": mic,
}
@@ -1685,22 +1792,30 @@ def _detect_record_format(data: bytes) -> Optional[str]:
def _extract_record_type(data: bytes) -> Optional[str]:
"""
Return a human-readable name for the waveform record format detected
in the first bytes of a 210-byte 0C record.
Return a user-facing name for a waveform record. All three internal
timestamp-header layouts represent the *same* user concept — a
triggered seismic event — so they all surface as just "Waveform".
Maps to the format codes returned by _detect_record_format():
"single_shot""Waveform"
"continuous""Waveform (Continuous)"
"short""Waveform (Short)"
None → "Unknown(XX.YY.ZZ)"
The internal format code is preserved for parsing logic (timestamp
decoder selection) but doesn't leak into the API / UI / sidecar.
Callers that need the raw layout can call `_detect_record_format`
directly.
Background: across BE11529 firmware S338.17 we've observed three
different byte layouts for the timestamp header at the start of the
0C record (8 / 9 / 10 bytes, distinguished by the position of the
BE-encoded year and the presence of `0x10` marker bytes). An older
revision of this code labelled them "Waveform" / "Waveform
(Continuous)" / "Waveform (Short)", which created the false
impression that there were three distinct event "types" the user
could configure. In reality the user only ever picks Single Shot
vs Continuous vs Histogram in the compliance config — the byte
layout is a firmware-internal detail that doesn't always correlate
with that choice.
"""
fmt = _detect_record_format(data)
if fmt == "single_shot":
if fmt in ("single_shot", "continuous", "short"):
return "Waveform"
if fmt == "continuous":
return "Waveform (Continuous)"
if fmt == "short":
return "Waveform (Short)"
if len(data) >= 3:
log.warning(
"_extract_record_type: unrecognized header: data[0:3]=%02X %02X %02X",
+518
View File
@@ -0,0 +1,518 @@
"""
minimateplus/event_file_io.py — modern event-file (.sfm.json sidecar) IO.
This module is the single home for event-file conversion code that doesn't
fit cleanly inside `blastware_file.py` (which is the BW binary codec):
- sidecar JSON read/write (the modern per-event metadata file)
- read_blastware_file() — reverse of write_blastware_file, used by
the BW-importer flow when SFM is ingesting files produced by
Blastware's own ACH (where the source A5 frames aren't available).
Sidecar schema v1 layout — see docs in the project plan or the schema
declared in `event_to_sidecar_dict()`.
"""
from __future__ import annotations
import datetime
import hashlib
import json
import logging
import os
import struct
from pathlib import Path
from typing import Optional, Union
from .models import Event, PeakValues, ProjectInfo, Timestamp
from . import blastware_file as _bw # avoid circular reference at module load
log = logging.getLogger(__name__)
# Schema version for the sidecar JSON. Bump when fields change shape.
# Older readers must reject anything > SCHEMA_VERSION; newer fields added
# inside `extensions` are forward-compatible without a bump.
SCHEMA_VERSION = 1
SIDECAR_KIND = "sfm.event"
# Default tool_version stamp; callers can override. Hard-coded here
# rather than read via importlib.metadata because the latter reflects the
# *installed* dist-info, which doesn't update when pyproject.toml is
# bumped without a `pip install` re-run — leading to confusing stale
# version stamps in sidecars. Bump this constant and CHANGELOG.md
# together at release time.
TOOL_VERSION = "0.15.0"
try:
# Best-effort: prefer the installed metadata when it's NEWER than the
# baked-in constant (e.g. a downstream packager bumped the wheel
# without editing this file). Otherwise fall back to TOOL_VERSION.
from importlib.metadata import version as _pkg_version
_meta_v = _pkg_version("seismo-relay")
def _vtuple(s):
try:
return tuple(int(p) for p in s.split(".")[:3])
except Exception:
return (0, 0, 0)
_TOOL_VERSION_DEFAULT = (
_meta_v if _vtuple(_meta_v) > _vtuple(TOOL_VERSION) else TOOL_VERSION
)
except Exception:
_TOOL_VERSION_DEFAULT = TOOL_VERSION
# ── Sidecar dict construction ─────────────────────────────────────────────────
def _ts_iso(ts: Optional[Timestamp]) -> Optional[str]:
if ts is None:
return None
try:
return datetime.datetime(
ts.year, ts.month, ts.day,
ts.hour or 0, ts.minute or 0, ts.second or 0,
).isoformat()
except Exception:
return str(ts)
def _peak_values_to_dict(pv: Optional[PeakValues]) -> dict:
if pv is None:
return {
"transverse": None,
"vertical": None,
"longitudinal": None,
"vector_sum": None,
"mic_psi": None,
}
return {
"transverse": pv.tran,
"vertical": pv.vert,
"longitudinal": pv.long,
"vector_sum": pv.peak_vector_sum,
"mic_psi": pv.micl,
}
def _project_info_to_dict(pi: Optional[ProjectInfo]) -> dict:
if pi is None:
return {
"project": None,
"client": None,
"operator": None,
"sensor_location": None,
}
return {
"project": pi.project,
"client": pi.client,
"operator": pi.operator,
"sensor_location": pi.sensor_location,
}
def event_to_sidecar_dict(
event: Event,
*,
serial: str,
blastware_filename: str,
blastware_filesize: int,
blastware_sha256: str,
source_kind: str = "sfm-live",
a5_pickle_filename: Optional[str] = None,
tool_version: str = _TOOL_VERSION_DEFAULT,
captured_at: Optional[datetime.datetime] = None,
review: Optional[dict] = None,
extensions: Optional[dict] = None,
) -> dict:
"""
Build a v1 sidecar dict from an Event + the surrounding metadata.
Pure helper — no file I/O. Callers stitch the result into a sidecar
via `write_sidecar()` (or POST it back via the PATCH endpoint).
"""
if source_kind not in {"sfm-live", "sfm-ach", "bw-import"}:
raise ValueError(f"unknown source_kind: {source_kind!r}")
captured_at = captured_at or datetime.datetime.utcnow()
return {
"schema_version": SCHEMA_VERSION,
"kind": SIDECAR_KIND,
"event": {
"serial": serial,
"timestamp": _ts_iso(event.timestamp),
"waveform_key": event._waveform_key.hex() if event._waveform_key else None,
"record_type": event.record_type,
"sample_rate": event.sample_rate,
"rectime_seconds": event.rectime_seconds,
"total_samples": event.total_samples,
"pretrig_samples": event.pretrig_samples,
},
"peak_values": _peak_values_to_dict(event.peak_values),
"project_info": _project_info_to_dict(event.project_info),
"blastware": {
"filename": blastware_filename,
"filesize": blastware_filesize,
"sha256": blastware_sha256,
"available": True,
},
"source": {
"kind": source_kind,
"captured_at": captured_at.isoformat() + "Z" if captured_at.tzinfo is None else captured_at.isoformat(),
"tool_version": tool_version,
"a5_pickle_filename": a5_pickle_filename,
},
"review": review or {
"false_trigger": False,
"reviewer": None,
"reviewed_at": None,
"notes": "",
},
"extensions": extensions or {},
}
# ── Sidecar IO ────────────────────────────────────────────────────────────────
def write_sidecar(path: Union[str, Path], data: dict) -> None:
"""
Atomic write of a sidecar dict to <path>.
Validates schema_version is supported before writing so we don't
silently drop a future-format sidecar over the wire.
"""
path = Path(path)
sv = data.get("schema_version")
if not isinstance(sv, int) or sv < 1 or sv > SCHEMA_VERSION:
raise ValueError(
f"write_sidecar: unsupported schema_version={sv!r} "
f"(this build supports 1..{SCHEMA_VERSION})"
)
tmp = path.with_suffix(path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8") as f:
json.dump(data, f, indent=2, sort_keys=False, default=str)
f.write("\n")
f.flush()
os.fsync(f.fileno())
os.replace(tmp, path)
def read_sidecar(path: Union[str, Path]) -> dict:
"""
Load a sidecar JSON file.
Raises FileNotFoundError if missing, ValueError on bad shape /
unsupported schema_version. Unknown keys at the top level are
preserved in the returned dict (forward-compat).
"""
path = Path(path)
with path.open("r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
raise ValueError(f"sidecar at {path}: top-level is not a JSON object")
sv = data.get("schema_version")
if not isinstance(sv, int) or sv < 1:
raise ValueError(f"sidecar at {path}: missing or invalid schema_version")
if sv > SCHEMA_VERSION:
raise ValueError(
f"sidecar at {path}: schema_version={sv} > supported {SCHEMA_VERSION}; "
"upgrade seismo-relay to read this file"
)
if data.get("kind") != SIDECAR_KIND:
raise ValueError(f"sidecar at {path}: unexpected kind={data.get('kind')!r}")
return data
def patch_sidecar(
path: Union[str, Path],
*,
review: Optional[dict] = None,
extensions: Optional[dict] = None,
reviewer_now: bool = True,
) -> dict:
"""
Atomically apply a JSON-merge-patch to a sidecar file's `review`
and/or `extensions` blocks. Other top-level keys are untouched.
`review_now`: when True (default) and `review` is non-empty, stamps
`review.reviewed_at` with the current UTC time so the review-time is
auditable without the caller having to pass it.
Returns the new full sidecar dict.
"""
path = Path(path)
data = read_sidecar(path)
if review:
merged = dict(data.get("review") or {})
merged.update({k: v for k, v in review.items() if v is not None or k in merged})
if reviewer_now:
merged["reviewed_at"] = datetime.datetime.utcnow().isoformat() + "Z"
data["review"] = merged
if extensions:
merged_ext = dict(data.get("extensions") or {})
merged_ext.update(extensions)
data["extensions"] = merged_ext
write_sidecar(path, data)
return data
def sidecar_path_for(blastware_path: Union[str, Path]) -> Path:
"""Convention: <bw_path>.sfm.json sits next to the BW binary."""
p = Path(blastware_path)
return p.with_name(p.name + ".sfm.json")
def file_sha256(path: Union[str, Path], chunk_size: int = 65536) -> str:
"""Compute SHA-256 of a file as a hex string."""
h = hashlib.sha256()
with open(path, "rb") as f:
while True:
chunk = f.read(chunk_size)
if not chunk:
break
h.update(chunk)
return h.hexdigest()
# ── Blastware-file reader ─────────────────────────────────────────────────────
#
# Reverse of `blastware_file.write_blastware_file`. Used by the BW-import
# flow to ingest files produced by Blastware's own ACH (where the source
# A5 frames are not available).
#
# File structure (recap):
# [22B header] [21B STRT record] [body bytes] [26B footer]
#
# The body holds:
# - 6B preamble (00 00 ff ff ff ff) immediately after the STRT
# - 4-channel interleaved int16 LE samples
# - Embedded ASCII metadata strings (Project: / Client: / User Name: /
# Seis Loc: / Extended Notes) from the device's session-start config
#
# The 0C waveform record (per-event peaks, project name) is NOT in the
# BW file — those are computed by the device firmware and only carried
# in the live SUB 0C response. read_blastware_file() therefore computes
# peaks from the raw samples assuming Normal-range (10 in/s full-scale)
# geophone sensitivity. Imported events surface that assumption via the
# sidecar's `peak_values.computed_from_samples` flag.
# Geophone scale factor, in/s per ADC unit, for Normal range (10 in/s FS).
# Confirmed from CLAUDE.md (geo_hardware_constant = 6.206053 in/s per V,
# ADC full-scale = 1.61133 V Normal range = 10.0 in/s peak; per-count
# resolution ≈ 10.0 / 32768).
_GEO_NORMAL_FS_INS = 10.0
_GEO_SENSITIVE_FS_INS = 1.250
_INT16_FS = 32768.0
# Microphone scale factor, psi per ADC count. Approximate — exact factor
# depends on the geophone-vs-mic ADC scaling and the firmware reference.
# We mark mic_psi as "computed approximate" in the sidecar.
_MIC_FS_PSI = 0.0125 / _INT16_FS # ~0.5 psi full-scale assumption
def _decode_strt(strt: bytes) -> dict:
"""
Decode the 21-byte STRT record from a BW file.
Returns dict with waveform_key (4B), total_samples, pretrig_samples,
rectime_seconds. Falls back to None on truncated/missing fields.
"""
if len(strt) < 21 or strt[0:4] != b"STRT":
return {}
return {
"waveform_key": strt[6:10].hex(),
"total_samples": struct.unpack_from(">H", strt, 8)[0],
"pretrig_samples": struct.unpack_from(">H", strt, 16)[0],
"rectime_seconds": strt[18],
}
def _find_first_string(buf: bytes, label: bytes, max_len: int = 256) -> Optional[str]:
"""
Search `buf` for `label` (e.g. b"Project:") and return the
null-terminated ASCII string that follows, stripped.
"""
pos = buf.find(label)
if pos < 0:
return None
start = pos + len(label)
end = buf.find(b"\x00", start, start + max_len)
if end < 0:
end = start + max_len
text = buf[start:end].decode("ascii", errors="replace").strip()
return text or None
def _decode_samples_4ch_int16_le(stream: bytes) -> dict[str, list[int]]:
"""
Decode a 4-channel interleaved int16 LE byte stream into per-channel
lists. Channels are [Tran, Vert, Long, Mic] = [ch0, ch1, ch2, ch3].
Truncates to a multiple of 8 bytes (one full sample-set).
"""
n_complete = (len(stream) // 8) * 8
if n_complete == 0:
return {"Tran": [], "Vert": [], "Long": [], "MicL": []}
fmt = "<" + "h" * (n_complete // 2)
flat = list(struct.unpack(fmt, stream[:n_complete]))
return {
"Tran": flat[0::4],
"Vert": flat[1::4],
"Long": flat[2::4],
"MicL": flat[3::4],
}
def _peaks_from_samples(samples: dict[str, list[int]]) -> PeakValues:
"""
Compute approximate peaks from raw int16 samples assuming Normal-range
geophone sensitivity. Used by the BW-importer when the 0C waveform
record (the device's authoritative peaks) is unavailable.
"""
def _peak_ins(ch: list[int]) -> float:
if not ch:
return 0.0
m = max(abs(int(v)) for v in ch)
return m / _INT16_FS * _GEO_NORMAL_FS_INS
tran = _peak_ins(samples.get("Tran", []))
vert = _peak_ins(samples.get("Vert", []))
long_ = _peak_ins(samples.get("Long", []))
# Mic in psi (approximate)
mic_ch = samples.get("MicL", []) or []
mic = max((abs(int(v)) for v in mic_ch), default=0) * _MIC_FS_PSI
# Peak vector sum: max over time of sqrt(T^2 + V^2 + L^2)
pvs = 0.0
n = min(len(samples.get("Tran", [])), len(samples.get("Vert", [])), len(samples.get("Long", [])))
if n:
scale = _GEO_NORMAL_FS_INS / _INT16_FS
T = samples["Tran"]; V = samples["Vert"]; L = samples["Long"]
for i in range(n):
t = T[i] * scale
v = V[i] * scale
l = L[i] * scale
mag = (t*t + v*v + l*l) ** 0.5
if mag > pvs:
pvs = mag
return PeakValues(
tran=tran, vert=vert, long=long_,
peak_vector_sum=pvs, micl=mic,
)
def read_blastware_file(path: Union[str, Path]) -> Event:
"""
Parse a Blastware waveform file into an Event.
Recovers:
- waveform_key, rectime_seconds, total_samples, pretrig_samples
(from the STRT record)
- timestamp (from the footer's start-time field)
- project_info (from ASCII labels embedded in the body)
- raw_samples (Tran/Vert/Long/MicL int16 lists)
- peak_values (computed from raw_samples; approximate — see notes
on _peaks_from_samples about Normal-range assumption)
Does NOT recover the source A5 frames (they aren't in the BW file).
The returned Event has `_a5_frames = None`, signalling that
byte-for-byte regeneration of the BW file from this Event alone is
not possible — the on-disk BW file IS the byte-for-byte source.
"""
path = Path(path)
raw = path.read_bytes()
if len(raw) < _bw._WAVEFORM_HEADER_SIZE + 21 + 26:
raise ValueError(f"{path}: file too short ({len(raw)} bytes) to be a BW event")
# Header: validate magic prefix.
header = raw[:_bw._WAVEFORM_HEADER_SIZE]
if not header.startswith(_bw._FILE_HEADER_PREFIX):
raise ValueError(f"{path}: not a Blastware file (bad header prefix)")
# STRT record: 21 bytes immediately after the header.
strt_raw = raw[_bw._WAVEFORM_HEADER_SIZE : _bw._WAVEFORM_HEADER_SIZE + 21]
strt_fields = _decode_strt(strt_raw)
if not strt_fields:
raise ValueError(f"{path}: STRT record missing or malformed")
# Footer: locate the 0e 08 marker, validating the year is in a sane range.
body_start = _bw._WAVEFORM_HEADER_SIZE + 21
footer_pos = -1
pos = body_start
while True:
pos = raw.find(b"\x0e\x08", pos)
if pos < 0 or pos + 26 > len(raw):
break
yr = (raw[pos + 4] << 8) | raw[pos + 5]
if 2015 <= yr <= 2050:
footer_pos = pos
break
pos += 1
if footer_pos < 0 and len(raw) >= 26:
footer_pos = len(raw) - 26
if footer_pos < body_start:
raise ValueError(f"{path}: footer not found")
body = raw[body_start : footer_pos]
footer = raw[footer_pos : footer_pos + 26]
# Footer layout:
# [0:2] 0e 08 marker
# [2:10] ts1 (start) BE 8B
# [10:18] ts2 (stop) BE 8B
# [18:24] 00 01 00 02 00 00
# [24:26] crc
ts1 = _bw._decode_ts_be(footer[2:10])
ts2 = _bw._decode_ts_be(footer[10:18])
# Body: first 6 bytes are the preamble (00 00 ff ff ff ff). Strip
# them before decoding samples. Any trailing tail past the last
# full sample-set is silently truncated by _decode_samples_4ch.
sample_bytes = body[6:] if body[:6].hex() in ("0000ffffffff", "0000FFFFFFFF") else body
samples = _decode_samples_4ch_int16_le(sample_bytes)
# Metadata strings (label-anchored search across the body).
project = _find_first_string(body, b"Project:")
client = _find_first_string(body, b"Client:")
user = _find_first_string(body, b"User Name:")
seisloc = _find_first_string(body, b"Seis Loc:")
# Build the Event.
ev = Event(index=-1)
if strt_fields.get("waveform_key"):
ev._waveform_key = bytes.fromhex(strt_fields["waveform_key"])
ev.record_type = "Waveform"
ev.rectime_seconds = strt_fields.get("rectime_seconds")
ev.total_samples = strt_fields.get("total_samples")
ev.pretrig_samples = strt_fields.get("pretrig_samples")
if ts1 is not None:
ev.timestamp = Timestamp(
raw=footer[2:10],
flag=0x10,
year=ts1.year, unknown_byte=0, month=ts1.month, day=ts1.day,
hour=ts1.hour, minute=ts1.minute, second=ts1.second,
)
ev.project_info = ProjectInfo(
project=project, client=client, operator=user, sensor_location=seisloc,
)
ev.raw_samples = samples
ev.peak_values = _peaks_from_samples(samples)
ev._a5_frames = None # not recoverable from BW file
return ev