feat: add waveform store handling #16

Merged
serversdown merged 5 commits from sfm-waveform-store into main 2026-05-08 15:03:33 -04:00
19 changed files with 5188 additions and 462 deletions
+46
View File
@@ -4,6 +4,52 @@ All notable changes to seismo-relay are documented here.
---
## v0.15.0 — 2026-05-07
### Added
- **Layered event storage architecture.** Each event now lands as four
files in the per-serial waveform store, each with a clear role:
- `<filename>` — the Blastware-readable binary (BW file). Untouched.
- `<filename>.a5.pkl` — the raw 5A frames (regenerative source).
- `<filename>.h5` — clean per-channel waveform arrays in physical
units (in/s for geo, psi for mic) plus event metadata (HDF5 with
gzip compression). This is the canonical format for downstream
analysis tools.
- `<filename>.sfm.json` — the modern review/metadata sidecar (peaks,
project, source provenance, review state, extensions).
SQLite (`seismo_relay.db`) is the searchable index over all four.
- **Plot-ready waveform JSON (`sfm.plot.v1`).** The `/device/event/{idx}/waveform`
and `/db/events/{id}/waveform.json` endpoints now return samples in
physical units with explicit time-axis metadata, peak markers, and
per-channel unit hints — no more guessing the ADC-to-velocity scale
client-side. The webapp waveform viewer was rewritten to consume
this shape.
- **In-app waveform viewer accuracy fix.** The standalone SFM webapp
viewer was scaling geophone amplitudes by `geoAdcScale / 32767`
(≈ 6.206 / 32767), where `geoAdcScale = 6.206053` is the device's
*in/s per V* hardware constant — not the ADC-counts-to-velocity
factor. This silently scaled every plot ~38% too low for Normal-range
geophones (the correct full-scale is 10.0 in/s, or 1.25 in/s for
Sensitive). Conversion is now done server-side using the geo_range
from compliance config; the client just plots.
- New `sfm/event_hdf5.py` module: `write_event_hdf5()`,
`read_event_hdf5()`, plus a plot-JSON helper.
- Backfill script extended to also emit `.h5` for existing events.
### Dependencies
- Added `h5py>=3.10` and `numpy>=1.24` for the HDF5 storage layer.
- Added `python-multipart>=0.0.7` (required by FastAPI for the
`/db/import/blastware_file` endpoint introduced in this release).
---
## v0.14.3 — 2026-05-05
### Fixed
+220 -117
View File
@@ -70,42 +70,77 @@ from minimateplus.transport import SocketTransport
from minimateplus.client import MiniMateClient
from minimateplus.models import DeviceInfo, Event, MonitorLogEntry
from sfm.database import SeismoDb
from sfm.waveform_store import WaveformStore
log = logging.getLogger("ach_server")
# ── Per-unit state (downloaded-key set) ───────────────────────────────────────
# ── Per-unit state (downloaded events index) ──────────────────────────────────
# Persisted as <output_dir>/ach_state.json
# Format:
# Format (current — v2):
# {
# "BE11529": {
# "downloaded_keys": ["01110000", "0111245a"], # hex keys already on disk
# "max_downloaded_key": "0111245a", # highest key ever seen
# "last_seen": "2026-04-11T01:04:36"
# "downloaded_events": { # key_hex → ISO timestamp string
# "01110000": "2026-04-11T00:42:17",
# "0111245a": "2026-04-11T01:04:30"
# },
# "max_downloaded_key": "0111245a",
# "last_seen": "2026-04-11T01:04:36",
# "serial": "BE11529",
# "peer": "63.43.212.232:51920"
# }
# }
#
# Key-based deduplication works well within a single "key generation" (between
# erases). After the device memory is erased the event counter resets to
# 0x01110000, so the first new event has the SAME key as the very first event
# we ever downloaded. We detect this situation with max_downloaded_key:
# Why (key, timestamp) and not key alone:
# The device's event-key counter resets to 0x01110000 after every memory
# erase (internal or external). A bare-key dedup (the v1 format) cannot
# distinguish a re-recorded event with the same key from one we already
# downloaded. The 0C waveform record's timestamp IS unique per physical
# event, so we pair (key, timestamp) and treat a key with a different
# timestamp as a new event regardless of `max_downloaded_key`.
#
# if max(current_device_keys) < max_downloaded_key
# → device was wiped and keys have restarted → treat all device keys as new
#
# After our own erase (--clear-after-download) we also explicitly clear
# downloaded_keys and max_downloaded_key so the next session starts fresh.
# Legacy v1 format (`downloaded_keys: list[str]` only) is auto-migrated on
# read: each key is kept with a sentinel timestamp of "" (empty string), so
# the (key, timestamp) compare always sees a mismatch and forces a one-time
# re-download. After that pass, the state is rewritten in v2 form.
_state_lock = threading.Lock()
def _load_state(state_path: Path) -> dict:
    """
    Load ach_state.json, transparently migrating any legacy
    `downloaded_keys: list` entries into the v2 `downloaded_events: dict`
    schema.

    Loading is best-effort: a missing file, an unreadable or malformed
    file, or a JSON document whose root is not an object all yield an
    empty state. The latter cases are logged as warnings so that a
    corrupted state file does not silently cause a full re-download.

    This function only reads; it never writes the file. The migrated
    layout is persisted whenever the caller next saves the state.

    Args:
        state_path: location of ach_state.json.

    Returns:
        Mapping of unit key to per-unit state dict. Every per-unit dict
        carries a `downloaded_events` mapping after this call.
    """
    if not state_path.exists():
        return {}

    try:
        # json.dump emits ASCII by default, so UTF-8 is a safe superset.
        with open(state_path, encoding="utf-8") as f:
            state = json.load(f)
    except Exception as exc:
        # Deliberately broad: state is a dedup cache, and failing to read
        # it must never abort a call-home session. Log instead of hiding.
        log.warning(
            "ach_state: could not read %s (%s) -- starting with empty state",
            state_path, exc,
        )
        return {}

    if not isinstance(state, dict):
        log.warning(
            "ach_state: %s does not contain a JSON object (got %s) "
            "-- starting with empty state",
            state_path, type(state).__name__,
        )
        return {}

    # Per-unit migration: legacy list -> dict-with-empty-timestamps.
    for unit_key, unit_state in state.items():
        if not isinstance(unit_state, dict):
            continue
        if "downloaded_events" in unit_state:
            # Already in v2 form; leave untouched.
            continue

        legacy_keys = unit_state.get("downloaded_keys")
        if isinstance(legacy_keys, list):
            # Empty-string timestamps never equal a real timestamp, so each
            # migrated key is re-validated once on the next session.
            unit_state["downloaded_events"] = {k: "" for k in legacy_keys}
            log.info(
                "ach_state: migrated %s from v1 (downloaded_keys list) → v2 "
                "(downloaded_events dict, %d keys with empty timestamps; "
                "they will re-validate on next session)",
                unit_key, len(legacy_keys),
            )
        else:
            unit_state["downloaded_events"] = {}

        # The legacy field has been folded into downloaded_events; drop it
        # so it is not written back on the next save.
        unit_state.pop("downloaded_keys", None)

    return state
def _save_state(state_path: Path, state: dict) -> None:
@@ -139,8 +174,10 @@ class AchSession:
max_events: Optional[int],
state_path: Path,
db: "SeismoDb",
store: "WaveformStore",
clear_after_download: bool = False,
restart_monitoring: bool = False,
force_redownload: bool = False,
) -> None:
self.sock = sock
self.peer = peer
@@ -150,8 +187,14 @@ class AchSession:
self.max_events = max_events
self.state_path = state_path
self.db = db
self.store = store
self.clear_after_download = clear_after_download
self.restart_monitoring = restart_monitoring
# `force_redownload` tells this session to ignore ach_state and
# re-download every event currently on the device, regardless of any
# (key, timestamp) match. Useful as a manual override when state has
# become inconsistent with what's actually on disk / in the DB.
self.force_redownload = force_redownload
def run(self) -> None:
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -273,11 +316,20 @@ class AchSession:
state = _load_state(self.state_path)
unit_key = serial or self.peer # fall back to IP if no serial
unit_state = state.get(unit_key, {})
seen_keys: set[str] = set(unit_state.get("downloaded_keys", []))
# Highest event key ever downloaded from this unit (hex string, 8 chars).
# Used to detect post-erase key reuse — see comment block above.
# downloaded_events is the v2 (key_hex → timestamp_iso) dict.
# Empty-string timestamps are migrated v1 entries — they force a
# one-time re-download because the (key, timestamp) compare always
# mismatches against any non-empty timestamp from a fresh 0C read.
seen_events: dict[str, str] = dict(unit_state.get("downloaded_events", {}))
max_seen_key: str = unit_state.get("max_downloaded_key", "00000000")
if self.force_redownload:
log.info(" --force-redownload-all set — ignoring %d cached "
"(key, timestamp) entries for this session",
len(seen_events))
seen_events = {}
# Walk the event index (browse-mode, no 5A) to get the actual current
# key list. The SUB 08 event_count field is a lifetime "total events
# ever recorded" counter that does NOT decrement on erase — confirmed
@@ -290,11 +342,10 @@ class AchSession:
log.warning(" list_event_keys failed: %s -- falling back to full download", exc)
device_keys = None
# Use the walk result as our authoritative current count.
current_count = len(device_keys) if device_keys is not None else 0
log.info(" Unit has %d stored event(s); %d key(s) previously downloaded",
current_count, len(seen_keys))
log.info(" Unit has %d stored event(s); %d (key, ts) entr(ies) previously downloaded",
current_count, len(seen_events))
if device_keys is not None and current_count == 0:
log.info(" [OK] No events on device -- nothing to download")
@@ -302,75 +353,29 @@ class AchSession:
return
if device_keys is not None:
# ── Post-erase detection ──────────────────────────────────────
# After the device memory is erased, new events start from key
# 01110000 again — the same keys we already downloaded. Detect
# this by comparing the device's current highest key against the
# historical maximum. If the device has rolled back below our
# high-water mark, its counter was reset and we must treat all
# its keys as new, regardless of what seen_keys contains.
# ── Post-erase detection (best-effort, key-only signal) ───────
# After erase the device's key counter resets to 01110000.
# If the device's current max key is below our high-water mark
# we know erase happened. This catches the cleanest case but
# does NOT catch erase-then-record-many-events (where the new
# max may climb past the old max). The (key, timestamp) check
# in get_events() is what handles those.
if device_keys and max_seen_key != "00000000":
max_device_key = max(device_keys) # lexicographic; safe because
# keys share the same 4-char prefix
max_device_key = max(device_keys)
if max_device_key < max_seen_key:
log.info(
" Post-erase reset detected: "
"device max key %s < historical max %s "
"-- treating all device keys as new",
"-- discarding stale (key, ts) state for this session",
max_device_key, max_seen_key,
)
seen_keys = set() # discard stale dedup info for this session
seen_events = {}
new_key_set = set(device_keys) - seen_keys
log.info(" Device has %d key(s): %d new, %d already seen",
len(device_keys), len(new_key_set), len(device_keys) - len(new_key_set))
if not new_key_set:
log.info(" [OK] All events already downloaded -- nothing to do")
# Refresh state timestamp; preserve max_seen_key unchanged.
state[unit_key] = {
"downloaded_keys": sorted(seen_keys | set(device_keys)),
"max_downloaded_key": max_seen_key,
"last_seen": datetime.datetime.now().isoformat(),
"serial": serial,
"peer": self.peer,
}
_save_state(self.state_path, state)
# ── Erase even when no new events (if requested) ──────────
# Blastware ACH always erases after every session — even when
# nothing new was downloaded. Without the erase the device
# still sees stored events in its memory and immediately
# retries the call-home, causing the looping we observed.
# Only erase when device actually has events stored; skip
# the erase if device_keys is empty (nothing to erase).
if self.clear_after_download and device_keys:
log.info(
" Clearing device memory (--clear-after-download, "
"no new events but device has %d stored)...",
len(device_keys),
)
try:
client.delete_all_events()
log.info(" [OK] Device memory cleared")
# Reset state so the next session starts fresh.
state[unit_key] = {
"downloaded_keys": [],
"max_downloaded_key": "00000000",
"last_seen": datetime.datetime.now().isoformat(),
"serial": serial,
"peer": self.peer,
}
_save_state(self.state_path, state)
except Exception as exc:
log.error(
" [WARN] Event deletion failed: %s -- events NOT cleared",
exc,
)
log.info("Session complete (no new events) -> %s", session_dir)
return
else:
new_key_set = None # unknown; proceed with full download
# Note: no early-exit "all already downloaded" short-circuit
# here. Without per-event timestamps we cannot tell whether
# device_keys ⊆ seen_events.keys() actually means we have
# those physical events. get_events() will read 0C on its
# skip path and decide per event.
# Apply max_events cap
# stop_idx: when we know the count from list_event_keys, use it as
@@ -388,27 +393,67 @@ class AchSession:
)
try:
# Pass `seen_events` (key → ISO timestamp) so the client can
# read 0C on its skip path and only skip 5A when the per-event
# timestamp matches what we already have on disk. When
# force_redownload is set, seen_events was already cleared above.
#
# Filter out empty-string timestamps (legacy v1 entries) — the
# client's 0C-on-skip-path only trusts entries with a
# populated timestamp; otherwise it falls through to a full
# 5A download.
skip_dict = {k: ts for k, ts in seen_events.items() if ts}
all_events = client.get_events(
full_waveform=True,
stop_after_index=stop_idx,
skip_waveform_for_keys=seen_keys if seen_keys else None,
skip_waveform_for_events=skip_dict if skip_dict else None,
)
# Filter to events whose keys we haven't saved before.
# New events are those that came back with _a5_frames populated
# (= 5A actually ran on this session). Skipped events have
# _a5_frames = None because the client matched (key, timestamp)
# against skip_dict and bypassed 5A.
new_events = [
e for e in all_events
if e._waveform_key is None
or e._waveform_key.hex() not in seen_keys
if getattr(e, "_a5_frames", None)
]
skipped = len(all_events) - len(new_events)
log.info(" [OK] Downloaded %d event(s): %d new, %d skipped (already seen)",
log.info(" [OK] Walked %d event(s): %d downloaded, %d skipped (matched (key, ts) in state)",
len(all_events), len(new_events), skipped)
if skipped:
log.info(" (skipped %d already-downloaded event(s))", skipped)
# ── Persist event file + A5 sidecar to the waveform store ──
# Saves ride alongside the existing JSON dump so the on-disk
# event file and events.json reference the same set of events.
waveform_records: dict[str, dict] = {}
for ev in new_events:
if not ev._a5_frames:
continue
try:
rec = self.store.save(
ev,
serial=serial or "UNKNOWN",
a5_frames=ev._a5_frames,
)
if ev._waveform_key is not None:
waveform_records[ev._waveform_key.hex()] = rec
log.info(
" [WAVE] saved %s (%d bytes)",
rec["filename"], rec["filesize"],
)
except Exception as exc:
key_hex = ev._waveform_key.hex() if ev._waveform_key else "????????"
log.warning(
" [WARN] Waveform store save failed for %s: %s",
key_hex, exc,
)
if new_events:
_save_json(session_dir / "events.json", [_event_to_dict(e) for e in new_events])
_save_json(
session_dir / "events.json",
[_event_to_dict(e, waveform_records) for e in new_events],
)
for ev in new_events:
pv = ev.peak_values
@@ -467,7 +512,10 @@ class AchSession:
_session_start = datetime.datetime.now()
try:
_ev_ins, _ev_skip = self.db.insert_events(
new_events, serial=serial or self.peer, session_id=None
new_events,
serial=serial or self.peer,
session_id=None,
waveform_records=waveform_records,
)
_ml_ins, _ml_skip = self.db.insert_monitor_log(
new_monitor_entries, session_id=None
@@ -502,35 +550,64 @@ class AchSession:
)
# ── Update persistent state ───────────────────────────────────
# Include both triggered-event keys and monitor-log keys in the
# downloaded set so they are not re-processed on the next call-home.
current_event_keys = [
e._waveform_key.hex()
for e in all_events
if e._waveform_key is not None
]
current_monitor_keys = [e.key for e in new_monitor_entries]
current_keys = current_event_keys + current_monitor_keys
# Build a fresh (key → ISO timestamp) map from THIS session's
# results. For each event currently on the device, prefer the
# timestamp we just observed (from 0C); fall back to whatever
# was already in seen_events for that key (so we don't lose an
# entry just because get_events skipped it on the (key, ts)
# match path).
def _ts_iso(ev) -> str:
ts = getattr(ev, "timestamp", None)
if ts is None:
return ""
try:
return datetime.datetime(
ts.year, ts.month, ts.day,
ts.hour or 0, ts.minute or 0, ts.second or 0,
).isoformat()
except Exception:
return str(ts)
current_events_map: dict[str, str] = {}
for ev in all_events:
if ev._waveform_key is None:
continue
key_hex = ev._waveform_key.hex()
ts_iso = _ts_iso(ev) or seen_events.get(key_hex, "")
current_events_map[key_hex] = ts_iso
# Monitor-log entries don't have a 0C-style timestamp, but
# they DO have a start_time; use that so the monitor-log keys
# are properly entered into the (key, ts) map.
for ml in new_monitor_entries:
key_hex = ml.key
ts = ml.start_time
ts_iso = ts.isoformat() if ts else seen_events.get(key_hex, "")
# If a triggered event already populated this key, keep
# whichever has a non-empty timestamp.
if key_hex not in current_events_map or not current_events_map[key_hex]:
current_events_map[key_hex] = ts_iso
if erased_successfully:
# Device memory is clear. Reset downloaded_events and the
# high-water mark so the next call-home starts fresh and
# doesn't mis-identify the recycled key 01110000 as "seen".
updated_keys = []
updated_events: dict[str, str] = {}
new_max_key = "00000000"
log.info(
" State reset after erase -- next session will download "
"from key 0 (device counter resets after erase)"
)
else:
# Normal (no erase): union of previously-seen + all keys on
# device now. Includes already-seen survivors so we never
# re-download them if the device somehow keeps old records.
updated_keys = sorted(set(seen_keys) | set(current_keys))
new_max_key = updated_keys[-1] if updated_keys else max_seen_key
# Merge: keep prior (key, ts) entries we still have evidence
# of (for survivors of any partial failure), plus this
# session's authoritative (key, ts) pairs.
updated_events = dict(seen_events)
updated_events.update(current_events_map)
new_max_key = (
max(updated_events.keys())
if updated_events else max_seen_key
)
state[unit_key] = {
"downloaded_keys": updated_keys,
"downloaded_events": updated_events,
"max_downloaded_key": new_max_key,
"last_seen": datetime.datetime.now().isoformat(),
"serial": serial,
@@ -592,7 +669,10 @@ def _device_info_to_dict(d: DeviceInfo) -> dict:
}
def _event_to_dict(e: Event) -> dict:
def _event_to_dict(
e: Event,
waveform_records: Optional[dict[str, dict]] = None,
) -> dict:
pv = e.peak_values
pi = e.project_info
peaks = {}
@@ -611,6 +691,11 @@ def _event_to_dict(e: Event) -> dict:
for ch, vals in e.raw_samples.items()
}
samples["__note__"] = "first 20 sample-sets only; see raw_rx.bin for full waveform"
rec: dict = {}
if waveform_records and e._waveform_key is not None:
rec = waveform_records.get(e._waveform_key.hex(), {}) or {}
return {
"timestamp": str(e.timestamp) if e.timestamp else None,
"project": pi.project if pi else None,
@@ -619,6 +704,9 @@ def _event_to_dict(e: Event) -> dict:
"sensor_location": pi.sensor_location if pi else None,
"peaks": peaks,
"raw_samples_preview": samples,
"blastware_filename": rec.get("filename"),
"blastware_filesize": rec.get("filesize"),
"a5_pickle_filename": rec.get("a5_pickle_filename"),
}
@@ -640,6 +728,7 @@ def serve(args: argparse.Namespace) -> None:
output_dir.mkdir(parents=True, exist_ok=True)
state_path = output_dir / "ach_state.json"
db = SeismoDb(output_dir / "seismo_relay.db")
store = WaveformStore(output_dir / "waveforms")
server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
@@ -657,6 +746,7 @@ def serve(args: argparse.Namespace) -> None:
print(f" Max events per session: {max_ev if max_ev else 'unlimited'}")
print(f" Clear device after download: {'YES' if args.clear_after_download else 'no'}")
print(f" Restart monitoring after download: {'YES' if args.restart_monitoring else 'no'}")
print(f" Force re-download all (ignore state): {'YES' if args.force_redownload_all else 'no'}")
print(f"{'='*60}")
print(f"\n Point your test unit's ACEmanager call-home settings to:")
print(f" Remote Host: <this machine's LAN IP>")
@@ -694,8 +784,10 @@ def serve(args: argparse.Namespace) -> None:
max_events=max_ev,
state_path=state_path,
db=db,
store=store,
clear_after_download=args.clear_after_download,
restart_monitoring=args.restart_monitoring,
force_redownload=args.force_redownload_all,
)
t = threading.Thread(target=session.run, daemon=True, name=f"ach-{peer}")
t.start()
@@ -780,6 +872,17 @@ def parse_args() -> argparse.Namespace:
"This mirrors the standard Blastware ACH workflow."
),
)
p.add_argument(
"--force-redownload-all",
action="store_true",
default=False,
help=(
"Manual override: ignore ach_state.json's downloaded_events map "
"for this session and re-download every event currently on the "
"device, regardless of (key, timestamp) match. Useful when state "
"has become inconsistent with the on-disk waveform store / DB."
),
)
p.add_argument(
"--verbose", "-v",
action="store_true",
+244 -129
View File
@@ -449,7 +449,7 @@ class MiniMateClient:
proto.confirm_erase_all()
log.info("delete_all_events: erase confirmed — device memory cleared")
def get_events(self, full_waveform: bool = False, debug: bool = False, stop_after_index: Optional[int] = None, skip_waveform_for_keys: Optional[set] = None, extra_chunks_after_metadata: int = 1) -> list[Event]:
def get_events(self, full_waveform: bool = False, debug: bool = False, stop_after_index: Optional[int] = None, skip_waveform_for_keys: Optional[set] = None, skip_waveform_for_events: Optional[dict] = None, extra_chunks_after_metadata: int = 1) -> list[Event]:
"""
Download all stored events from the device using the confirmed
1E 0A 0C 5A 1F event-iterator protocol.
@@ -497,37 +497,24 @@ class MiniMateClient:
events: list[Event] = []
idx = 0
# Legacy bare-key skip set is deprecated: the device's key counter
# resets to 0x01110000 after every memory erase, so a key in this set
# cannot be trusted to identify the same physical event across erases.
# If a caller still passes it, log a warning and ignore — full
# downloads will run for every event so the bug never silently bites.
if skip_waveform_for_keys:
log.warning(
"get_events: skip_waveform_for_keys is deprecated and unsafe "
"(post-erase key reuse); ignoring %d entries. Use "
"skip_waveform_for_events={key: timestamp_iso} instead.",
len(skip_waveform_for_keys),
)
skip_evts: dict[str, str] = dict(skip_waveform_for_events or {})
while data8[4:8] != b"\x00\x00\x00\x00":
cur_key = key4 # key for this event's 0A/1E-arm/0C/5A calls
log.info("get_events: record %d key=%s", idx, cur_key.hex())
# Fast-advance path: if this key is already downloaded, skip
# 1E-arm/0C/POLL/5A entirely. Only 0A + 1F(browse) are needed
# to advance the device's internal pointer to the next event.
# This is identical to the browse-mode walk in count_events().
if skip_waveform_for_keys and cur_key.hex() in skip_waveform_for_keys:
log.debug("get_events: key=%s already seen -- fast-advance only", cur_key.hex())
try:
proto.read_waveform_header(cur_key)
except ProtocolError as exc:
log.warning(
"get_events: 0A failed for key=%s (skip path): %s -- stopping",
cur_key.hex(), exc,
)
break
try:
key4, data8 = proto.advance_event(browse=True)
except ProtocolError as exc:
log.warning(
"get_events: 1F failed for key=%s (skip path): %s -- stopping",
cur_key.hex(), exc,
)
break
idx += 1
if stop_after_index is not None and idx > stop_after_index:
break
continue
ev = Event(index=idx)
ev._waveform_key = cur_key
@@ -574,72 +561,96 @@ class MiniMateClient:
"get_events: 0C failed for key=%s: %s", cur_key.hex(), exc
)
# SUB 1F (download-arm) — send token=0xFE BEFORE POLL+5A to arm the
# device's bulk stream state machine. Cache the returned key as a
# fallback for loop iteration when 5A fails (see iteration block below).
# Confirmed from 4-2-26 capture frames 66-67 (1F before frames 68-73 POLL).
arm_key4: Optional[bytes] = None
try:
arm_key4, _ = proto.advance_event(browse=False) # arm 5A
log.info("get_events: 1F(download) — 5A armed, arm_key=%s", arm_key4.hex())
except ProtocolError as exc:
log.warning("get_events: 1F(download) arm failed: %s", exc)
# ── Skip-5A decision based on (key, timestamp) match ──────
# If skip_waveform_for_events maps cur_key.hex() to a non-empty
# ISO timestamp matching what we just read from 0C, this is
# the same physical event we already have on disk — bypass
# the 1F(arm)+POLL+5A bulk download. Otherwise (no entry, or
# timestamp mismatch indicating post-erase reuse) fall through
# to the full download.
expected_ts = skip_evts.get(cur_key.hex(), "")
actual_ts = _event_timestamp_iso(ev)
skip_5a = bool(expected_ts and actual_ts and expected_ts == actual_ts)
if skip_5a:
log.info(
"get_events: key=%s (key, ts=%s) match — skipping 5A bulk download",
cur_key.hex(), actual_ts,
)
# POLL × 3 — BW sends 3 full POLL cycles between 1F and 5A.
# Confirmed from 4-2-26 BW TX capture (frames 68-73 before 5A at 74).
log.info("get_events: POLL × 3 before 5A")
for _p in range(3):
arm_key4: Optional[bytes] = None
a5_ok = False
if not skip_5a:
# SUB 1F (download-arm) — send token=0xFE BEFORE POLL+5A to arm the
# device's bulk stream state machine. Cache the returned key as a
# fallback for loop iteration when 5A fails (see iteration block below).
# Confirmed from 4-2-26 capture frames 66-67 (1F before frames 68-73 POLL).
try:
proto.poll()
arm_key4, _ = proto.advance_event(browse=False) # arm 5A
log.info("get_events: 1F(download) — 5A armed, arm_key=%s", arm_key4.hex())
except ProtocolError as exc:
log.warning("get_events: POLL %d failed: %s", _p, exc)
log.warning("get_events: 1F(download) arm failed: %s", exc)
# POLL × 3 — BW sends 3 full POLL cycles between 1F and 5A.
# Confirmed from 4-2-26 BW TX capture (frames 68-73 before 5A at 74).
log.info("get_events: POLL × 3 before 5A")
for _p in range(3):
try:
proto.poll()
except ProtocolError as exc:
log.warning("get_events: POLL %d failed: %s", _p, exc)
# SUB 5A — bulk waveform stream (uses cur_key, the event set up by 0A+1E+0C).
# By default (full_waveform=False): stop after frame 7 for metadata only.
# When full_waveform=True: fetch all chunks and decode raw ADC samples.
a5_ok = False
try:
if full_waveform:
log.info(
"get_events: 5A full waveform download for key=%s", cur_key.hex()
)
a5_frames = proto.read_bulk_waveform_stream(
cur_key, stop_after_metadata=False, max_chunks=128,
include_terminator=True,
)
if a5_frames:
a5_ok = True
ev._a5_frames = a5_frames # store for write_blastware_file
_decode_a5_metadata_into(a5_frames, ev)
_decode_a5_waveform(a5_frames, ev)
#
# Bypassed when skip_5a is True — the event is left with
# _a5_frames=None, which signals to the caller (e.g.
# ach_server.py) that this event was matched by (key, ts) and
# already has a stored .file in the persistent waveform store.
if not skip_5a:
try:
if full_waveform:
log.info(
"get_events: 5A decoded %d sample-sets",
len((ev.raw_samples or {}).get("Tran", [])),
"get_events: 5A full waveform download for key=%s", cur_key.hex()
)
else:
log.info(
"get_events: 5A metadata-only download for key=%s", cur_key.hex()
)
a5_frames = proto.read_bulk_waveform_stream(
cur_key, stop_after_metadata=True,
include_terminator=True,
extra_chunks_after_metadata=extra_chunks_after_metadata,
max_chunks=128,
)
if a5_frames:
a5_ok = True
ev._a5_frames = a5_frames # store for write_blastware_file
_decode_a5_metadata_into(a5_frames, ev)
log.debug(
"get_events: 5A metadata client=%r operator=%r",
ev.project_info.client if ev.project_info else None,
ev.project_info.operator if ev.project_info else None,
a5_frames = proto.read_bulk_waveform_stream(
cur_key, stop_after_metadata=False, max_chunks=128,
include_terminator=True,
)
except ProtocolError as exc:
log.warning(
"get_events: 5A failed for key=%s: %s — metadata unavailable",
cur_key.hex(), exc,
)
if a5_frames:
a5_ok = True
ev._a5_frames = a5_frames # store for write_blastware_file
_decode_a5_metadata_into(a5_frames, ev)
_decode_a5_waveform(a5_frames, ev)
log.info(
"get_events: 5A decoded %d sample-sets",
len((ev.raw_samples or {}).get("Tran", [])),
)
else:
log.info(
"get_events: 5A metadata-only download for key=%s", cur_key.hex()
)
a5_frames = proto.read_bulk_waveform_stream(
cur_key, stop_after_metadata=True,
include_terminator=True,
extra_chunks_after_metadata=extra_chunks_after_metadata,
max_chunks=128,
)
if a5_frames:
a5_ok = True
ev._a5_frames = a5_frames # store for write_blastware_file
_decode_a5_metadata_into(a5_frames, ev)
log.debug(
"get_events: 5A metadata client=%r operator=%r",
ev.project_info.client if ev.project_info else None,
ev.project_info.operator if ev.project_info else None,
)
except ProtocolError as exc:
log.warning(
"get_events: 5A failed for key=%s: %s — metadata unavailable",
cur_key.hex(), exc,
)
# SUB 1F — loop iteration.
#
@@ -652,7 +663,14 @@ class MiniMateClient:
# Confirmed from 4-3-26 browse-mode captures: browse=True params
# are correct for multi-event iteration. Conditional logic added
# 2026-04-06 to avoid post-failure state disruption.
if a5_ok:
#
# NEW 2026-05-06: when skip_5a=True we never entered the 5A
# state at all (we read 0A+1E(arm)+0C and chose to bypass).
# 1F(browse) is safe in this scenario — the device's iteration
# pointer is independent of the bulk-stream state machine, and
# we never put it into the half-attempted 5A state that the
# earlier "post-failure 1F disruption" warning is about.
if skip_5a or a5_ok:
# 5A succeeded (or was deliberately skipped) — use browse 1F for reliable key advancement.
try:
key4, data8 = proto.advance_event(browse=True)
@@ -1174,6 +1192,27 @@ class MiniMateClient:
# Pure functions: bytes → model field population.
# Kept here (not in models.py) to isolate protocol knowledge from data shapes.
def _event_timestamp_iso(event: Event) -> str:
    """
    Return a stable ISO-8601 string for the event's 0C-derived timestamp,
    or "" if the event has no timestamp populated.

    The format intentionally matches what `bridges/ach_server.py` writes
    into `ach_state.json:downloaded_events[*]` so the (key, ts) compare
    in get_events()'s skip path is a simple string equality.

    Only year/month/day/hour/minute/second are used; missing (None) time
    fields are treated as 0. If the timestamp object cannot be turned into
    a `datetime` (missing attributes, wrong types, out-of-range values),
    `str(ts)` is returned instead so the caller still gets a non-empty,
    deterministic token.
    """
    ts = getattr(event, "timestamp", None)
    if ts is None:
        return ""
    try:
        return datetime.datetime(
            ts.year, ts.month, ts.day,
            ts.hour or 0, ts.minute or 0, ts.second or 0,
        ).isoformat()
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Best-effort fallback: limited to the errors that attribute access
        # and the datetime constructor can raise, so unrelated bugs are not
        # masked.
        return str(ts)
def _decode_serial_number(data: bytes) -> DeviceInfo:
"""
Decode SUB EA (SERIAL_NUMBER_RESPONSE) payload into a new DeviceInfo.
@@ -1323,29 +1362,36 @@ def _decode_waveform_record_into(data: bytes, event: Event) -> None:
Modifies event in-place.
"""
# ── Record type ───────────────────────────────────────────────────────────
# Decoded from byte[1] (sub_code) first so we can gate timestamp parsing.
# ── Record type + format detection ────────────────────────────────────────
# `record_type` is the user-facing label ("Waveform" for any triggered
# event regardless of timestamp-header layout). `fmt` is the internal
# format code used to pick the right Timestamp parser; it stays
# internal and doesn't leak to the API / sidecar / UI.
try:
event.record_type = _extract_record_type(data)
except Exception as exc:
log.warning("waveform record type decode failed: %s", exc)
fmt = _detect_record_format(data)
# ── Timestamp ─────────────────────────────────────────────────────────────
# 9-byte format for sub_code=0x10 Waveform records:
# [day][sub_code][month][year:2 BE][unknown][hour][min][sec]
# sub_code=0x10 and sub_code=0x03 have different timestamp byte layouts.
# Both confirmed against Blastware event reports (BE11529, 2026-04-01 and 2026-04-03).
if event.record_type == "Waveform":
# Three timestamp-header layouts have been observed across BE11529
# firmware S338.17 — each picks a different Timestamp parser:
# "single_shot": 9-byte [day][0x10][month][year:2][unk][h][m][s]
# "continuous": 10-byte [0x10][day][0x10][month][year:2][unk][h][m][s]
# "short": 8-byte [day][month][year:2][unk][h][m][s]
# All decoded into the same Timestamp dataclass — only the byte
# offsets differ.
if fmt == "single_shot":
try:
event.timestamp = Timestamp.from_waveform_record(data)
except Exception as exc:
log.warning("waveform record timestamp decode failed: %s", exc)
elif event.record_type == "Waveform (Continuous)":
log.warning("single_shot record timestamp decode failed: %s", exc)
elif fmt == "continuous":
try:
event.timestamp = Timestamp.from_continuous_record(data)
except Exception as exc:
log.warning("continuous record timestamp decode failed: %s", exc)
elif event.record_type == "Waveform (Short)":
elif fmt == "short":
try:
event.timestamp = Timestamp.from_short_record(data)
except Exception as exc:
@@ -1523,46 +1569,109 @@ def _decode_a5_waveform(
log.warning("_decode_a5_waveform: STRT record truncated (%dB)", len(strt))
return
total_samples = struct.unpack_from(">H", strt, 8)[0]
pretrig_samples = struct.unpack_from(">H", strt, 16)[0]
rectime_seconds = strt[18]
# STRT byte layout (21 bytes; verified against M529LIY6 reference files
# and re-confirmed against live BE11529 captures, 2026-05-08):
# [0:4] b'STRT'
# [4:6] 0xff 0xfe sentinel
# [6:10] end_key 4-byte BE flash address where event ends
# [10:14] start_key 4-byte BE flash address where event starts
# [14:18] device-specific (semantics not pinned; values vary across events
# and don't hold authoritative total_samples / pretrig)
# [18] 0x46 record-type marker (NOT rectime)
# [19] device-specific
# [20] sometimes rectime, sometimes 0 — not reliable
#
# AUTHORITATIVE values must come from compliance_config (sample_rate,
# record_time) and from end_offset - start_offset arithmetic (event size).
# Earlier code claimed STRT[8:10]=total_samples and STRT[16:18]=pretrig;
# those positions actually overlap end_key low-word and dev-specific bytes
# respectively. We surface the address-derived event size so consumers
# can sanity-check chunk-loop bounds, but `total_samples` per channel must
# be derived externally (sample_rate × record_time, or computed from the
# decoded sample count below).
end_key = strt[6:10]
start_key = strt[10:14]
end_offset_in_strt = (end_key[2] << 8) | end_key[3]
start_offset_in_strt = (start_key[2] << 8) | start_key[3]
is_event_1 = (start_offset_in_strt == 0x0000)
event.total_samples = total_samples
event.pretrig_samples = pretrig_samples
event.rectime_seconds = rectime_seconds
# Don't trust STRT for these — leave them as None so the caller can
# backfill from compliance_config (the authoritative source).
event.total_samples = None
event.pretrig_samples = None
event.rectime_seconds = None
log.debug(
"_decode_a5_waveform: STRT total_samples=%d pretrig=%d rectime=%ds",
total_samples, pretrig_samples, rectime_seconds,
"_decode_a5_waveform: STRT start_key=%s end_key=%s "
"start_off=0x%04X end_off=0x%04X is_event_1=%s "
"dev-specific[14:18]=%s strt[20]=0x%02X",
start_key.hex(), end_key.hex(),
start_offset_in_strt, end_offset_in_strt, is_event_1,
strt[14:18].hex(), strt[20],
)
# ── Collect per-frame waveform bytes with global offset tracking ─────────
# global_offset is the cumulative byte count across all frames, used to
# compute the channel alignment at each frame boundary.
#
# Frame layout under the v0.14.0+ walk:
# frames_data[0] = probe response (page_addr 0x0000;
# contains STRT + post-STRT data)
# frames_data[1..2] = (event 1 only) metadata pages
# page_addr = 0x1002 / 0x1004
# frames_data[mid] = sample chunks at flash addresses
# 0x0600, 0x0800, … (page_addr in
# {0x0600..0x1FFE})
# frames_data[last] = TERM response (page_key=0x0000)
#
# We identify metadata pages by their PAGE ADDRESS at db.data[4:6] (the
# 2-byte counter the device echoes back), NOT by content scan. An earlier
# needle-based detection (b"Project:", b"Client:", etc.) was the wrong
# layer of abstraction:
# • The actual metadata pages 0x1002 / 0x1004 do NOT contain ASCII
# project strings on this firmware (S338.17 / BE11529).
# • The strings physically live at flash address 0x1600 — which falls
# inside the sample-chunk address range. Skipping that frame would
# drop a real sample chunk.
# BW handles the "samples region happens to contain string bytes" case
# by just rendering the bytes verbatim; we do the same.
_METADATA_PAGES = (b"\x10\x02", b"\x10\x04")
chunks: list[tuple[int, bytes]] = [] # (frame_idx, waveform_bytes)
global_offset = 0
for fi, db in enumerate(frames_data):
page_addr = db.data[4:6] if len(db.data) >= 6 else b""
w = db.data[7:] # frame.data[7:]
# A5[0]: waveform begins after the 21-byte STRT record and 6-byte preamble.
# Layout: STRT(21B) + null-pad(2B) + 0xFF sentinel(4B) = 27 bytes total.
# A5[0]: probe response. Two cases:
# - Event 1 (start_offset_in_strt == 0x0000): the bytes after STRT
# are the device's *pre-event reserved area* (flash 0x0046 to
# 0x0600), NOT samples. We must skip them; samples begin at
# the first dedicated chunk frame at counter=0x0600.
# - Event N (continuation, start_offset != 0x0000): the bytes after
# the STRT record ARE the first slice of real samples for the
# event (BW's chunk loop addresses the probe as a sample chunk).
if fi == 0:
sp = w.find(b"STRT")
if sp < 0:
continue
if is_event_1:
# No usable samples in the probe — pre-event reserved bytes.
continue
# Layout: STRT(21B) + null-pad(2B) + 0xFF sentinel(4B) = 27 bytes total.
wave = w[sp + 27 :]
# Frame 7 carries event-time metadata strings ("Project:", "Client:", …)
# and no waveform ADC data.
elif fi == 7:
# Skip the dedicated metadata pages (event 1 only): page_addr 0x1002 / 0x1004.
elif page_addr in _METADATA_PAGES:
log.debug(
"_decode_a5_waveform: skipping metadata page fi=%d page_addr=%s",
fi, page_addr.hex(),
)
continue
# Terminator frames have page_key=0x0000 and are excluded upstream
# (read_bulk_waveform_stream returns early on page_key==0).
# No hardcoded frame-index skip here — all non-metadata frames are data.
# Sample chunk (or TERM): strip the 8-byte per-frame header.
else:
# Strip the 8-byte per-frame header (ctr + 6 zero bytes)
if len(w) < 8:
continue
wave = w[8:]
@@ -1576,10 +1685,8 @@ def _decode_a5_waveform(
total_bytes = global_offset
n_sets = total_bytes // 8
log.debug(
"_decode_a5_waveform: %d chunks, %dB total → %d complete sample-sets "
"(%d of %d expected; %.0f%%)",
len(chunks), total_bytes, n_sets, n_sets, total_samples,
100.0 * n_sets / total_samples if total_samples else 0,
"_decode_a5_waveform: %d chunks, %dB total → %d complete sample-sets",
len(chunks), total_bytes, n_sets,
)
if n_sets == 0:
@@ -1637,7 +1744,7 @@ def _decode_a5_waveform(
"Tran": tran,
"Vert": vert,
"Long": long_,
"Mic": mic,
"MicL": mic,
}
@@ -1685,22 +1792,30 @@ def _detect_record_format(data: bytes) -> Optional[str]:
def _extract_record_type(data: bytes) -> Optional[str]:
"""
Return a human-readable name for the waveform record format detected
in the first bytes of a 210-byte 0C record.
Return a user-facing name for a waveform record. All three internal
timestamp-header layouts represent the *same* user concept a
triggered seismic event so they all surface as just "Waveform".
Maps to the format codes returned by _detect_record_format():
"single_shot" "Waveform"
"continuous" "Waveform (Continuous)"
"short" "Waveform (Short)"
None "Unknown(XX.YY.ZZ)"
The internal format code is preserved for parsing logic (timestamp
decoder selection) but doesn't leak into the API / UI / sidecar.
Callers that need the raw layout can call `_detect_record_format`
directly.
Background: across BE11529 firmware S338.17 we've observed three
different byte layouts for the timestamp header at the start of the
0C record (8 / 9 / 10 bytes, distinguished by the position of the
BE-encoded year and the presence of `0x10` marker bytes). An older
revision of this code labelled them "Waveform" / "Waveform
(Continuous)" / "Waveform (Short)", which created the false
impression that there were three distinct event "types" the user
could configure. In reality the user only ever picks Single Shot
vs Continuous vs Histogram in the compliance config the byte
layout is a firmware-internal detail that doesn't always correlate
with that choice.
"""
fmt = _detect_record_format(data)
if fmt == "single_shot":
if fmt in ("single_shot", "continuous", "short"):
return "Waveform"
if fmt == "continuous":
return "Waveform (Continuous)"
if fmt == "short":
return "Waveform (Short)"
if len(data) >= 3:
log.warning(
"_extract_record_type: unrecognized header: data[0:3]=%02X %02X %02X",
+518
View File
@@ -0,0 +1,518 @@
"""
minimateplus/event_file_io.py — modern event-file (.sfm.json sidecar) IO.

This module is the single home for event-file conversion code that doesn't
fit cleanly inside `blastware_file.py` (which is the BW binary codec):

  - sidecar JSON read/write (the modern per-event metadata file)
  - read_blastware_file() — the reverse of write_blastware_file, used by
    the BW-importer flow when SFM is ingesting files produced by
    Blastware's own ACH (where the source A5 frames aren't available).

Sidecar schema v1 layout — see the docs in the project plan, or the schema
declared in `event_to_sidecar_dict()`.
"""
from __future__ import annotations
import datetime
import hashlib
import json
import logging
import os
import struct
from pathlib import Path
from typing import Optional, Union
from .models import Event, PeakValues, ProjectInfo, Timestamp
from . import blastware_file as _bw # avoid circular reference at module load
log = logging.getLogger(__name__)
# Schema version for the sidecar JSON. Bump when fields change shape.
# Older readers must reject anything > SCHEMA_VERSION; newer fields added
# inside `extensions` are forward-compatible without a bump.
SCHEMA_VERSION = 1
SIDECAR_KIND = "sfm.event"
# Default tool_version stamp; callers can override. Hard-coded here
# rather than read via importlib.metadata because the latter reflects the
# *installed* dist-info, which doesn't update when pyproject.toml is
# bumped without a `pip install` re-run — leading to confusing stale
# version stamps in sidecars. Bump this constant and CHANGELOG.md
# together at release time.
TOOL_VERSION = "0.15.0"
try:
# Best-effort: prefer the installed metadata when it's NEWER than the
# baked-in constant (e.g. a downstream packager bumped the wheel
# without editing this file). Otherwise fall back to TOOL_VERSION.
from importlib.metadata import version as _pkg_version
_meta_v = _pkg_version("seismo-relay")
def _vtuple(s):
try:
return tuple(int(p) for p in s.split(".")[:3])
except Exception:
return (0, 0, 0)
_TOOL_VERSION_DEFAULT = (
_meta_v if _vtuple(_meta_v) > _vtuple(TOOL_VERSION) else TOOL_VERSION
)
except Exception:
_TOOL_VERSION_DEFAULT = TOOL_VERSION
# ── Sidecar dict construction ─────────────────────────────────────────────────
def _ts_iso(ts: Optional[Timestamp]) -> Optional[str]:
if ts is None:
return None
try:
return datetime.datetime(
ts.year, ts.month, ts.day,
ts.hour or 0, ts.minute or 0, ts.second or 0,
).isoformat()
except Exception:
return str(ts)
def _peak_values_to_dict(pv: Optional[PeakValues]) -> dict:
if pv is None:
return {
"transverse": None,
"vertical": None,
"longitudinal": None,
"vector_sum": None,
"mic_psi": None,
}
return {
"transverse": pv.tran,
"vertical": pv.vert,
"longitudinal": pv.long,
"vector_sum": pv.peak_vector_sum,
"mic_psi": pv.micl,
}
def _project_info_to_dict(pi: Optional[ProjectInfo]) -> dict:
if pi is None:
return {
"project": None,
"client": None,
"operator": None,
"sensor_location": None,
}
return {
"project": pi.project,
"client": pi.client,
"operator": pi.operator,
"sensor_location": pi.sensor_location,
}
def event_to_sidecar_dict(
    event: Event,
    *,
    serial: str,
    blastware_filename: str,
    blastware_filesize: int,
    blastware_sha256: str,
    source_kind: str = "sfm-live",
    a5_pickle_filename: Optional[str] = None,
    tool_version: str = _TOOL_VERSION_DEFAULT,
    captured_at: Optional[datetime.datetime] = None,
    review: Optional[dict] = None,
    extensions: Optional[dict] = None,
) -> dict:
    """
    Build a v1 sidecar dict from an Event plus the surrounding metadata.

    Pure helper — no file I/O.  Callers stitch the result into a sidecar
    via `write_sidecar()` (or POST it back via the PATCH endpoint).

    Args:
        event: Source of the `event`, `peak_values` and `project_info` blocks.
        serial: Device serial recorded under `event.serial`.
        blastware_filename / blastware_filesize / blastware_sha256:
            Recorded verbatim in the `blastware` block.
        source_kind: One of "sfm-live", "sfm-ach", "bw-import".
        a5_pickle_filename: Recorded under `source.a5_pickle_filename`.
        tool_version: Version stamp recorded under `source.tool_version`.
        captured_at: Capture time; defaults to the current UTC time.  A
            naive datetime is serialised with a trailing "Z"; an aware one
            keeps its own offset.
        review: Replaces the default (empty) review block when truthy.
        extensions: Replaces the default `{}` extensions block when truthy.

    Raises:
        ValueError: if `source_kind` is not one of the accepted values.
    """
    if source_kind not in {"sfm-live", "sfm-ach", "bw-import"}:
        raise ValueError(f"unknown source_kind: {source_kind!r}")

    if captured_at is None:
        # datetime.utcnow() is deprecated since Python 3.12.  Dropping the
        # tzinfo keeps the historical "<naive ISO>Z" output format.
        captured_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    if captured_at.tzinfo is None:
        captured_at_iso = captured_at.isoformat() + "Z"
    else:
        captured_at_iso = captured_at.isoformat()

    return {
        "schema_version": SCHEMA_VERSION,
        "kind": SIDECAR_KIND,
        "event": {
            "serial": serial,
            "timestamp": _ts_iso(event.timestamp),
            "waveform_key": event._waveform_key.hex() if event._waveform_key else None,
            "record_type": event.record_type,
            "sample_rate": event.sample_rate,
            "rectime_seconds": event.rectime_seconds,
            "total_samples": event.total_samples,
            "pretrig_samples": event.pretrig_samples,
        },
        "peak_values": _peak_values_to_dict(event.peak_values),
        "project_info": _project_info_to_dict(event.project_info),
        "blastware": {
            "filename": blastware_filename,
            "filesize": blastware_filesize,
            "sha256": blastware_sha256,
            "available": True,
        },
        "source": {
            "kind": source_kind,
            "captured_at": captured_at_iso,
            "tool_version": tool_version,
            "a5_pickle_filename": a5_pickle_filename,
        },
        "review": review or {
            "false_trigger": False,
            "reviewer": None,
            "reviewed_at": None,
            "notes": "",
        },
        "extensions": extensions or {},
    }
# ── Sidecar IO ────────────────────────────────────────────────────────────────
def write_sidecar(path: Union[str, Path], data: dict) -> None:
    """
    Atomically write a sidecar dict to <path>.

    The data is written to a sibling ``<path>.tmp`` file, flushed and
    fsynced, then moved over the destination with ``os.replace``.  If
    anything fails before the replace completes, the temporary file is
    removed so a failed write does not leave debris in the store.

    Raises:
        ValueError: if ``data["schema_version"]`` is not an int in
            ``1..SCHEMA_VERSION`` (checked before anything is written).
    """
    path = Path(path)
    sv = data.get("schema_version")
    if not isinstance(sv, int) or sv < 1 or sv > SCHEMA_VERSION:
        raise ValueError(
            f"write_sidecar: unsupported schema_version={sv!r} "
            f"(this build supports 1..{SCHEMA_VERSION})"
        )

    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=False, default=str)
            f.write("\n")
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp, path)
    except BaseException:
        # Best-effort cleanup; the original error is what the caller sees.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
def read_sidecar(path: Union[str, Path]) -> dict:
    """
    Load and validate a sidecar JSON file.

    Returns the parsed dict unchanged, so keys this build does not know
    about are preserved.

    Raises:
        FileNotFoundError: if the file does not exist.
        ValueError: if the top level is not a JSON object, the
            schema_version is missing / not a positive int / newer than
            SCHEMA_VERSION, or `kind` is not SIDECAR_KIND.
    """
    path = Path(path)
    data = json.loads(path.read_text(encoding="utf-8"))

    if not isinstance(data, dict):
        raise ValueError(f"sidecar at {path}: top-level is not a JSON object")

    sv = data.get("schema_version")
    if not (isinstance(sv, int) and sv >= 1):
        raise ValueError(f"sidecar at {path}: missing or invalid schema_version")
    if sv > SCHEMA_VERSION:
        raise ValueError(
            f"sidecar at {path}: schema_version={sv} > supported {SCHEMA_VERSION}; "
            "upgrade seismo-relay to read this file"
        )

    if data.get("kind") != SIDECAR_KIND:
        raise ValueError(f"sidecar at {path}: unexpected kind={data.get('kind')!r}")

    return data
def patch_sidecar(
    path: Union[str, Path],
    *,
    review: Optional[dict] = None,
    extensions: Optional[dict] = None,
    reviewer_now: bool = True,
) -> dict:
    """
    Apply a shallow merge to a sidecar file's `review` and/or `extensions`
    blocks and write the result back atomically.  Other top-level keys
    are untouched.

    Args:
        path: Sidecar file to update (must already exist and validate).
        review: Keys to merge into the `review` block.  A None value
            overwrites a key that is already present but never creates
            a new key.
        extensions: Keys to merge into the `extensions` block (plain
            `dict.update`, None values included).
        reviewer_now: When True (default) and `review` is non-empty,
            stamps `review.reviewed_at` with the current UTC time so the
            review time is auditable without the caller passing it.
            This overrides any `reviewed_at` supplied in `review`.

    Returns:
        The new full sidecar dict.
    """
    path = Path(path)
    data = read_sidecar(path)

    if review:
        merged = dict(data.get("review") or {})
        merged.update({k: v for k, v in review.items() if v is not None or k in merged})
        if reviewer_now:
            # datetime.utcnow() is deprecated since Python 3.12; dropping
            # the tzinfo keeps the existing "<naive ISO>Z" stamp format.
            now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            merged["reviewed_at"] = now_utc.isoformat() + "Z"
        data["review"] = merged

    if extensions:
        merged_ext = dict(data.get("extensions") or {})
        merged_ext.update(extensions)
        data["extensions"] = merged_ext

    write_sidecar(path, data)
    return data
def sidecar_path_for(blastware_path: Union[str, Path]) -> Path:
    """Return the sidecar path for a BW file: ``<bw_path>.sfm.json``, same directory."""
    bw = Path(blastware_path)
    sidecar_name = f"{bw.name}.sfm.json"
    return bw.with_name(sidecar_name)
def file_sha256(path: Union[str, Path], chunk_size: int = 65536) -> str:
    """Return the SHA-256 of the file at *path* as a hex string.

    The file is read in *chunk_size*-byte pieces so large files are never
    loaded into memory whole.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (EOF).
        for piece in iter(lambda: f.read(chunk_size), b""):
            digest.update(piece)
    return digest.hexdigest()
# ── Blastware-file reader ─────────────────────────────────────────────────────
#
# Reverse of `blastware_file.write_blastware_file`. Used by the BW-import
# flow to ingest files produced by Blastware's own ACH (where the source
# A5 frames are not available).
#
# File structure (recap):
# [22B header] [21B STRT record] [body bytes] [26B footer]
#
# The body holds:
# - 6B preamble (00 00 ff ff ff ff) immediately after the STRT
# - 4-channel interleaved int16 LE samples
# - Embedded ASCII metadata strings (Project: / Client: / User Name: /
# Seis Loc: / Extended Notes) from the device's session-start config
#
# The 0C waveform record (per-event peaks, project name) is NOT in the
# BW file — those are computed by the device firmware and only carried
# in the live SUB 0C response. read_blastware_file() therefore computes
# peaks from the raw samples assuming Normal-range (10 in/s full-scale)
# geophone sensitivity. Imported events are meant to surface that
# assumption via the sidecar's `peak_values.computed_from_samples` flag.
# NOTE(review): `_peak_values_to_dict()` in this module does not emit a
# `computed_from_samples` key — confirm where (or whether) it is set.
# Geophone full-scale velocities in in/s.  These are FULL-SCALE values, not
# per-count factors: the per-count resolution is <full-scale> / _INT16_FS
# (≈ 10.0 / 32768 in/s per ADC count for Normal range).
# Confirmed from CLAUDE.md (geo_hardware_constant = 6.206053 in/s per V,
# ADC full-scale = 1.61133 V → Normal range = 10.0 in/s peak).
_GEO_NORMAL_FS_INS = 10.0
_GEO_SENSITIVE_FS_INS = 1.250
_INT16_FS = 32768.0
# Microphone scale factor, psi per ADC count (despite the "_FS_" in the
# name, this one IS already divided by _INT16_FS).  Approximate — exact
# factor depends on the geophone-vs-mic ADC scaling and the firmware
# reference.  We mark mic_psi as "computed approximate" in the sidecar.
# NOTE(review): the numerator below implies 0.0125 psi full-scale, which
# disagrees with the "~0.5 psi" in the trailing comment — confirm which
# value is intended.
_MIC_FS_PSI = 0.0125 / _INT16_FS # ~0.5 psi full-scale assumption
def _decode_strt(strt: bytes) -> dict:
"""
Decode the 21-byte STRT record from a BW file.
Returns dict with waveform_key (4B), total_samples, pretrig_samples,
rectime_seconds. Falls back to None on truncated/missing fields.
"""
if len(strt) < 21 or strt[0:4] != b"STRT":
return {}
return {
"waveform_key": strt[6:10].hex(),
"total_samples": struct.unpack_from(">H", strt, 8)[0],
"pretrig_samples": struct.unpack_from(">H", strt, 16)[0],
"rectime_seconds": strt[18],
}
def _find_first_string(buf: bytes, label: bytes, max_len: int = 256) -> Optional[str]:
"""
Search `buf` for `label` (e.g. b"Project:") and return the
null-terminated ASCII string that follows, stripped.
"""
pos = buf.find(label)
if pos < 0:
return None
start = pos + len(label)
end = buf.find(b"\x00", start, start + max_len)
if end < 0:
end = start + max_len
text = buf[start:end].decode("ascii", errors="replace").strip()
return text or None
def _decode_samples_4ch_int16_le(stream: bytes) -> dict[str, list[int]]:
"""
Decode a 4-channel interleaved int16 LE byte stream into per-channel
lists. Channels are [Tran, Vert, Long, Mic] = [ch0, ch1, ch2, ch3].
Truncates to a multiple of 8 bytes (one full sample-set).
"""
n_complete = (len(stream) // 8) * 8
if n_complete == 0:
return {"Tran": [], "Vert": [], "Long": [], "MicL": []}
fmt = "<" + "h" * (n_complete // 2)
flat = list(struct.unpack(fmt, stream[:n_complete]))
return {
"Tran": flat[0::4],
"Vert": flat[1::4],
"Long": flat[2::4],
"MicL": flat[3::4],
}
def _peaks_from_samples(samples: dict[str, list[int]]) -> PeakValues:
    """
    Compute approximate peaks from raw int16 samples, assuming
    Normal-range geophone sensitivity.

    Used by the BW importer when the 0C waveform record (the device's
    own peaks) is unavailable.  Missing or empty channels contribute a
    peak of zero.  The peak vector sum is the largest
    sqrt(T^2 + V^2 + L^2) over the sample indices all three geophone
    channels have in common.
    """
    def _geo_peak(name: str) -> float:
        # Largest absolute count on one geophone channel, scaled to in/s.
        counts = samples.get(name, [])
        if not counts:
            return 0.0
        largest = max(abs(int(c)) for c in counts)
        return largest / _INT16_FS * _GEO_NORMAL_FS_INS

    tran = _geo_peak("Tran")
    vert = _geo_peak("Vert")
    long_ = _geo_peak("Long")

    # Mic in psi (approximate).
    mic_counts = samples.get("MicL", []) or []
    mic = max((abs(int(c)) for c in mic_counts), default=0) * _MIC_FS_PSI

    # Peak vector sum across the three geophone channels; zip() stops at
    # the shortest channel.
    scale = _GEO_NORMAL_FS_INS / _INT16_FS
    pvs = 0.0
    triples = zip(
        samples.get("Tran", []),
        samples.get("Vert", []),
        samples.get("Long", []),
    )
    for t_raw, v_raw, l_raw in triples:
        t = t_raw * scale
        v = v_raw * scale
        l = l_raw * scale
        pvs = max(pvs, (t * t + v * v + l * l) ** 0.5)

    return PeakValues(
        tran=tran,
        vert=vert,
        long=long_,
        peak_vector_sum=pvs,
        micl=mic,
    )
def read_blastware_file(path: Union[str, Path]) -> Event:
    """
    Parse a Blastware waveform file into an Event.

    Recovers:
      - the STRT-record fields, exactly as returned by `_decode_strt`
        (waveform_key, rectime_seconds, total_samples, pretrig_samples)
      - timestamp (from the footer's start-time field)
      - project_info (from ASCII labels embedded in the body)
      - raw_samples (Tran/Vert/Long/MicL int16 lists)
      - peak_values (computed from raw_samples; approximate — see
        `_peaks_from_samples` for the Normal-range assumption)

    Does NOT recover the source A5 frames (they aren't in the BW file).
    The returned Event has `_a5_frames = None`, signalling that
    byte-for-byte regeneration of the BW file from this Event alone is
    not possible — the on-disk BW file IS the byte-for-byte source.

    Raises:
        ValueError: if the file is too short, the header prefix is wrong,
            the STRT record is missing/malformed, or no footer position
            can be determined.
    """
    path = Path(path)
    raw = path.read_bytes()
    header_size = _bw._WAVEFORM_HEADER_SIZE

    # Minimum viable file: header + 21-byte STRT + 26-byte footer.
    if len(raw) < header_size + 21 + 26:
        raise ValueError(f"{path}: file too short ({len(raw)} bytes) to be a BW event")

    # Header: validate magic prefix.
    header = raw[:header_size]
    if not header.startswith(_bw._FILE_HEADER_PREFIX):
        raise ValueError(f"{path}: not a Blastware file (bad header prefix)")

    # STRT record: 21 bytes immediately after the header.
    body_start = header_size + 21
    strt_fields = _decode_strt(raw[header_size:body_start])
    if not strt_fields:
        raise ValueError(f"{path}: STRT record missing or malformed")

    # Footer: take the first `0e 08` marker whose year field (2 bytes BE
    # at marker+4) is in a sane range and that leaves room for a full
    # 26-byte footer.
    footer_pos = -1
    pos = body_start
    while True:
        pos = raw.find(b"\x0e\x08", pos)
        if pos < 0 or pos + 26 > len(raw):
            break
        yr = (raw[pos + 4] << 8) | raw[pos + 5]
        if 2015 <= yr <= 2050:
            footer_pos = pos
            break
        pos += 1
    if footer_pos < 0:
        # No validated marker: assume the footer is the last 26 bytes.
        footer_pos = len(raw) - 26
    if footer_pos < body_start:
        raise ValueError(f"{path}: footer not found")

    body = raw[body_start:footer_pos]
    footer = raw[footer_pos:footer_pos + 26]

    # Footer layout:
    #   [0:2]   0e 08 marker
    #   [2:10]  ts1 (start)  BE 8B
    #   [10:18] ts2 (stop)   BE 8B  (not used here)
    #   [18:24] 00 01 00 02 00 00
    #   [24:26] crc
    ts1 = _bw._decode_ts_be(footer[2:10])

    # Body: a leading 6-byte preamble (00 00 ff ff ff ff), when present,
    # is stripped before decoding samples.  Any tail past the last full
    # sample-set is dropped by _decode_samples_4ch_int16_le.
    preamble = b"\x00\x00\xff\xff\xff\xff"
    sample_bytes = body[6:] if body.startswith(preamble) else body
    samples = _decode_samples_4ch_int16_le(sample_bytes)

    # Metadata strings (label-anchored search across the whole body).
    project = _find_first_string(body, b"Project:")
    client = _find_first_string(body, b"Client:")
    user = _find_first_string(body, b"User Name:")
    seisloc = _find_first_string(body, b"Seis Loc:")

    # Build the Event.
    ev = Event(index=-1)
    if strt_fields.get("waveform_key"):
        ev._waveform_key = bytes.fromhex(strt_fields["waveform_key"])
    ev.record_type = "Waveform"
    ev.rectime_seconds = strt_fields.get("rectime_seconds")
    ev.total_samples = strt_fields.get("total_samples")
    ev.pretrig_samples = strt_fields.get("pretrig_samples")
    if ts1 is not None:
        ev.timestamp = Timestamp(
            raw=footer[2:10],
            flag=0x10,
            year=ts1.year, unknown_byte=0, month=ts1.month, day=ts1.day,
            hour=ts1.hour, minute=ts1.minute, second=ts1.second,
        )
    ev.project_info = ProjectInfo(
        project=project, client=client, operator=user, sensor_location=seisloc,
    )
    ev.raw_samples = samples
    ev.peak_values = _peaks_from_samples(samples)
    ev._a5_frames = None  # not recoverable from BW file
    return ev
+4 -1
View File
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "seismo-relay"
version = "0.12.0"
version = "0.15.0"
description = "Python client and REST server for MiniMate Plus seismographs"
requires-python = ">=3.10"
dependencies = [
@@ -12,6 +12,9 @@ dependencies = [
"uvicorn[standard]>=0.24",
"pyserial>=3.5",
"sqlalchemy>=2.0",
"python-multipart>=0.0.7",
"h5py>=3.10",
"numpy>=1.24",
]
[tool.setuptools.packages.find]
+3
View File
@@ -2,3 +2,6 @@ fastapi
uvicorn
sqlalchemy
pyserial
python-multipart
h5py
numpy
+346
View File
@@ -0,0 +1,346 @@
"""
scripts/backfill_sidecars.py — generate .sfm.json sidecars AND .h5
clean-waveform files for existing events already in the waveform store
that predate those features.

Walks `<store_root>/<serial>/<filename>` and for each BW event file:

  Sidecar (.sfm.json):
    - Skip when an existing sidecar's blastware.sha256 matches the
      current BW file's sha256 and its source.tool_version is not older
      than the current TOOL_VERSION (--force bypasses this check).
    - Else regenerate: prefer .a5.pkl (full fidelity); fall back to
      parsing the BW binary directly (peaks computed from samples).

  Clean waveform (.h5):
    - Skip when <filename>.h5 already exists (idempotent).
    - Else write from .a5.pkl (preferred) or BW binary parse (fallback).

Usage:
    python scripts/backfill_sidecars.py [--store-root PATH]
                                        [--db-path PATH]
                                        [--dry-run]
                                        [--skip-hdf5]
                                        [--force]
                                        [-v]
"""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
# Allow running from the repo root without installation.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from minimateplus import event_file_io
from sfm import event_hdf5
from sfm.waveform_store import WaveformStore, _frame_to_dict, _dict_to_frame # noqa: F401
from sfm.database import SeismoDb
log = logging.getLogger("backfill_sidecars")
def _looks_like_event_file(path: Path) -> bool:
"""Same heuristic as the importer CLI."""
if not path.is_file():
return False
if path.name.endswith((".a5.pkl", ".sfm.json")):
return False
ext = path.suffix.lstrip(".")
if not (3 <= len(ext) <= 4):
return False
if not (ext[-1].upper() in {"W", "H"} or ext.endswith("0")):
return False
try:
return path.stat().st_size >= 70
except OSError:
return False
def main(argv=None) -> int:
p = argparse.ArgumentParser(description=__doc__)
p.add_argument(
"--db-path",
default=str(Path(__file__).resolve().parent.parent / "bridges" / "captures" / "seismo_relay.db"),
)
p.add_argument("--store-root", default=None)
p.add_argument("--dry-run", action="store_true")
p.add_argument(
"--skip-hdf5", action="store_true",
help="Don't generate .h5 clean-waveform files (only sidecars).",
)
p.add_argument(
"--force", action="store_true",
help=(
"Regenerate sidecars + .h5 even when an existing sidecar's "
"blastware.sha256 matches the current BW file. Use this after "
"upgrading seismo-relay to pull in decoder bug fixes (e.g. the "
"STRT-rectime byte-offset fix in v0.15.x)."
),
)
p.add_argument("-v", "--verbose", action="store_true")
args = p.parse_args(argv)
logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO,
format="%(asctime)s %(levelname)-7s %(name)s %(message)s",
datefmt="%H:%M:%S",
)
db_path = Path(args.db_path).expanduser().resolve()
store_root = (
Path(args.store_root).expanduser().resolve()
if args.store_root else db_path.parent / "waveforms"
)
if not store_root.exists():
print(f"error: store root does not exist: {store_root}", file=sys.stderr)
return 2
store = WaveformStore(store_root)
db = SeismoDb(db_path)
written = skipped = errors = 0
for serial_dir in sorted(p for p in store_root.iterdir() if p.is_dir()):
serial = serial_dir.name
for path in sorted(serial_dir.iterdir()):
if not _looks_like_event_file(path):
continue
sidecar_path = store.sidecar_path_for(serial, path.name)
try:
bw_sha = event_file_io.file_sha256(path)
except Exception as exc:
log.error("sha256 failed for %s: %s", path, exc)
errors += 1
continue
# Skip when an up-to-date sidecar already exists.
#
# Two-part freshness check:
# 1. blastware.sha256 must match the current BW file (proves
# the sidecar describes THIS file).
# 2. source.tool_version must be ≥ current TOOL_VERSION (proves
# the sidecar was written by a build that includes any
# decoder fixes shipped since).
# Either part failing → regenerate. --force bypasses both.
if sidecar_path.exists() and not args.force:
try:
existing = event_file_io.read_sidecar(sidecar_path)
sha_ok = existing.get("blastware", {}).get("sha256") == bw_sha
src_ver = existing.get("source", {}).get("tool_version", "")
def _vt(s):
try:
return tuple(int(p) for p in str(s).split(".")[:3])
except Exception:
return (0, 0, 0)
ver_ok = _vt(src_ver) >= _vt(event_file_io.TOOL_VERSION)
if sha_ok and ver_ok:
skipped += 1
continue
if sha_ok and not ver_ok:
log.info(
"regenerating %s (sidecar tool_version=%s < current %s)",
sidecar_path.name, src_ver or "(none)",
event_file_io.TOOL_VERSION,
)
except Exception:
pass # fall through to rewrite
# Decide path: A5-based (high-fidelity) or BW-only.
a5_path = serial_dir / f"{path.name}.a5.pkl"
try:
if a5_path.exists():
frames = store.load_a5(serial, path.name)
if not frames:
raise RuntimeError("a5_pickle present but unreadable")
# Build an Event by replaying the A5 decoders. Note:
# the .a5.pkl alone CANNOT recover timestamp /
# record_type / waveform_key / per-channel peaks —
# those live in the 0C record, which isn't saved
# separately. We seed those from the DB row + the
# existing sidecar below so a re-backfill doesn't
# nuke fields the original save populated.
from minimateplus.client import (
_decode_a5_metadata_into,
_decode_a5_waveform,
)
from minimateplus.models import Event, PeakValues, ProjectInfo, Timestamp
ev = Event(index=-1)
_decode_a5_metadata_into(frames, ev)
_decode_a5_waveform(frames, ev)
source_kind = "sfm-live"
a5_filename = a5_path.name
else:
ev = event_file_io.read_blastware_file(path)
source_kind = "bw-import"
a5_filename = None
from minimateplus.models import Event, PeakValues, ProjectInfo, Timestamp
# ── Seed missing fields from the SeismoDb events row ──
# The DB row was populated at original save time with peaks,
# project info, timestamp, record_type, sample_rate, etc.
# All of those survive intact in SQLite; pull them onto the
# rebuilt Event so the regenerated sidecar matches what was
# there before the backfill ran.
db_row = None
try:
import sqlite3 as _sql
with _sql.connect(str(db.db_path)) as _conn:
_conn.row_factory = _sql.Row
db_row = _conn.execute(
"SELECT * FROM events "
"WHERE serial=? AND blastware_filename=? "
"LIMIT 1",
(serial, path.name),
).fetchone()
except Exception as exc:
log.debug("DB lookup failed for %s: %s", path.name, exc)
if db_row is not None:
if ev.sample_rate is None and db_row["sample_rate"]:
ev.sample_rate = int(db_row["sample_rate"])
if not ev.record_type and db_row["record_type"]:
ev.record_type = db_row["record_type"]
if ev._waveform_key is None and db_row["waveform_key"]:
try:
ev._waveform_key = bytes.fromhex(db_row["waveform_key"])
except Exception:
pass
# Timestamp from the ISO-8601 string in the DB row.
if ev.timestamp is None and db_row["timestamp"]:
try:
import datetime as _dt
_t = _dt.datetime.fromisoformat(db_row["timestamp"])
ev.timestamp = Timestamp(
raw=b"", flag=0x10,
year=_t.year, unknown_byte=0,
month=_t.month, day=_t.day,
hour=_t.hour, minute=_t.minute, second=_t.second,
)
except Exception:
pass
# Peaks from the DB row when the A5 decode didn't supply them.
if ev.peak_values is None:
ev.peak_values = PeakValues(
tran=db_row["tran_ppv"],
vert=db_row["vert_ppv"],
long=db_row["long_ppv"],
peak_vector_sum=db_row["peak_vector_sum"],
micl=db_row["mic_ppv"],
)
# Project info from the DB row when the A5 metadata-page
# decode didn't pick it up.
if ev.project_info is None or all(
v in (None, "")
for v in (
(ev.project_info.project if ev.project_info else None),
(ev.project_info.client if ev.project_info else None),
(ev.project_info.operator if ev.project_info else None),
(ev.project_info.sensor_location if ev.project_info else None),
)
):
ev.project_info = ProjectInfo(
project=db_row["project"],
client=db_row["client"],
operator=db_row["operator"],
sensor_location=db_row["sensor_location"],
)
# Derive total_samples when we have both rectime + sample_rate.
# The decoder's STRT-derived value can be a buffer offset
# rather than a sample count — drop it in that case.
if ev.sample_rate and ev.rectime_seconds:
derived = int(round(ev.sample_rate * ev.rectime_seconds))
if (ev.total_samples is None
or ev.total_samples > derived * 2
or ev.total_samples < derived // 4):
ev.total_samples = derived
# Preserve user-edited review state + extensions from the
# existing sidecar (false_trigger flag, notes, etc.) so a
# backfill never wipes them out.
preserved_review = None
preserved_ext = None
if sidecar_path.exists():
try:
_existing = event_file_io.read_sidecar(sidecar_path)
preserved_review = _existing.get("review")
preserved_ext = _existing.get("extensions")
except Exception:
pass
sidecar = event_file_io.event_to_sidecar_dict(
ev,
serial=serial,
blastware_filename=path.name,
blastware_filesize=path.stat().st_size,
blastware_sha256=bw_sha,
source_kind=source_kind,
a5_pickle_filename=a5_filename,
review=preserved_review,
extensions=preserved_ext,
)
# Also emit the .h5 clean-waveform file when missing OR when
# --force was passed (so a re-backfill picks up decoder fixes).
hdf5_path = store.hdf5_path_for(serial, path.name)
hdf5_filename = hdf5_path.name if hdf5_path.exists() else None
hdf5_action = "kept"
need_h5 = not args.skip_hdf5 and (args.force or not hdf5_path.exists())
if need_h5:
if args.dry_run:
hdf5_action = "would (re)write"
else:
try:
event_hdf5.write_event_hdf5(
hdf5_path, ev,
serial=serial,
geo_range="normal",
source_kind=source_kind,
)
hdf5_filename = hdf5_path.name
hdf5_action = "rewrote" if hdf5_path.exists() else "wrote"
except Exception as exc:
log.warning("HDF5 write failed for %s: %s", path.name, exc)
hdf5_action = "FAILED"
if args.dry_run:
print(f" [DRY ] would write {sidecar_path.name} "
f"+ .h5 ({hdf5_action}) source={source_kind}")
written += 1
continue
event_file_io.write_sidecar(sidecar_path, sidecar)
# Best-effort: keep the SQL row's sidecar_filename in sync
# by upserting via insert_events (it dedups on serial+ts).
try:
db.insert_events(
[ev], serial=serial,
waveform_records=(
{ev._waveform_key.hex(): {
"filename": path.name,
"filesize": path.stat().st_size,
"a5_pickle_filename": a5_filename,
"sidecar_filename": sidecar_path.name,
}}
if ev._waveform_key else None
),
)
except Exception as exc:
log.warning("DB upsert failed for %s: %s", path.name, exc)
print(f" [OK ] {path.name}{sidecar_path.name} "
f"+ h5 ({hdf5_action}) source={source_kind}")
written += 1
except Exception as exc:
log.error("backfill failed for %s: %s", path, exc, exc_info=args.verbose)
errors += 1
print(f"\nDone. written={written} skipped(uptodate)={skipped} errors={errors}")
return 0 if errors == 0 else 1
# Script entry point: run main() and hand its integer return value to
# sys.exit() so it becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
+140 -11
View File
@@ -83,13 +83,24 @@ class CachedEvent(Base):
Events are immutable once recorded on the device; once we have an event in
the cache it never needs to be re-downloaded unless explicitly requested.
The two extra columns `waveform_key` and `event_timestamp` are an
integrity stamp: when set_event() / set_waveform() are called with a
different (waveform_key, event_timestamp) for the same (conn_key, index),
we know the device was erased and re-recorded the cached row no longer
refers to the same physical event and the entire device's cache is
flushed before the new entry is written. This catches the post-erase
key-reuse bug where the device's first new event (key 01110000) collides
with the first event we previously downloaded.
"""
__tablename__ = "cached_events"
conn_key = sa.Column(sa.String, primary_key=True)
index = sa.Column(sa.Integer, primary_key=True)
event_json = sa.Column(sa.Text, nullable=False) # serialised Event dict
cached_at = sa.Column(sa.Float, nullable=False) # Unix timestamp
conn_key = sa.Column(sa.String, primary_key=True)
index = sa.Column(sa.Integer, primary_key=True)
event_json = sa.Column(sa.Text, nullable=False) # serialised Event dict
cached_at = sa.Column(sa.Float, nullable=False) # Unix timestamp
waveform_key = sa.Column(sa.String, nullable=True) # 8-hex device key
event_timestamp = sa.Column(sa.String, nullable=True) # ISO-8601 from 0C
class CachedWaveform(Base):
@@ -97,14 +108,18 @@ class CachedWaveform(Base):
Full raw ADC waveform for a single event (SUB 5A full download).
These are large (up to several MB) and expensive to fetch over cellular.
Once downloaded they are immutable and cached permanently.
Once downloaded they are immutable and cached permanently but the
cache row is invalidated when the device is erased and a new event lands
at the same index (see CachedEvent docstring).
"""
__tablename__ = "cached_waveforms"
conn_key = sa.Column(sa.String, primary_key=True)
index = sa.Column(sa.Integer, primary_key=True)
waveform_json = sa.Column(sa.Text, nullable=False) # full /device/event/{idx}/waveform response JSON
cached_at = sa.Column(sa.Float, nullable=False)
conn_key = sa.Column(sa.String, primary_key=True)
index = sa.Column(sa.Integer, primary_key=True)
waveform_json = sa.Column(sa.Text, nullable=False) # full /device/event/{idx}/waveform response JSON
cached_at = sa.Column(sa.Float, nullable=False)
waveform_key = sa.Column(sa.String, nullable=True) # 8-hex device key
event_timestamp = sa.Column(sa.String, nullable=True) # ISO-8601 from 0C
class CachedMonitorStatus(Base):
@@ -149,6 +164,23 @@ class SFMCache:
engine = sa.create_engine(url, connect_args={"check_same_thread": False})
Base.metadata.create_all(engine)
self._Session = orm.sessionmaker(bind=engine)
# In-place schema migration: add the (waveform_key, event_timestamp)
# integrity-stamp columns to legacy cache DBs that predate the
# post-erase eviction logic. ALTER TABLE ADD COLUMN is idempotent
# via the column-presence check below.
with engine.begin() as conn:
for table in ("cached_events", "cached_waveforms"):
cols = {
r[1]
for r in conn.exec_driver_sql(f"PRAGMA table_info({table})").fetchall()
}
for new_col, ddl in (
("waveform_key", "TEXT"),
("event_timestamp", "TEXT"),
):
if new_col not in cols:
log.info("cache schema: %s ADD COLUMN %s %s", table, new_col, ddl)
conn.exec_driver_sql(f"ALTER TABLE {table} ADD COLUMN {new_col} {ddl}")
log.info("SFM cache opened: %s", db_path)
# ── Connection key ────────────────────────────────────────────────────────
@@ -242,15 +274,91 @@ class SFMCache:
row = s.get(CachedEvent, (conn_key, index))
return json.loads(row.event_json) if row else None
@staticmethod
def _event_signature(ev: dict) -> tuple[Optional[str], Optional[str]]:
    """
    Pull the (waveform_key_hex, timestamp_iso) integrity stamp out of a
    serialised event dict.

    The key is read from ``"waveform_key"``, falling back to
    ``"_waveform_key"``; a bytes/bytearray value is hex-encoded.  The
    timestamp is read from ``"timestamp"``; when that value is a dict its
    ``"iso"`` entry (or else ``"string"``) is used.

    Each component is returned as-is when it ends up being a ``str`` and
    as ``None`` otherwise, so callers can treat ``None`` as "unknown".
    """
    raw_key = ev.get("waveform_key") or ev.get("_waveform_key")
    if isinstance(raw_key, (bytes, bytearray)):
        raw_key = bytes(raw_key).hex()
    if not isinstance(raw_key, str):
        raw_key = None

    stamp = ev.get("timestamp")
    if isinstance(stamp, dict):
        # Serialised timestamps may arrive as a dict such as {"iso": "..."}.
        stamp = stamp.get("iso") or stamp.get("string") or None
    if not isinstance(stamp, str):
        stamp = None

    return (raw_key, stamp)
def _maybe_flush_on_mismatch(
    self,
    s,
    conn_key: str,
    index: int,
    new_key: Optional[str],
    new_ts: Optional[str],
) -> bool:
    """
    Flush a device's cache when the row cached at (conn_key, index) carries
    a different (waveform_key, timestamp) stamp than the incoming entry.

    The cached stamp is taken from the CachedEvent row at that primary key,
    or from the CachedWaveform row when no CachedEvent exists.  A component
    only counts as "different" when both the incoming and the cached value
    are populated and unequal.  On a mismatch every CachedEvent and
    CachedWaveform row for ``conn_key`` is deleted through session ``s``;
    this method does not commit.

    Returns True when a flush was performed, False otherwise.
    """
    if not (new_key or new_ts):
        return False  # nothing to compare against

    pk = (conn_key, index)
    cached = s.get(CachedEvent, pk)
    if cached is None:
        cached = s.get(CachedWaveform, pk)
        if cached is None:
            return False

    old_key, old_ts = cached.waveform_key, cached.event_timestamp

    # A side with a missing value never triggers a flush on its own.
    key_changed = new_key and old_key and new_key != old_key
    ts_changed = new_ts and old_ts and new_ts != old_ts
    if not (key_changed or ts_changed):
        return False

    log.warning(
        "cache: device %s — index %d (key=%s, ts=%s) replaces (key=%s, ts=%s); "
        "flushing all cached events/waveforms for this device "
        "(post-erase key reuse detected)",
        conn_key, index, new_key, new_ts, old_key, old_ts,
    )
    for model in (CachedEvent, CachedWaveform):
        s.query(model).filter_by(conn_key=conn_key).delete()
    return True
def set_events(self, conn_key: str, events: list[dict]) -> None:
"""
Upsert a list of event dicts. Existing rows are updated; new rows are
inserted. This is used to add newly-discovered events to the cache.
Eviction: if any incoming event has a different (waveform_key,
timestamp) than the row currently cached at the same index, we flush
the entire device's cache before inserting the new entries. Catches
post-erase key reuse where index 0 silently switches identity.
"""
now = time.time()
with self._Session() as s:
# Eviction check: scan incoming events for any (index, key, ts)
# that conflicts with a cached row. A single conflict triggers
# a full device-wide flush so we don't end up with a mixed-era
# cache.
for ev in events:
key, ts = self._event_signature(ev)
if self._maybe_flush_on_mismatch(s, conn_key, ev["index"], key, ts):
s.commit()
break # cache is now empty for this device; carry on
for ev in events:
idx = ev["index"]
key, ts = self._event_signature(ev)
row = s.get(CachedEvent, (conn_key, idx))
if row is None:
row = CachedEvent(
@@ -258,12 +366,18 @@ class SFMCache:
index=idx,
event_json=json.dumps(ev),
cached_at=now,
waveform_key=key,
event_timestamp=ts,
)
s.add(row)
log.debug("cached new event %d for %s", idx, conn_key)
else:
# Refresh in case project_info was backfilled after initial store
row.event_json = json.dumps(ev)
if key:
row.waveform_key = key
if ts:
row.event_timestamp = ts
s.commit()
# ── Waveforms ─────────────────────────────────────────────────────────────
@@ -278,8 +392,16 @@ class SFMCache:
return json.loads(row.waveform_json)
def set_waveform(self, conn_key: str, index: int, waveform: dict) -> None:
"""Store a full waveform response dict permanently."""
"""
Store a full waveform response dict permanently.
Like set_events, this checks the (waveform_key, timestamp) signature
of the incoming entry against what's currently cached at the same
index. A mismatch flushes the entire device's cache before insert.
"""
key, ts = self._event_signature(waveform)
with self._Session() as s:
self._maybe_flush_on_mismatch(s, conn_key, index, key, ts)
row = s.get(CachedWaveform, (conn_key, index))
if row is None:
row = CachedWaveform(
@@ -287,13 +409,20 @@ class SFMCache:
index=index,
waveform_json=json.dumps(waveform),
cached_at=time.time(),
waveform_key=key,
event_timestamp=ts,
)
s.add(row)
else:
row.waveform_json = json.dumps(waveform)
row.cached_at = time.time()
if key:
row.waveform_key = key
if ts:
row.event_timestamp = ts
s.commit()
log.debug("cached waveform for %s event %d", conn_key, index)
log.debug("cached waveform for %s event %d (key=%s, ts=%s)",
conn_key, index, key, ts)
# ── Monitor status ────────────────────────────────────────────────────────
+100 -2
View File
@@ -81,6 +81,10 @@ CREATE TABLE IF NOT EXISTS events (
sample_rate INTEGER,
record_type TEXT, -- "single_shot" | "continuous"
false_trigger INTEGER NOT NULL DEFAULT 0, -- 0=no, 1=yes (manual flag)
blastware_filename TEXT, -- event file within waveform store; extension is per-event (AB0T encodes timestamp)
blastware_filesize INTEGER, -- bytes; NULL if no event file saved
a5_pickle_filename TEXT, -- "<filename>.a5.pkl" sidecar
sidecar_filename TEXT, -- "<filename>.sfm.json" review/metadata sidecar
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
UNIQUE(serial, timestamp)
);
@@ -184,6 +188,21 @@ class SeismoDb:
""")
log.info("_migrate: events table rebuilt OK")
# Migration 1b: add Blastware-file columns to existing events tables.
# New columns are NULLable so old rows just read NULL.
existing_cols = {
r[1] for r in conn.execute("PRAGMA table_info(events)").fetchall()
}
for col, ddl in (
("blastware_filename", "TEXT"),
("blastware_filesize", "INTEGER"),
("a5_pickle_filename", "TEXT"),
("sidecar_filename", "TEXT"),
):
if col not in existing_cols:
log.info("_migrate: events ADD COLUMN %s %s", col, ddl)
conn.execute(f"ALTER TABLE events ADD COLUMN {col} {ddl}")
# Migration 2: change monitor_log UNIQUE from (serial, waveform_key) to
# (serial, start_time) — same reasoning as events.
row = conn.execute(
@@ -282,12 +301,24 @@ class SeismoDb:
*,
serial: str,
session_id: Optional[str] = None,
waveform_records: Optional[dict[str, dict]] = None,
) -> tuple[int, int]:
"""
Insert triggered events. Silently skips duplicates (serial+timestamp).
Returns (inserted, skipped).
``waveform_records`` (optional): dict keyed by event waveform_key (hex)
whose value is a record from ``WaveformStore.save()``:
{"filename": str, "filesize": int, "a5_pickle_filename": str}
For events whose key is in this dict, the matching columns are
populated. If a row with the same (serial, timestamp) already exists
(dedup hit), the matching waveform record is upserted onto the
existing row so a re-download via the live endpoint refreshes the
file metadata.
"""
inserted = skipped = 0
wave_recs = waveform_records or {}
with self._connect() as conn:
for ev in events:
key = ev._waveform_key.hex() if ev._waveform_key else None
@@ -307,6 +338,7 @@ class SeismoDb:
pv = ev.peak_values
pi = ev.project_info
rec = wave_recs.get(key) or {}
try:
conn.execute(
@@ -315,8 +347,10 @@ class SeismoDb:
(id, serial, waveform_key, session_id, timestamp,
tran_ppv, vert_ppv, long_ppv, peak_vector_sum, mic_ppv,
project, client, operator, sensor_location,
sample_rate, record_type)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
sample_rate, record_type,
blastware_filename, blastware_filesize,
a5_pickle_filename, sidecar_filename)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
self._new_id(), serial, key, session_id, ts,
@@ -331,16 +365,50 @@ class SeismoDb:
pi.sensor_location if pi else None,
ev.sample_rate,
ev.record_type,
rec.get("filename"),
rec.get("filesize"),
rec.get("a5_pickle_filename"),
rec.get("sidecar_filename"),
),
)
inserted += 1
except sqlite3.IntegrityError:
skipped += 1
# Upsert waveform fields onto the existing dedup row so a
# re-download via the live endpoint refreshes filename /
# size / sidecar without churning the rest of the row.
if rec and ts:
conn.execute(
"""
UPDATE events
SET blastware_filename = ?,
blastware_filesize = ?,
a5_pickle_filename = ?,
sidecar_filename = ?
WHERE serial = ? AND timestamp = ?
""",
(
rec.get("filename"),
rec.get("filesize"),
rec.get("a5_pickle_filename"),
rec.get("sidecar_filename"),
serial,
ts,
),
)
log.debug("insert_events serial=%s inserted=%d skipped=%d",
serial, inserted, skipped)
return inserted, skipped
def get_event(self, event_id: str) -> Optional[dict]:
    """
    Look up a single row of the ``events`` table by its ``id``.

    Returns the row converted to a plain dict, or None when the query
    yields no row.
    """
    query = "SELECT * FROM events WHERE id = ?"
    with self._connect() as conn:
        found = conn.execute(query, (event_id,)).fetchone()
    if found:
        return dict(found)
    return None
def query_events(
self,
serial: Optional[str] = None,
@@ -387,6 +455,36 @@ class SeismoDb:
)
return cur.rowcount > 0
def update_event_review(self, event_id: str, review: dict) -> bool:
    """
    Mirror the indexable parts of a sidecar ``review`` block into the
    ``events`` row identified by ``event_id``.

    The only mirrored field is ``false_trigger``: when the key is present
    in ``review`` the column is set to 1 for a truthy value and 0
    otherwise.  When the key is absent the column is left alone and the
    call only checks that the row exists.

    Returns True when the row exists (or was updated), False when it does
    not exist or ``review`` is not a dict.
    """
    if not isinstance(review, dict):
        return False

    if "false_trigger" in review:
        new_flag = 1 if review.get("false_trigger") else 0
        with self._connect() as conn:
            cur = conn.execute(
                "UPDATE events SET false_trigger=? WHERE id=?",
                (new_flag, event_id),
            )
        return cur.rowcount > 0

    # Nothing derived to write; report whether the row is there at all.
    with self._connect() as conn:
        found = conn.execute(
            "SELECT 1 FROM events WHERE id=?", (event_id,),
        ).fetchone()
    return found is not None
# ── Monitor log ───────────────────────────────────────────────────────────
def insert_monitor_log(
+530
View File
@@ -0,0 +1,530 @@
"""
sfm/event_hdf5.py — HDF5 codec for the canonical "clean waveform" file.
Layout written to `<filename>.h5`:
/
samples/
Tran (float32, in/s) shape: (N,)
Vert (float32, in/s) shape: (N,)
Long (float32, in/s) shape: (N,)
MicL (float32, psi) shape: (N,)
samples_int16/ (optional)
Tran (int16, raw ADC counts) shape: (N,)
... per channel (only when present in the source)
root attrs (event metadata):
schema_version int = 1
kind str = "sfm.event.hdf5"
serial str
waveform_key str (8-hex)
timestamp str (ISO-8601)
record_type str
sample_rate int (sps)
pretrig_samples int
total_samples int
rectime_seconds float
geo_range str "normal" | "sensitive"
geo_full_scale_ips float (10.0 or 1.250)
project str
client str
operator str
sensor_location str
peak_tran_ips float (from 0C; authoritative)
peak_vert_ips float
peak_long_ips float
peak_pvs_ips float
peak_mic_psi float
tool_version str
captured_at str (ISO-8601 UTC)
source_kind str "sfm-live" | "sfm-ach" | "bw-import"
Why HDF5 and not just JSON for the canonical clean format:
- Native float32 arrays (no base64 dance, no per-value JSON parsing).
- Per-dataset gzip compression — sample arrays compress 3-5×.
- Cross-language: h5py (Python), HDF5.jl (Julia), rhdf5 / hdf5r (R), etc.
Analysis pipelines don't have to know anything about Blastware.
- Self-describing via attributes; future fields don't break readers.
The plot-ready `sfm.plot.v1` JSON returned by the REST endpoints is
derived from this HDF5 (or computed on-the-fly when no .h5 exists yet).
"""
from __future__ import annotations
import datetime
import logging
from pathlib import Path
from typing import Optional, Union
import h5py
import numpy as np
from minimateplus.event_file_io import TOOL_VERSION as _DEFAULT_TOOL_VERSION
from minimateplus.models import Event
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Version stamped into every file as the `schema_version` root attribute;
# read_event_hdf5() accepts versions 1..SCHEMA_VERSION.
SCHEMA_VERSION = 1
# Value of the `kind` root attribute; read_event_hdf5() rejects files whose
# `kind` differs.
HDF5_KIND = "sfm.event.hdf5"
# Geophone full-scale velocity per range (in/s). Confirmed in CLAUDE.md
# from 4-20-26 captures: Normal=0x00 → 10 in/s, Sensitive=0x01 → 1.25 in/s.
# Keyed both by range name and by integer code so either form can be looked up.
_GEO_FS_BY_RANGE = {
    "normal": 10.000,
    "sensitive": 1.2500,
    0: 10.000,
    1: 1.2500,
}
# Divisor used to turn int16 ADC counts into a fraction of full scale.
_INT16_FS = 32768.0
# Default mic conversion: ADC count → psi. Approximate; exact factor
# depends on firmware reference voltage and mic sensitivity, neither of
# which is independently confirmed. We try to refine it from the device-
# reported peak when available (peak_mic_psi / max_abs_int16).
# NOTE(review): _mic_scale_factor() divides this value by _INT16_FS, i.e. it
# is treated as the full-scale psi value (0.0125), which does not match the
# "≈ 0.5 psi at full scale" remark below — confirm which one is intended.
_MIC_DEFAULT_FS_PSI = 0.0125 # ≈ 0.5 psi at full scale (rough)
def _resolve_geo_full_scale(geo_range) -> float:
    """Map a geo_range value to the geophone full-scale velocity in in/s.

    ``geo_range`` may be a range name (case-insensitive string) or an
    integer-like code.  Anything that is missing, unrecognised, or cannot
    be interpreted as an integer resolves to the Normal-range value, so
    this function never raises on odd input.
    """
    default_fs = _GEO_FS_BY_RANGE["normal"]
    if geo_range is None:
        return default_fs
    if isinstance(geo_range, str):
        return _GEO_FS_BY_RANGE.get(geo_range.lower(), default_fs)
    try:
        code = int(geo_range)
    except (TypeError, ValueError, OverflowError):
        # Not a number at all (bytes, list, NaN, inf, ...): honour the
        # documented "unknown value means Normal range" contract instead of
        # letting the conversion error escape.
        return default_fs
    return _GEO_FS_BY_RANGE.get(code, default_fs)
def _normalise_range(geo_range) -> str:
    """Collapse any geo_range input to the string 'normal' or 'sensitive'.

    Strings are matched case-insensitively; a non-string equal to 1 means
    'sensitive'.  Everything else maps to 'normal'.
    """
    if isinstance(geo_range, str):
        lowered = geo_range.lower()
        return lowered if lowered in ("normal", "sensitive") else "normal"
    return "sensitive" if geo_range == 1 else "normal"
def _ts_iso(ts) -> str:
    """Render a timestamp-like object as an ISO-8601 string.

    ``ts`` is expected to expose year/month/day/hour/minute/second
    attributes; falsy hour/minute/second values are treated as 0.  ``None``
    yields an empty string, and any object that cannot be turned into a
    ``datetime`` falls back to ``str(ts)``.
    """
    if ts is None:
        return ""
    try:
        moment = datetime.datetime(
            ts.year, ts.month, ts.day,
            hour=ts.hour or 0,
            minute=ts.minute or 0,
            second=ts.second or 0,
        )
    except Exception:
        return str(ts)
    return moment.isoformat()
def _samples_to_float(
    samples_int16: list[int],
    full_scale: float,
) -> np.ndarray:
    """Scale int16 ADC counts into a float32 array of physical units.

    Each count is multiplied by ``full_scale / _INT16_FS``.  The divisor is
    32768 rather than 32767, so -32768 lands exactly on ``-full_scale`` and
    +32767 lands just below ``+full_scale``.  An empty (falsy) input gives
    an empty float32 array.
    """
    if not samples_int16:
        return np.array([], dtype=np.float32)
    # Load as int32 first, then widen to float32 before scaling.
    counts = np.asarray(samples_int16, dtype=np.int32)
    per_count = full_scale / _INT16_FS
    scaled = counts.astype(np.float32) * per_count
    return scaled.astype(np.float32)
def _mic_scale_factor(
    samples_int16: list[int],
    peak_mic_psi: Optional[float],
) -> float:
    """Return the psi-per-ADC-count factor for the microphone channel.

    With a positive reported peak and a non-empty sample list, the factor
    is ``peak_mic_psi / max(|sample|)`` (the divisor is forced to 1 when
    every sample is zero), so the scaled waveform's largest excursion
    equals the reported peak.  In every other case the module default
    ``_MIC_DEFAULT_FS_PSI / _INT16_FS`` is returned.
    """
    have_peak = peak_mic_psi is not None and peak_mic_psi > 0
    if not (have_peak and samples_int16):
        return _MIC_DEFAULT_FS_PSI / _INT16_FS
    largest = max(abs(int(count)) for count in samples_int16) or 1
    return float(peak_mic_psi) / float(largest)
def write_event_hdf5(
    path: Union[str, Path],
    event: Event,
    *,
    serial: str,
    geo_range = "normal",
    source_kind: str = "sfm-live",
    tool_version: Optional[str] = None,
    captured_at: Optional[datetime.datetime] = None,
    include_int16: bool = True,
) -> dict:
    """
    Persist a decoded Event as an HDF5 file with samples in physical units.

    Args:
        path: Destination file.  The data is first written to a sibling
            ``<name>.tmp`` file and then moved into place with
            ``os.replace``; if anything fails before the move, the temp
            file is removed and the exception is re-raised.
        event: Source event; ``raw_samples``, ``peak_values``,
            ``project_info`` and the scalar metadata fields are read.
        serial: Device serial stored in the ``serial`` root attribute.
        geo_range: Range name or integer code used to pick the geophone
            full-scale value and the ``geo_range`` attribute.
        source_kind: Stored verbatim in the ``source_kind`` attribute.
        tool_version: Overrides the default tool version when truthy.
        captured_at: Capture time; defaults to the current UTC time.  A
            naive value is rendered with a trailing "Z".
        include_int16: When True, also write a ``samples_int16`` group
            holding the raw counts of every channel that has samples, plus
            a ``mic_psi_per_count`` attribute.

    Returns a small summary dict suitable for logging:
    {"path": Path, "n_samples": int, "geo_full_scale_ips": float}
    """
    import os

    path = Path(path)
    raw = event.raw_samples or {}
    pv = event.peak_values
    pi = event.project_info
    geo_fs = _resolve_geo_full_scale(geo_range)
    geo_range_str = _normalise_range(geo_range)
    if captured_at is None:
        # datetime.utcnow() is deprecated since Python 3.12.  Take an aware
        # UTC "now" and drop tzinfo so the stored string keeps its
        # "<naive-iso>Z" shape.
        captured_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    tool_version = tool_version or _DEFAULT_TOOL_VERSION

    # Per-channel float32 arrays in physical units.
    geo_arrays = {}
    for ch in ("Tran", "Vert", "Long"):
        geo_arrays[ch] = _samples_to_float(raw.get(ch, []), geo_fs)

    # Mic channel — the per-count factor is resolved from the device-reported
    # peak when available so the plotted peak matches that value.
    mic_int16 = raw.get("MicL", [])
    mic_factor = _mic_scale_factor(
        mic_int16,
        getattr(pv, "micl", None) if pv else None,
    )
    if mic_int16:
        mic_arr = (np.asarray(mic_int16, dtype=np.int32).astype(np.float32) * mic_factor).astype(np.float32)
    else:
        mic_arr = np.array([], dtype=np.float32)

    n_samples = max(
        (len(geo_arrays[ch]) for ch in geo_arrays),
        default=0,
    )

    # Atomic write: temp file + os.replace.
    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        with h5py.File(tmp, "w") as f:
            # Root attrs — event-level metadata.
            attrs = f.attrs
            attrs["schema_version"] = SCHEMA_VERSION
            attrs["kind"] = HDF5_KIND
            attrs["serial"] = serial or ""
            attrs["waveform_key"] = event._waveform_key.hex() if event._waveform_key else ""
            attrs["timestamp"] = _ts_iso(event.timestamp)
            attrs["record_type"] = event.record_type or ""
            attrs["sample_rate"] = int(event.sample_rate or 0)
            attrs["pretrig_samples"] = int(event.pretrig_samples or 0)
            attrs["total_samples"] = int(event.total_samples or n_samples)
            attrs["rectime_seconds"] = float(event.rectime_seconds or 0.0)
            attrs["geo_range"] = geo_range_str
            attrs["geo_full_scale_ips"] = float(geo_fs)
            attrs["project"] = (pi.project if pi else "") or ""
            attrs["client"] = (pi.client if pi else "") or ""
            attrs["operator"] = (pi.operator if pi else "") or ""
            attrs["sensor_location"] = (pi.sensor_location if pi else "") or ""
            # Missing peaks are stored as 0.0 (HDF5 attrs cannot hold None).
            attrs["peak_tran_ips"] = float(pv.tran if pv and pv.tran is not None else 0.0)
            attrs["peak_vert_ips"] = float(pv.vert if pv and pv.vert is not None else 0.0)
            attrs["peak_long_ips"] = float(pv.long if pv and pv.long is not None else 0.0)
            attrs["peak_pvs_ips"] = float(pv.peak_vector_sum if pv and pv.peak_vector_sum is not None else 0.0)
            attrs["peak_mic_psi"] = float(pv.micl if pv and pv.micl is not None else 0.0)
            attrs["tool_version"] = tool_version or ""
            attrs["captured_at"] = captured_at.isoformat() + "Z" if captured_at.tzinfo is None else captured_at.isoformat()
            attrs["source_kind"] = source_kind

            # /samples — physical-units float32 (the primary data).
            sgrp = f.create_group("samples")
            for ch, arr in geo_arrays.items():
                sgrp.create_dataset(
                    ch, data=arr, dtype="float32",
                    compression="gzip", compression_opts=4, shuffle=True,
                )
            sgrp.create_dataset(
                "MicL", data=mic_arr, dtype="float32",
                compression="gzip", compression_opts=4, shuffle=True,
            )

            # /samples_int16 — optional raw ADC counts (preserved for
            # analysis tools that want pre-conversion data).
            if include_int16:
                igrp = f.create_group("samples_int16")
                for ch in ("Tran", "Vert", "Long", "MicL"):
                    vals = raw.get(ch, [])
                    if vals:
                        igrp.create_dataset(
                            ch, data=np.asarray(vals, dtype=np.int16),
                            compression="gzip", compression_opts=4, shuffle=True,
                        )
                igrp.attrs["mic_psi_per_count"] = float(mic_factor)

        os.replace(tmp, path)
    except BaseException:
        # Never leave a half-written temp file next to the real data; the
        # original error is re-raised unchanged.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise

    log.info(
        "write_event_hdf5: %s n_samples=%d geo_fs=%.3f filesize=%d",
        path, n_samples, geo_fs, path.stat().st_size,
    )
    return {
        "path": path,
        "n_samples": n_samples,
        "geo_full_scale_ips": geo_fs,
    }
def read_event_hdf5(path: Union[str, Path]) -> dict:
    """
    Load an event HDF5 file into a plain dict (no Event is reconstructed).

    Returns:
        {
          "schema_version": int,
          "kind": str,
          "attrs": dict,            # all root attributes, as Python values
          "samples": {              # one ndarray per channel; empty
            "Tran": ndarray, "Vert": ndarray,   # float32 array when the
            "Long": ndarray, "MicL": ndarray,   # dataset is absent
          },
          "samples_int16": dict of ndarray for the channels present,
                           or None when the group is absent,
          "mic_psi_per_count": float | None,
        }

    Raises ValueError when ``schema_version`` is not an int in
    1..SCHEMA_VERSION or when the ``kind`` attribute is not HDF5_KIND.
    Errors from opening the file propagate from h5py.
    """
    channel_names = ("Tran", "Vert", "Long", "MicL")
    path = Path(path)
    with h5py.File(path, "r") as f:
        attrs = {name: _h5_attr_value(value) for name, value in f.attrs.items()}

        sv = attrs.get("schema_version", 0)
        version_ok = isinstance(sv, int) and 1 <= sv <= SCHEMA_VERSION
        if not version_ok:
            raise ValueError(
                f"{path}: unsupported HDF5 schema_version={sv} "
                f"(this build supports 1..{SCHEMA_VERSION})"
            )
        if attrs.get("kind") != HDF5_KIND:
            raise ValueError(f"{path}: kind != {HDF5_KIND!r} (got {attrs.get('kind')!r})")

        # Physical-unit channels; a missing dataset becomes an empty array.
        samples = {}
        for ch in channel_names:
            dataset = f.get(f"samples/{ch}")
            if dataset is None:
                samples[ch] = np.array([], dtype=np.float32)
            else:
                samples[ch] = np.asarray(dataset[()])

        # Optional raw-count group and its mic conversion factor.
        samples_int16 = None
        mic_psi = None
        raw_group = f.get("samples_int16")
        if raw_group is not None:
            samples_int16 = {}
            for ch in channel_names:
                dataset = raw_group.get(ch)
                if dataset is not None:
                    samples_int16[ch] = np.asarray(dataset[()])
            factor = raw_group.attrs.get("mic_psi_per_count")
            if factor is not None:
                mic_psi = float(factor)

    return {
        "schema_version": sv,
        "kind": attrs.get("kind"),
        "attrs": attrs,
        "samples": samples,
        "samples_int16": samples_int16,
        "mic_psi_per_count": mic_psi,
    }
def _h5_attr_value(v):
    """Turn an h5py attribute value into a plain Python value.

    ``bytes`` are decoded as UTF-8 (undecodable bytes are replaced), NumPy
    scalars are unwrapped with ``.item()``, and anything else is returned
    unchanged.
    """
    # bytes must be tested first: NumPy byte-string scalars are both
    # ``bytes`` and ``np.generic``.
    if isinstance(v, bytes):
        return v.decode("utf-8", errors="replace")
    return v.item() if isinstance(v, np.generic) else v
# ── Plot-ready JSON ──────────────────────────────────────────────────────────
def event_to_plot_json(
    event: Event,
    *,
    serial: str,
    geo_range = "normal",
    event_id: Optional[str] = None,
    index: Optional[int] = None,
) -> dict:
    """
    Build a `sfm.plot.v1` JSON dict directly from an Event (skipping HDF5).

    Geophone channels are scaled with the full-scale value resolved from
    ``geo_range``; the mic channel uses the factor returned by
    ``_mic_scale_factor``.  A missing or zero sample rate is reported as
    1024.  ``index`` defaults to ``event.index`` when not given.

    The sample count is the longest geophone channel; when no geophone
    channel has samples it falls back to the mic channel length, so a
    mic-only event is not reported as having zero samples.
    """
    raw = event.raw_samples or {}
    pv = event.peak_values
    geo_fs = _resolve_geo_full_scale(geo_range)
    geo_range_str = _normalise_range(geo_range)
    sr = int(event.sample_rate or 0) or 1024
    pretrig = int(event.pretrig_samples or 0)

    geo_arrays = {ch: _samples_to_float(raw.get(ch, []), geo_fs).tolist()
                  for ch in ("Tran", "Vert", "Long")}
    mic_int16 = raw.get("MicL", [])
    mic_factor = _mic_scale_factor(
        mic_int16,
        getattr(pv, "micl", None) if pv else None,
    )
    mic_arr = [float(v) * mic_factor for v in mic_int16] if mic_int16 else []

    # geo_arrays always holds three (possibly empty) lists, so a
    # ``max(..., default=...)`` fallback would never be used; apply the mic
    # fallback explicitly instead.
    n = max(len(vals) for vals in geo_arrays.values()) or len(mic_arr)

    return _build_plot_dict(
        n_samples=n,
        sample_rate=sr,
        pretrig_samples=pretrig,
        total_samples=int(event.total_samples or n),
        rectime_seconds=float(event.rectime_seconds or 0.0),
        timestamp_iso=_ts_iso(event.timestamp),
        serial=serial,
        record_type=event.record_type,
        waveform_key=event._waveform_key.hex() if event._waveform_key else None,
        geo_range=geo_range_str,
        geo_fs=geo_fs,
        channels_floats={
            "Tran": geo_arrays["Tran"],
            "Vert": geo_arrays["Vert"],
            "Long": geo_arrays["Long"],
            "MicL": mic_arr,
        },
        peaks_dict={
            "tran": getattr(pv, "tran", None) if pv else None,
            "vert": getattr(pv, "vert", None) if pv else None,
            "long": getattr(pv, "long", None) if pv else None,
            "pvs": getattr(pv, "peak_vector_sum", None) if pv else None,
            "mic": getattr(pv, "micl", None) if pv else None,
        },
        event_id=event_id,
        index=index if index is not None else event.index,
    )
def plot_json_from_hdf5(
    path: Union[str, Path],
    *,
    event_id: Optional[str] = None,
    index: Optional[int] = None,
) -> dict:
    """Build a `sfm.plot.v1` JSON dict from a stored .h5 file.

    The file is loaded with ``read_event_hdf5``.  Stored peak attributes
    of 0.0 (or missing) are reported as None, a missing or zero sample
    rate as 1024, and a missing or zero full-scale value as 10.0.  The
    sample count is the length of the "Tran" channel.
    """
    loaded = read_event_hdf5(path)
    a = loaded["attrs"]
    s = loaded["samples"]

    def _peak(attr_name: str) -> Optional[float]:
        # 0.0 is what gets stored for "no peak", so map it back to None.
        return float(a.get(attr_name, 0.0) or 0.0) or None

    channel_lists = {
        ch: s.get(ch, np.array([])).tolist()
        for ch in ("Tran", "Vert", "Long", "MicL")
    }
    peaks = {
        "tran": _peak("peak_tran_ips"),
        "vert": _peak("peak_vert_ips"),
        "long": _peak("peak_long_ips"),
        "pvs": _peak("peak_pvs_ips"),
        "mic": _peak("peak_mic_psi"),
    }
    tran_len = len(s["Tran"]) if "Tran" in s else 0

    return _build_plot_dict(
        n_samples=tran_len,
        sample_rate=int(a.get("sample_rate", 1024) or 1024),
        pretrig_samples=int(a.get("pretrig_samples", 0) or 0),
        total_samples=int(a.get("total_samples", 0) or 0),
        rectime_seconds=float(a.get("rectime_seconds", 0.0) or 0.0),
        timestamp_iso=a.get("timestamp", ""),
        serial=a.get("serial", ""),
        record_type=a.get("record_type", ""),
        waveform_key=a.get("waveform_key", "") or None,
        geo_range=a.get("geo_range", "normal"),
        geo_fs=float(a.get("geo_full_scale_ips", 10.0) or 10.0),
        channels_floats=channel_lists,
        peaks_dict=peaks,
        event_id=event_id,
        index=index,
    )
def _build_plot_dict(
    *,
    n_samples: int,
    sample_rate: int,
    pretrig_samples: int,
    total_samples: int,
    rectime_seconds: float,
    timestamp_iso: str,
    serial: str,
    record_type: Optional[str],
    waveform_key: Optional[str],
    geo_range: str,
    geo_fs: float,
    channels_floats: dict[str, list[float]],
    peaks_dict: dict[str, Optional[float]],
    event_id: Optional[str],
    index: Optional[int] = None,
) -> dict:
    """Assemble the `sfm.plot.v1` response dict from already-scaled data.

    The time axis puts the trigger at 0 ms: the sample spacing is
    ``1000 / sample_rate`` ms (0 when the rate is not positive) and the
    first sample sits at ``-pretrig_samples * dt``.  Each channel entry
    carries its unit, its values, a peak (the supplied one, or else the
    sample with the largest magnitude) and the time of that
    largest-magnitude sample.  ``total_samples`` falls back to
    ``n_samples`` when it is 0.
    """
    dt_ms = 1000.0 / sample_rate if sample_rate > 0 else 0.0
    t0_ms = -pretrig_samples * dt_ms

    def _channel(unit: str, values: list[float], peak: Optional[float]) -> dict:
        entry = {"unit": unit, "values": values, "peak": peak, "peak_t_ms": None}
        if values:
            # First index of the largest absolute sample marks the peak time.
            magnitudes = list(map(abs, values))
            at = magnitudes.index(max(magnitudes))
            entry["peak_t_ms"] = round(t0_ms + at * dt_ms, 4)
            if peak is None:
                entry["peak"] = values[at]
        return entry

    # (channel name, unit, key into peaks_dict)
    channel_spec = (
        ("Tran", "in/s", "tran"),
        ("Vert", "in/s", "vert"),
        ("Long", "in/s", "long"),
        ("MicL", "psi", "mic"),
    )
    channels = {
        name: _channel(unit, channels_floats.get(name, []), peaks_dict.get(peak_key))
        for name, unit, peak_key in channel_spec
    }

    time_axis = {
        "sample_rate": sample_rate,
        "pretrig_samples": pretrig_samples,
        "total_samples": total_samples or n_samples,
        "n_samples": n_samples,
        "t0_ms": round(t0_ms, 4),
        "dt_ms": round(dt_ms, 6),
        "rectime_seconds": rectime_seconds,
    }
    peak_values = {
        "transverse": peaks_dict.get("tran"),
        "vertical": peaks_dict.get("vert"),
        "longitudinal": peaks_dict.get("long"),
        "vector_sum": peaks_dict.get("pvs"),
        "mic_psi": peaks_dict.get("mic"),
    }

    return {
        "schema": "sfm.plot.v1",
        "event_id": event_id,
        "index": index,
        "serial": serial,
        "timestamp": timestamp_iso,
        "record_type": record_type,
        "waveform_key": waveform_key,
        "time_axis": time_axis,
        "geo_range": geo_range,
        "geo_full_scale_ips": geo_fs,
        "trigger_ms": 0.0,
        "channels": channels,
        "peak_values": peak_values,
    }
+194
View File
@@ -0,0 +1,194 @@
"""
sfm/import_bw.py — CLI for ingesting Blastware-format event files.
Walks a path (file or directory), parses each recognised event-file
binary, copies it into the canonical waveform store, writes the
.sfm.json sidecar, and upserts a row in seismo_relay.db.
Use cases:
- Migrating a Blastware ACH inbox into SFM
- One-off imports of files emailed in by field crews
- Bulk-loading historical archives
Usage:
python -m sfm.import_bw <path-or-dir> [--serial BE11529]
[--db-path bridges/captures/seismo_relay.db]
[--store-root bridges/captures/waveforms]
[--dry-run]
[-v]
Examples:
python -m sfm.import_bw ~/Downloads/M529LKIQ.7M0W
python -m sfm.import_bw /path/to/blastware_archive --serial BE11529
"""
from __future__ import annotations
import argparse
import logging
import sys
from pathlib import Path
from typing import Iterator
# Allow running from the repo root without installation.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from sfm.database import SeismoDb
from sfm.waveform_store import WaveformStore
log = logging.getLogger("sfm.import_bw")
# Blastware event-file extensions: 4-char `AB0T` (T = W or H) for ACH
# downloads, 3-char `AB0` for direct downloads. We discover candidates
# by length + last-char rather than enumerating every (A, B) pair.
def _looks_like_bw_event(path: Path) -> bool:
"""Heuristic: 3-char or 4-char extension, ends with W/H/0, and the
file is at least 70 bytes (header + STRT + footer minimum)."""
if not path.is_file():
return False
ext = path.suffix.lstrip(".")
if not (3 <= len(ext) <= 4):
return False
if not (ext[-1].upper() in {"W", "H"} or ext.endswith("0")):
return False
try:
return path.stat().st_size >= 70
except OSError:
return False
def _walk(path: Path) -> Iterator[Path]:
    """Yield the Blastware event-file candidates found at ``path``.

    A file yields itself when it passes ``_looks_like_bw_event``; a
    directory is searched recursively and its matches are yielded in
    sorted order.  Anything else yields nothing.
    """
    if path.is_file():
        pool = [path]
    elif path.is_dir():
        pool = sorted(path.rglob("*"))
    else:
        pool = []
    yield from (entry for entry in pool if _looks_like_bw_event(entry))
def main(argv: list[str] | None = None) -> int:
    """Run the Blastware import CLI and return a process exit code.

    Every candidate found under the given path is read; in normal mode it
    is saved through ``WaveformStore.save_imported_bw`` and upserted with
    ``SeismoDb.insert_events``, in ``--dry-run`` mode it is only parsed and
    reported.  One status line is printed per file, then a summary.

    Args:
        argv: Argument vector without the program name.  ``None`` lets
            argparse read ``sys.argv``.

    Returns:
        0 when no file failed, 1 when no candidates were found or at
        least one file failed, 2 when the given path does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Import Blastware-format event files into the SFM store + DB.",
    )
    parser.add_argument("path", help="File or directory to import.")
    parser.add_argument(
        "--serial", default=None, metavar="SERIAL",
        help="Override the serial-number hint (e.g. BE11529). Defaults to "
             "the value decoded from each BW filename's prefix.",
    )
    parser.add_argument(
        "--db-path",
        default=str(Path(__file__).resolve().parent.parent / "bridges" / "captures" / "seismo_relay.db"),
        help="Path to seismo_relay.db (default: bridges/captures/seismo_relay.db).",
    )
    parser.add_argument(
        "--store-root",
        default=None,
        help="Root of the waveform store (default: <db_dir>/waveforms).",
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Parse and report per-file outcomes; don't write anything.",
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Debug logging.")
    args = parser.parse_args(argv)

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s %(levelname)-7s %(name)s %(message)s",
        datefmt="%H:%M:%S",
    )

    src = Path(args.path).expanduser().resolve()
    if not src.exists():
        print(f"error: {src} does not exist", file=sys.stderr)
        return 2

    candidates = list(_walk(src))
    if not candidates:
        print(f"No BW event-file candidates found under {src}", file=sys.stderr)
        return 1

    # Open the DB / store only once we know there is something to import,
    # so an empty run does not touch either of them.
    db = store = None
    if args.dry_run:
        # Imported once, and only in the mode that needs it.
        from minimateplus import event_file_io
    else:
        db_path = Path(args.db_path).expanduser().resolve()
        store_root = (
            Path(args.store_root).expanduser().resolve()
            if args.store_root else db_path.parent / "waveforms"
        )
        db = SeismoDb(db_path)
        store = WaveformStore(store_root)

    print(f"Importing {len(candidates)} file(s) from {src}...")
    if args.dry_run:
        print("(dry-run — no writes will occur)")

    ok = err = skipped = 0
    for path in candidates:
        # Reading up front doubles as a readability check in dry-run mode.
        try:
            bw_bytes = path.read_bytes()
        except Exception as exc:
            print(f" [ERR ] {path}: read failed: {exc}")
            err += 1
            continue

        if args.dry_run:
            # Just parse to verify integrity; don't touch DB or store.
            try:
                ev = event_file_io.read_blastware_file(path)
                stamp = ev.timestamp
                ts = (
                    f"{stamp.year}-{stamp.month:02d}-{stamp.day:02d} "
                    f"{stamp.hour:02d}:{stamp.minute:02d}:{stamp.second:02d}"
                ) if stamp else "?"
                pv = ev.peak_values
                pvs = pv.peak_vector_sum if pv and pv.peak_vector_sum is not None else 0.0
                print(f" [OK ] {path.name} ts={ts} PVS={pvs:.4f}")
                ok += 1
            except Exception as exc:
                print(f" [ERR ] {path}: parse failed: {exc}")
                err += 1
            continue

        try:
            ev, rec = store.save_imported_bw(
                bw_bytes, source_path=path, serial_hint=args.serial,
            )
            # Resolve serial for the DB row. Prefer the hint, then the
            # one decoded from the filename (already done by the store).
            serial_used = args.serial or _infer_serial(path.name) or "UNKNOWN"
            ins, sk = db.insert_events(
                [ev], serial=serial_used,
                waveform_records=(
                    {ev._waveform_key.hex(): rec}
                    if ev._waveform_key else None
                ),
            )
            tag = "OK " if ins else ("SKIP" if sk else "OK ")
            # Source name and stored name are separated so they cannot
            # run together in the report line.
            print(f" [{tag}] {path.name} -> {rec['filename']} "
                  f"({rec['filesize']} B, sha256={rec['sha256'][:12]}…) "
                  f"serial={serial_used} ins={ins} skip={sk}")
            if ins:
                ok += 1
            else:
                skipped += 1
        except Exception as exc:
            # Per-file boundary: report and keep going with the next file.
            print(f" [ERR ] {path}: import failed: {exc}")
            log.debug("traceback", exc_info=True)
            err += 1

    print(f"\nDone. ok={ok} skipped={skipped} errors={err}")
    return 0 if err == 0 else 1
def _infer_serial(filename: str):
    """Decode a serial from a BW filename via the waveform store's decoder.

    Thin wrapper around ``sfm.waveform_store._serial_from_bw_filename``;
    the import is kept local to the call.
    """
    from sfm import waveform_store

    return waveform_store._serial_from_bw_filename(filename)
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
+189
View File
@@ -0,0 +1,189 @@
"""
sfm/live_cache.py — Thread-safe in-memory cache for live SFM device data.
Extracted from sfm/server.py so the cache logic is importable and testable
without pulling in fastapi/uvicorn.
Caching strategy
----------------
Keyed by `conn_key` ("tcp:host:port" or "serial:port:baud"). Does NOT
persist across server restarts.
device_info cached until POST /device/config marks it dirty
events cached by (conn_key, device_event_count); re-fetched when
a quick count_events() probe shows new events on the device
monitor_status 30-second TTL (changes frequently during monitoring)
waveforms permanent within a process but auto-evicted at the device
level when a (waveform_key, timestamp) mismatch is detected
at the same index (post-erase key reuse — the device's
event-key counter resets to 0x01110000 after every erase,
so the same `(conn_key, index)` slot can refer to a
brand-new physical event).
All endpoints accept ?force=true to bypass the cache and re-read.
"""
from __future__ import annotations
import threading
import time
from typing import Optional
_MONITOR_STATUS_TTL = 30.0 # seconds
class LiveCache:
    """
    In-memory cache for live SFM device data, guarded by a single lock.

    Intended to be used as one singleton per server process.  Entries are
    keyed by the connection key produced by ``make_conn_key``; waveforms
    are keyed by ``(conn_key, index)``.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        # conn_key -> serialised device info
        self._device_info: dict[str, dict] = {}
        # conn_key -> (device event count at cache time, event dicts)
        self._events: dict[str, tuple[int, list]] = {}
        # conn_key -> (time.time() at fetch, status dict)
        self._monitor_status: dict[str, tuple[float, dict]] = {}
        # conn_key -> True while cached info/events must not be served
        self._config_dirty: dict[str, bool] = {}
        # (conn_key, index) -> waveform dict
        self._waveforms: dict[tuple, dict] = {}

    # ── Connection key ────────────────────────────────────────────────────────

    @staticmethod
    def make_conn_key(
        host: Optional[str],
        tcp_port: int,
        port: Optional[str],
        baud: int,
    ) -> str:
        """Return ``tcp:host:port`` when a host is given, else ``serial:port:baud``."""
        return f"tcp:{host}:{tcp_port}" if host else f"serial:{port}:{baud}"

    # ── Eviction signature ────────────────────────────────────────────────────

    @staticmethod
    def _event_signature(ev: dict) -> tuple[Optional[str], Optional[str]]:
        """Extract ``(waveform_key_hex, timestamp_iso)`` from a serialised event.

        Either element is ``None`` when it is absent or not a string after
        normalisation (bytes keys are hex-encoded; dict timestamps are
        reduced to their ``iso`` or ``string`` entry).
        """
        raw_key = ev.get("waveform_key") or ev.get("_waveform_key")
        if isinstance(raw_key, (bytes, bytearray)):
            raw_key = bytes(raw_key).hex()

        raw_ts = ev.get("timestamp")
        if isinstance(raw_ts, dict):
            raw_ts = raw_ts.get("iso") or raw_ts.get("string") or None

        key_hex = raw_key if isinstance(raw_key, str) else None
        ts_iso = raw_ts if isinstance(raw_ts, str) else None
        return key_hex, ts_iso

    @staticmethod
    def _signatures_conflict(new_sig: tuple, old_sig: tuple) -> bool:
        """True when a field is present on both sides and the values differ."""
        return any(
            new and old and new != old
            for new, old in zip(new_sig, old_sig)
        )

    def _flush_device(self, conn_key: str) -> None:
        """Forget the events and every waveform of one device. Caller holds lock."""
        self._events.pop(conn_key, None)
        # Collect first: the dict must not change size while iterated.
        doomed = [wf_key for wf_key in self._waveforms if wf_key[0] == conn_key]
        for wf_key in doomed:
            del self._waveforms[wf_key]

    # ── Device info ───────────────────────────────────────────────────────────

    def get_device_info(self, conn_key: str) -> Optional[dict]:
        """Return cached device info, or ``None`` when absent or marked dirty."""
        with self._lock:
            dirty = self._config_dirty.get(conn_key)
            return None if dirty else self._device_info.get(conn_key)

    def set_device_info(self, conn_key: str, info: dict) -> None:
        """Store device info and clear the connection's dirty flag."""
        with self._lock:
            self._config_dirty[conn_key] = False
            self._device_info[conn_key] = info

    # ── Events ────────────────────────────────────────────────────────────────

    def get_events(self, conn_key: str, device_count: int) -> Optional[list]:
        """Return the cached events when the cached count equals ``device_count``.

        ``None`` signals a miss: nothing cached, the config is dirty, or
        the count differs.
        """
        with self._lock:
            if self._config_dirty.get(conn_key):
                return None
            entry = self._events.get(conn_key)
            if entry is None:
                return None
            cached_count, cached_events = entry
            if cached_count != device_count:
                return None
            return cached_events

    def set_events(self, conn_key: str, device_count: int, events: list) -> None:
        """
        Replace the cached events list for `conn_key`.

        When an incoming event and the cached event at the same index
        disagree on waveform key or timestamp, the connection's events and
        waveforms are flushed before the new list is stored.  Catches
        post-erase key reuse.
        """
        with self._lock:
            previous = self._events.get(conn_key)
            previous_events = previous[1] if previous else []
            previous_by_index = {e.get("index"): e for e in previous_events}

            def _replaces_different_event(incoming: dict) -> bool:
                idx = incoming.get("index")
                if idx is None:
                    return False
                old = previous_by_index.get(idx)
                if old is None:
                    return False
                return self._signatures_conflict(
                    self._event_signature(incoming),
                    self._event_signature(old),
                )

            # any() stops at the first conflicting index.
            if any(_replaces_different_event(ev) for ev in events):
                self._flush_device(conn_key)
            self._events[conn_key] = (device_count, events)

    # ── Monitor status ────────────────────────────────────────────────────────

    def get_monitor_status(self, conn_key: str) -> Optional[dict]:
        """Return the cached status unless it is missing or older than the TTL."""
        with self._lock:
            entry = self._monitor_status.get(conn_key)
            if entry is None:
                return None
            fetched_at, status = entry
            age = time.time() - fetched_at
            return None if age > _MONITOR_STATUS_TTL else status

    def set_monitor_status(self, conn_key: str, status: dict) -> None:
        """Cache ``status`` stamped with the current ``time.time()``."""
        with self._lock:
            self._monitor_status[conn_key] = (time.time(), status)

    def invalidate_monitor_status(self, conn_key: str) -> None:
        """Drop the cached monitor status, if any."""
        with self._lock:
            self._monitor_status.pop(conn_key, None)

    # ── Config dirty flag ─────────────────────────────────────────────────────

    def mark_config_dirty(self, conn_key: str) -> None:
        """Flag the connection dirty and drop its cached events."""
        with self._lock:
            self._events.pop(conn_key, None)
            self._config_dirty[conn_key] = True

    # ── Waveforms (permanent cache, evicted on (key,ts) mismatch) ─────────────

    def get_waveform(self, conn_key: str, index: int) -> Optional[dict]:
        """Return the cached waveform for ``(conn_key, index)``, or ``None``."""
        with self._lock:
            return self._waveforms.get((conn_key, index))

    def set_waveform(self, conn_key: str, index: int, waveform: dict) -> None:
        """
        Cache a waveform.

        When an entry already exists at the same index and disagrees on
        waveform key or timestamp, the device's events and waveforms are
        flushed before the new entry is stored.
        """
        slot = (conn_key, index)
        with self._lock:
            existing = self._waveforms.get(slot)
            if existing is not None and self._signatures_conflict(
                self._event_signature(waveform),
                self._event_signature(existing),
            ):
                self._flush_device(conn_key)
            self._waveforms[slot] = waveform
+437 -141
View File
@@ -45,9 +45,9 @@ from typing import Optional
# FastAPI / Pydantic
try:
from fastapi import Body, FastAPI, HTTPException, Query
from fastapi import Body, FastAPI, File, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from pydantic import BaseModel
import uvicorn
except ImportError:
@@ -63,8 +63,12 @@ from minimateplus.protocol import ProtocolError
from minimateplus.models import CallHomeConfig, ComplianceConfig, DeviceInfo, Event, PeakValues, ProjectInfo, Timestamp
from minimateplus.transport import TcpTransport, DEFAULT_TCP_PORT
from minimateplus.blastware_file import write_blastware_file, blastware_filename
from minimateplus.client import _decode_a5_metadata_into, _decode_a5_waveform
from sfm import event_hdf5
from sfm.cache import SFMCache, get_cache
from sfm.database import SeismoDb
from sfm.live_cache import LiveCache as _LiveCache
from sfm.waveform_store import WaveformStore
logging.basicConfig(
level=logging.INFO,
@@ -101,6 +105,7 @@ app.add_middleware(
_DEFAULT_DB_PATH = Path(__file__).parent.parent / "bridges" / "captures" / "seismo_relay.db"
_db: Optional[SeismoDb] = None
_store: Optional[WaveformStore] = None
def _get_db() -> SeismoDb:
@@ -110,6 +115,18 @@ def _get_db() -> SeismoDb:
return _db
def _get_store() -> WaveformStore:
    """
    Return the process-wide WaveformStore, creating it on first use.

    The store is rooted at <db_dir>/waveforms/, next to the database file.
    Mirrors the layout used by bridges/ach_server.py so files saved by ACH
    ingestion and by live SFM downloads share one canonical location.
    """
    global _store
    if _store is not None:
        return _store
    waveform_root = _get_db().db_path.parent / "waveforms"
    _store = WaveformStore(waveform_root)
    return _store
# ── Live device cache ─────────────────────────────────────────────────────────
# In-memory cache for live device data. Avoids re-dialing the device on every
# request when the data hasn't changed.
@@ -127,116 +144,6 @@ def _get_db() -> SeismoDb:
#
# All endpoints accept ?force=true to bypass the cache and re-read from device.
_MONITOR_STATUS_TTL = 30.0 # seconds
class _LiveCache:
"""
Thread-safe in-memory cache for live SFM device data.
One singleton per server process.
"""
def __init__(self) -> None:
self._lock = threading.Lock()
# conn_key → serialised device info dict
self._device_info: dict[str, dict] = {}
# conn_key → (device_event_count_when_cached, [event dicts])
self._events: dict[str, tuple[int, list]] = {}
# conn_key → (fetched_at_unix, status_dict)
self._monitor_status: dict[str, tuple[float, dict]] = {}
# conn_key → bool (True = re-read device on next /device/info)
self._config_dirty: dict[str, bool] = {}
# (conn_key, event_index) → waveform dict (permanent)
self._waveforms: dict[tuple, dict] = {}
# ── Connection key ────────────────────────────────────────────────────────
@staticmethod
def make_conn_key(
host: Optional[str],
tcp_port: int,
port: Optional[str],
baud: int,
) -> str:
if host:
return f"tcp:{host}:{tcp_port}"
return f"serial:{port}:{baud}"
# ── Device info ───────────────────────────────────────────────────────────
def get_device_info(self, conn_key: str) -> Optional[dict]:
with self._lock:
if self._config_dirty.get(conn_key):
return None
return self._device_info.get(conn_key)
def set_device_info(self, conn_key: str, info: dict) -> None:
with self._lock:
self._device_info[conn_key] = info
self._config_dirty[conn_key] = False
# ── Events ────────────────────────────────────────────────────────────────
def get_events(self, conn_key: str, device_count: int) -> Optional[list]:
"""
Return cached events if the device's current event count matches what
we had when we last fetched. Returns None (cache miss) otherwise.
"""
with self._lock:
if self._config_dirty.get(conn_key):
return None
entry = self._events.get(conn_key)
if entry is None:
return None
cached_count, events = entry
return events if cached_count == device_count else None
def set_events(self, conn_key: str, device_count: int, events: list) -> None:
with self._lock:
self._events[conn_key] = (device_count, events)
# ── Monitor status ────────────────────────────────────────────────────────
def get_monitor_status(self, conn_key: str) -> Optional[dict]:
with self._lock:
entry = self._monitor_status.get(conn_key)
if entry is None:
return None
fetched_at, status = entry
if time.time() - fetched_at > _MONITOR_STATUS_TTL:
return None
return status
def set_monitor_status(self, conn_key: str, status: dict) -> None:
with self._lock:
self._monitor_status[conn_key] = (time.time(), status)
def invalidate_monitor_status(self, conn_key: str) -> None:
with self._lock:
self._monitor_status.pop(conn_key, None)
# ── Config dirty flag ─────────────────────────────────────────────────────
def mark_config_dirty(self, conn_key: str) -> None:
"""
Called after a successful POST /device/config write.
Forces next /device/info and /device/events to re-read from the device.
"""
with self._lock:
self._config_dirty[conn_key] = True
self._events.pop(conn_key, None)
# ── Waveforms (permanent cache) ───────────────────────────────────────────
def get_waveform(self, conn_key: str, index: int) -> Optional[dict]:
with self._lock:
return self._waveforms.get((conn_key, index))
def set_waveform(self, conn_key: str, index: int, waveform: dict) -> None:
with self._lock:
self._waveforms[(conn_key, index)] = waveform
_live_cache = _LiveCache()
@@ -783,7 +690,7 @@ def device_event_waveform(
if the device is not storing all frames yet, or the capture was partial)
- **sample_rate**: samples per second (from compliance config)
- **channels**: dict of channel name list of signed int16 ADC counts
(keys: "Tran", "Vert", "Long", "Mic")
(keys: "Tran", "Vert", "Long", "MicL")
**Caching**: full waveforms are cached permanently after the first download
they are immutable once recorded on the device. Subsequent requests for the
@@ -826,26 +733,33 @@ def device_event_waveform(
detail=f"Event index {index} not found on device",
)
raw = getattr(ev, "raw_samples", None) or {}
samples_decoded = len(raw.get("Tran", []))
# Backfill from compliance_config: sample_rate, record_time, and
# derived total_samples. These are user-set authoritative values; the
# corresponding STRT-derived guesses in `_decode_a5_waveform` can be
# off (e.g. rectime used to read the 0x46 record-type marker = 70s).
cc = info.compliance_config
if cc:
if ev.sample_rate is None and cc.sample_rate:
ev.sample_rate = cc.sample_rate
if cc.record_time:
ev.rectime_seconds = cc.record_time
if ev.sample_rate and ev.rectime_seconds:
derived = int(round(ev.sample_rate * ev.rectime_seconds))
if (ev.total_samples is None
or ev.total_samples > derived * 2
or ev.total_samples < derived // 4):
ev.total_samples = derived
geo_range = getattr(cc, "geo_range", None) if cc else None
# Resolve sample_rate from compliance config if not on the event itself
sample_rate = ev.sample_rate
if sample_rate is None and info.compliance_config:
sample_rate = info.compliance_config.sample_rate
result = {
"index": ev.index,
"record_type": ev.record_type,
"timestamp": _serialise_timestamp(ev.timestamp),
"total_samples": ev.total_samples,
"pretrig_samples": ev.pretrig_samples,
"rectime_seconds": ev.rectime_seconds,
"samples_decoded": samples_decoded,
"sample_rate": sample_rate,
"peak_values": _serialise_peak_values(ev.peak_values),
"channels": raw,
}
# Build the plot.v1 JSON: samples in physical units (in/s for geo, psi
# for mic), explicit time axis, peak markers — the shape clients should
# consume directly without doing any ADC scaling.
serial = getattr(info, "serial", None) or ""
result = event_hdf5.event_to_plot_json(
ev, serial=serial,
geo_range=geo_range or "normal",
index=index,
)
cache.set_waveform(conn_key, index, result)
return result
@@ -857,6 +771,7 @@ def device_event_blastware_file(
baud: int = Query(38400, description="Serial baud rate"),
host: Optional[str] = Query(None, description="TCP host — modem IP or ACH relay"),
tcp_port: int = Query(DEFAULT_TCP_PORT, description=f"TCP port (default {DEFAULT_TCP_PORT})"),
force: bool = Query(False, description="Bypass any cached/dedup'd state and re-download from device"),
) -> FileResponse:
"""
Download the waveform for a single event (0-based index) and return it
@@ -874,24 +789,29 @@ def device_event_blastware_file(
triggered events; histogram requires recording_mode
to be populated from compliance config)
Performs: POLL startup get_events(full_waveform=False, extra_chunks=1,
stop_after_index=index) write_blastware_file() FileResponse.
Performs: POLL startup get_events(full_waveform=True,
stop_after_index=index) write_blastware_file() FileResponse +
persistent store + DB upsert.
"""
log.info(
"GET /device/event/%d/blastware_file port=%s host=%s",
index, port, host,
"GET /device/event/%d/blastware_file port=%s host=%s force=%s",
index, port, host, force,
)
# `force` always re-downloads from the device. This endpoint already
# never short-circuits via cache, so `force` is reserved for parity with
# the other live endpoints.
try:
def _do():
with _build_client(port, baud, host, tcp_port, timeout=120.0) as client:
info = client.connect()
# Under v0.14.0 BW-exact 5A walk, the chunk loop is bounded by
# the event end_offset extracted from STRT. No more
# stop_after_metadata / extra_chunks gymnastics — these
# kwargs are now no-ops.
# full_waveform=True pulls the complete 5A stream so the
# client populates STRT-derived fields (total_samples,
# pretrig_samples, rectime_seconds) AND raw_samples on the
# Event. Required for the .h5 + .sfm.json sidecar to be
# filled in correctly — without it, those land as nulls.
events = client.get_events(
full_waveform=False,
full_waveform=True,
stop_after_index=index,
)
matching = [ev for ev in events if ev.index == index]
@@ -946,6 +866,54 @@ def device_event_blastware_file(
out_path, len(a5_frames), serial,
)
# Promote to canonical persistent store + DB row so this event is
# queryable via /db/events afterwards (matches the ACH ingestion path).
if serial != "UNKNOWN" and ev._waveform_key is not None:
try:
cc = info.compliance_config
# Backfill authoritative compliance-config values onto the
# Event before persisting. These supersede whatever
# _decode_a5_waveform read from the STRT bytes (some of which
# have ambiguous semantics — e.g. STRT[20] is rectime but
# STRT[8:10] / STRT[16:18] are device-specific scratch fields
# that aren't reliable sample/pretrig counts).
if cc:
if ev.sample_rate is None and cc.sample_rate:
ev.sample_rate = cc.sample_rate
if cc.record_time:
# record_time from compliance is authoritative — the
# user-set value the device followed when recording.
ev.rectime_seconds = cc.record_time
# Derive total_samples from sample_rate × rectime when
# we can; the STRT-derived value can land at a buffer-
# offset rather than a sample count.
if ev.sample_rate and ev.rectime_seconds:
derived = int(round(ev.sample_rate * ev.rectime_seconds))
if (ev.total_samples is None
or ev.total_samples > derived * 2
or ev.total_samples < derived // 4):
ev.total_samples = derived
geo_range = getattr(cc, "geo_range", None) if cc else None
rec = _get_store().save(
ev, serial=serial, a5_frames=a5_frames,
geo_range=geo_range if geo_range is not None else "normal",
)
_get_db().insert_events(
[ev],
serial=serial,
waveform_records={ev._waveform_key.hex(): rec},
)
log.info(
"blastware_file: persisted to store (%s, %d bytes)",
rec["filename"], rec["filesize"],
)
except Exception as exc:
log.warning(
"blastware_file: persistent store save failed: %s "
"— temp file still served",
exc,
)
return FileResponse(
path=str(out_path),
filename=filename,
@@ -1435,6 +1403,334 @@ def db_set_false_trigger(
return {"status": "ok", "event_id": event_id, "false_trigger": value}
# ── /db/events/{id} — waveform file accessors ─────────────────────────────────
#
# These endpoints serve files from the persistent WaveformStore, so a Blastware
# file or its decoded JSON for a previously-ingested ACH event can be fetched
# without re-dialing the device.
@app.get("/db/events/{event_id}/blastware_file")
def db_event_blastware_file(event_id: str) -> FileResponse:
    """
    Serve the stored Blastware-format file of a previously-ingested event.

    The download keeps the per-event filename recorded in the DB row
    (timestamp-encoded `AB0T` extension for ACH downloads, 3-char `AB0`
    for direct downloads).

    Responses:
      - 404 when the event is unknown, or its row names no stored file
        (events ingested before the store was wired — re-download via
        the live endpoint to populate).
      - 410 when the row names a file that the store cannot open on disk.
    """
    record = _get_db().get_event(event_id)
    if record is None:
        raise HTTPException(status_code=404, detail=f"Event {event_id} not found")

    serial = record.get("serial")
    filename = record.get("blastware_filename")
    if not (serial and filename):
        raise HTTPException(
            status_code=404,
            detail=(
                f"Event {event_id} has no Blastware file in the store. "
                "Re-download via the live endpoint to populate."
            ),
        )

    stored_path = _get_store().open_blastware(serial, filename)
    if stored_path is None:
        # The DB knows the file but the store could not find it.
        raise HTTPException(
            status_code=410,
            detail=f"Stored file missing on disk: {filename}",
        )

    return FileResponse(
        path=str(stored_path),
        filename=filename,
        media_type="application/octet-stream",
    )
@app.get("/db/events/{event_id}/waveform.json")
def db_event_waveform_json(event_id: str) -> dict:
    """
    Return the plot-ready JSON (`sfm.plot.v1`) for a stored event.

    Sources are tried in this order:
      1. `<filename>.h5` — served through `plot_json_from_hdf5`.  A read
         failure is logged and the next source is tried.
      2. `<filename>.a5.pkl` — the A5 decoders are replayed into a fresh
         Event, which is serialised with `event_to_plot_json` (using the
         "normal" geophone range).
      3. Neither present: 404.

    Also 404 when the event is unknown or its row names no stored file;
    500 when the A5 waveform decode raises.  Both sources produce the
    same `sfm.plot.v1` payload shape.
    """
    record = _get_db().get_event(event_id)
    if record is None:
        raise HTTPException(status_code=404, detail=f"Event {event_id} not found")

    serial = record.get("serial")
    filename = record.get("blastware_filename")
    if not (serial and filename):
        raise HTTPException(
            status_code=404,
            detail=f"Event {event_id} has no event file in the store",
        )

    waveform_store = _get_store()

    # Source 1: the HDF5 file.
    hdf5_file = waveform_store.hdf5_path_for(serial, filename)
    if hdf5_file.exists():
        try:
            return event_hdf5.plot_json_from_hdf5(hdf5_file, event_id=event_id)
        except Exception as exc:
            log.warning("HDF5 read failed (%s); falling back to A5 path", exc)

    # Source 2: replay the pickled A5 frames.
    frames = waveform_store.load_a5(serial, filename)
    if not frames:
        raise HTTPException(
            status_code=404,
            detail=(
                f"Event {event_id} has no waveform data on disk "
                "(no .h5 and no .a5.pkl). Run the backfill script or "
                "re-download via the live endpoint to populate."
            ),
        )

    replayed = Event(index=-1)
    # A metadata decode failure is tolerated; a waveform decode failure is not.
    try:
        _decode_a5_metadata_into(frames, replayed)
    except Exception as exc:
        log.warning("db_event_waveform_json: metadata decode failed: %s", exc)
    try:
        _decode_a5_waveform(frames, replayed)
    except Exception as exc:
        log.error("db_event_waveform_json: waveform decode failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail=f"Waveform decode failed: {exc}") from exc

    # Carry over fields from the DB row when the A5 replay didn't fill them.
    if replayed.sample_rate is None:
        stored_rate = record.get("sample_rate")
        if stored_rate:
            replayed.sample_rate = stored_rate

    return event_hdf5.event_to_plot_json(
        replayed, serial=serial, geo_range="normal", event_id=event_id,
    )
# ── /db/events/{id}/sidecar — modern .sfm.json review/metadata accessors ──────
class SidecarPatchBody(BaseModel):
    """Request body for PATCH /db/events/{id}/sidecar.

    JSON-merge-patch semantics: only the keys you include get updated.
    Both fields are optional and default to ``None`` (block not supplied).

    `review` is the editable block for monthly-summary workflows
    (false_trigger flag, reviewer notes, etc.); `extensions` is the
    forward-compat namespace for vendor / future fields.
    """
    # Patch for the sidecar's `review` block; None when not supplied.
    review: Optional[dict] = None
    # Patch for the sidecar's `extensions` block; None when not supplied.
    extensions: Optional[dict] = None
@app.get("/db/events/{event_id}/sidecar")
def db_event_sidecar(event_id: str) -> dict:
    """
    Return the .sfm.json sidecar of a stored event.

    404 when the event is unknown, when its row names no stored file, or
    when no sidecar can be loaded for it (events ingested before the
    sidecar feature landed stay in that state until backfilled).
    """
    record = _get_db().get_event(event_id)
    if record is None:
        raise HTTPException(status_code=404, detail=f"Event {event_id} not found")

    serial = record.get("serial")
    filename = record.get("blastware_filename")
    if not (serial and filename):
        raise HTTPException(
            status_code=404,
            detail=f"Event {event_id} has no event file in the store",
        )

    loaded = _get_store().load_sidecar(serial, filename)
    if loaded is not None:
        return loaded

    raise HTTPException(
        status_code=404,
        detail=(
            f"No .sfm.json sidecar on disk for {filename}. "
            "Run scripts/backfill_sidecars.py to generate one."
        ),
    )
@app.patch("/db/events/{event_id}/sidecar")
def db_event_sidecar_patch(event_id: str, body: SidecarPatchBody) -> dict:
    """
    JSON-merge-patch the sidecar's `review` and/or `extensions` blocks.

    The sidecar JSON is the source of truth for review state.  Whenever
    the request carries a `review` block, the patched sidecar's review
    state is pushed to the DB through `update_event_review`, keeping the
    SQL `events.false_trigger` column in sync as a derived index for fast
    filtering.

    Returns the new full sidecar.  404 when the event, its stored file
    or its sidecar is missing; 400 when the body carries neither a
    non-empty `review` nor a non-empty `extensions`.
    """
    db = _get_db()
    record = db.get_event(event_id)
    if record is None:
        raise HTTPException(status_code=404, detail=f"Event {event_id} not found")

    serial = record.get("serial")
    filename = record.get("blastware_filename")
    if not (serial and filename):
        raise HTTPException(
            status_code=404,
            detail=f"Event {event_id} has no event file in the store",
        )

    if not body.review and not body.extensions:
        raise HTTPException(
            status_code=400,
            detail="PATCH body must include `review` and/or `extensions`",
        )

    patched = _get_store().patch_sidecar(
        serial, filename,
        review=body.review,
        extensions=body.extensions,
    )
    if patched is None:
        raise HTTPException(
            status_code=404,
            detail=f"No .sfm.json sidecar on disk for {filename}",
        )

    # Mirror false_trigger from review block into the SQL index column.
    if body.review is not None:
        _get_db().update_event_review(event_id, patched.get("review", {}))
    return patched
# ── /db/import/blastware_file — ingest BW-only event files ────────────────────
@app.post("/db/import/blastware_file")
async def db_import_blastware_file(
    files: list[UploadFile] = File(...),
    serial: Optional[str] = Query(None, description="Optional serial-number hint (e.g. BE11529); falls back to the BW filename's encoded prefix when omitted"),
) -> dict:
    """
    Multipart upload of one or more Blastware event file binaries
    (typically produced by Blastware's own ACH).  For each file:

    1. The bytes are handed to WaveformStore.save_imported_bw, which
       returns the parsed Event and the store record for the saved file.
    2. The event is passed to SeismoDb.insert_events together with that
       record (keyed by the event's waveform key, when it has one).

    The client-supplied filename is untrusted input: only its final path
    component is forwarded to the store, so directory parts or `..`
    segments in the upload name never reach it.  The original name is
    still echoed back in the response.

    Returns ``{"count": N, "results": [...]}`` with one entry per upload.
    A failure on one file (unreadable upload, malformed file, etc.) is
    recorded as ``status == "error"`` and does not abort the others.
    """
    store = _get_store()
    db = _get_db()
    results: list[dict] = []

    for upload in files:
        try:
            content = await upload.read()
        except Exception as exc:
            results.append({
                "filename": upload.filename, "status": "error",
                "detail": f"read failed: {exc}",
            })
            continue

        # Reduce the upload name to a bare filename; both "/" and "\" are
        # treated as separators because the client OS is unknown.
        safe_name = (upload.filename or "").replace("\\", "/").rsplit("/", 1)[-1]
        if safe_name in ("", ".", ".."):
            safe_name = "imported.bw"

        try:
            ev, rec = store.save_imported_bw(
                content,
                source_path=Path(safe_name),
                serial_hint=serial,
            )
            waveform_key = ev._waveform_key
            inserted, skipped = db.insert_events(
                [ev],
                serial=(serial or _serial_from_event(ev) or "UNKNOWN"),
                waveform_records=(
                    {waveform_key.hex(): rec} if waveform_key else None
                ),
            )
            results.append({
                "filename": upload.filename,
                "status": "ok",
                "stored_filename": rec["filename"],
                "filesize": rec["filesize"],
                "sha256": rec["sha256"],
                "inserted": inserted,
                "skipped": skipped,
            })
        except Exception as exc:
            # Per-file boundary: log with traceback, report, keep going.
            log.error("import failed for %s: %s", upload.filename, exc, exc_info=True)
            results.append({
                "filename": upload.filename, "status": "error",
                "detail": str(exc),
            })

    return {"count": len(results), "results": results}
def _serial_from_event(ev) -> Optional[str]:
"""Fallback serial resolver — currently relies on the BW filename
decoder via WaveformStore.save_imported_bw, so this is just a
placeholder for future enhancement (e.g. inferring from project_info)."""
return None
@app.get("/db/units/{serial}/waveforms.zip")
def db_unit_waveforms_zip(
    serial: str,
    from_dt: Optional[str] = Query(None, description="ISO-8601 start datetime (inclusive)"),
    to_dt: Optional[str] = Query(None, description="ISO-8601 end datetime (inclusive)"),
    limit: int = Query(5000, description="Hard cap on events bundled (default 5000)"),
) -> StreamingResponse:
    """
    Return a ZIP of all stored event files for a serial in the optional
    date range.  Events without a stored event file are silently skipped.

    NOTE: the archive is assembled completely in an in-memory buffer before
    the response is sent, so `limit` is the only bound on memory use.

    Raises:
        HTTPException 400: `from_dt` / `to_dt` is not a valid ISO-8601 string.
        HTTPException 404: no stored files matched the query.
    """
    import io
    import zipfile

    def _parse_bound(name: str, raw: Optional[str]) -> Optional[datetime.datetime]:
        """Parse one optional ISO-8601 bound; bad input is a client error."""
        if not raw:
            return None
        try:
            return datetime.datetime.fromisoformat(raw)
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid {name}: expected an ISO-8601 datetime, got {raw!r}",
            ) from exc

    rows = _get_db().query_events(
        serial=serial,
        from_dt=_parse_bound("from_dt", from_dt),
        to_dt=_parse_bound("to_dt", to_dt),
        limit=limit,
        offset=0,
    )

    store = _get_store()
    buf = io.BytesIO()
    written = 0
    with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for row in rows:
            fn = row.get("blastware_filename")
            if not fn:
                continue
            bw_path = store.open_blastware(serial, fn)
            if bw_path is None:
                continue
            zf.write(bw_path, arcname=fn)
            written += 1

    if written == 0:
        raise HTTPException(
            status_code=404,
            detail=f"No stored Blastware files found for serial {serial} in range",
        )

    buf.seek(0)
    # The serial comes from the URL and ends up inside a quoted header value:
    # keep only ASCII letters/digits and "-_." so quotes, separators or
    # control characters can never break the Content-Disposition header.
    safe_serial = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "-_." else "_"
        for ch in serial
    )
    headers = {
        "Content-Disposition": f'attachment; filename="{safe_serial}_waveforms.zip"',
        "X-Waveform-Count": str(written),
    }
    return StreamingResponse(buf, media_type="application/zip", headers=headers)
@app.get("/db/monitor_log")
def db_monitor_log(
serial: Optional[str] = Query(None, description="Filter by unit serial"),
+616 -61
View File
@@ -609,6 +609,147 @@
.section-btn:hover { color: var(--text); }
.section-btn.active { background: var(--blue); color: #fff; }
/* ── Force-refresh toggle ── */
.force-toggle {
display: flex;
align-items: center;
gap: 6px;
padding: 4px 10px;
border: 1px solid var(--border);
border-radius: 6px;
background: var(--bg);
cursor: pointer;
font-size: 11px;
font-weight: 600;
color: var(--text-dim);
user-select: none;
white-space: nowrap;
transition: background 0.12s, color 0.12s, border-color 0.12s;
}
.force-toggle input { margin: 0; cursor: pointer; }
.force-toggle:hover { color: var(--text); }
.force-toggle.active {
background: rgba(248, 81, 73, 0.18);
border-color: #f85149;
color: #ff7b72;
}
.force-toggle .ft-dot {
width: 6px; height: 6px; border-radius: 50%;
background: var(--text-mute);
}
.force-toggle.active .ft-dot { background: #f85149; box-shadow: 0 0 6px #f85149; }
/* ── Sidecar review modal ── */
.sc-overlay {
position: fixed; inset: 0;
background: rgba(0,0,0,0.55);
display: none;
align-items: center;
justify-content: center;
z-index: 100;
}
.sc-overlay.visible { display: flex; }
.sc-modal {
background: var(--surface2);
border: 1px solid var(--border);
border-radius: 8px;
width: min(720px, 92vw);
max-height: 88vh;
display: flex;
flex-direction: column;
box-shadow: 0 8px 32px rgba(0,0,0,0.5);
}
.sc-header {
display: flex; align-items: center; justify-content: space-between;
padding: 14px 18px;
border-bottom: 1px solid var(--border);
}
.sc-header h3 {
margin: 0; font-size: 14px; font-weight: 600;
color: var(--text); font-family: monospace;
}
.sc-close {
background: none; border: none; cursor: pointer;
color: var(--text-mute); font-size: 18px; line-height: 1;
padding: 4px 8px; border-radius: 4px;
}
.sc-close:hover { background: var(--surface); color: var(--text); }
.sc-body {
flex: 1; overflow-y: auto;
padding: 16px 18px;
display: flex; flex-direction: column; gap: 14px;
}
.sc-section {
display: flex; flex-direction: column; gap: 6px;
}
.sc-section h4 {
margin: 0 0 4px;
font-size: 11px; font-weight: 600;
color: var(--text-mute); text-transform: uppercase;
letter-spacing: 0.6px;
}
.sc-grid {
display: grid;
grid-template-columns: 130px 1fr;
gap: 4px 12px;
font-size: 12px;
}
.sc-grid dt { color: var(--text-mute); }
.sc-grid dd { margin: 0; color: var(--text); font-family: monospace; word-break: break-all; }
.sc-row { display: flex; align-items: center; gap: 8px; font-size: 13px; }
.sc-row label { color: var(--text-dim); }
.sc-row input[type="checkbox"] { cursor: pointer; }
.sc-row input[type="text"], .sc-body textarea {
flex: 1;
background: var(--bg);
border: 1px solid var(--border);
border-radius: 5px;
padding: 6px 9px;
font-size: 12px;
color: var(--text);
font-family: monospace;
}
.sc-body textarea {
width: 100%;
min-height: 80px;
resize: vertical;
font-family: inherit;
}
.sc-raw {
border: 1px solid var(--border);
border-radius: 5px;
background: var(--bg);
}
.sc-raw summary {
padding: 6px 10px;
cursor: pointer;
font-size: 11px;
color: var(--text-dim);
user-select: none;
}
.sc-raw pre {
margin: 0;
padding: 8px 12px;
max-height: 240px;
overflow: auto;
font-size: 11px;
color: var(--text);
border-top: 1px solid var(--border);
}
.sc-footer {
display: flex; justify-content: flex-end; gap: 8px;
padding: 12px 18px;
border-top: 1px solid var(--border);
}
.sc-status {
flex: 1; align-self: center;
font-size: 11px; color: var(--text-mute);
}
.sc-status.error { color: #f85149; }
.sc-status.ok { color: #56d364; }
table.db-table tbody tr.clickable { cursor: pointer; }
table.db-table tbody tr.clickable:hover { background: var(--surface2); }
/* ── Section containers ── */
#section-live, #section-db {
display: flex;
@@ -654,6 +795,13 @@
<button class="section-btn active" onclick="switchSection('live')">Live Device</button>
<button class="section-btn" onclick="switchSection('db')">Database</button>
</div>
<div class="hdr-sep"></div>
<label class="force-toggle" id="force-toggle"
title="Bypass server cache and dedup. Forces a fresh download from the device on every live request — useful when the device has been erased and the cache is showing stale events.">
<input type="checkbox" id="force-cb" onchange="onForceToggle()">
<span class="ft-dot"></span>
<span>Force refresh</span>
</label>
</header>
<!-- ════════════════════════════════════════════════════════════════
@@ -769,6 +917,14 @@
<div class="event-toolbar">
<button class="btn btn-ghost" id="load-btn" onclick="loadWaveform()" disabled>Load Waveform</button>
<button class="btn btn-ghost" id="save-btn" onclick="saveEventToDb()" disabled
title="Download the full waveform from the device and save it to the SFM database + waveform store. Honors the Force refresh toggle.">
💾 Save to DB
</button>
<button class="btn btn-ghost" id="download-btn" onclick="downloadEventFile()" disabled
title="Download the Blastware-format event file to your computer (also saves it to the server's database + store).">
⬇ Download
</button>
<button class="btn btn-ghost" id="prev-btn" onclick="stepEvent(-1)" disabled></button>
<button class="btn btn-ghost" id="next-btn" onclick="stepEvent(+1)" disabled></button>
<div class="event-chips" id="event-chips"></div>
@@ -1187,7 +1343,7 @@ let currentEvent = 0;
let charts = {};
let geoAdcScale = 6.206;
const DBL_REF = 2.9e-9; // 20 µPa in psi — reference pressure for dBL
const CHANNEL_COLORS = { Tran:'#58a6ff', Vert:'#3fb950', Long:'#d29922', Mic:'#bc8cff' };
const CHANNEL_COLORS = { Tran:'#58a6ff', Vert:'#3fb950', Long:'#d29922', MicL:'#bc8cff' };
// ── Helpers ────────────────────────────────────────────────────────────────────
function api() { return document.getElementById('api-base').value.replace(/\/$/, ''); }
@@ -1214,8 +1370,21 @@ function setCfgStatus(msg, cls = '') {
el.className = cls;
}
// "Force refresh" override — when enabled, every live-device request is
// sent with ?force=true so the server bypasses its in-memory + persistent
// caches and re-reads from the device. Manual escape hatch for cases where
// the cache has gone stale (e.g. post-erase key reuse — see ach_server.py
// and sfm/cache.py for the eviction logic).
let forceRefresh = false;
// Change handler for the "Force refresh" checkbox: copies the checkbox
// state into `forceRefresh` and reflects it on the toggle's `active` class.
function onForceToggle() {
  const checkbox = document.getElementById('force-cb');
  const toggle = document.getElementById('force-toggle');
  forceRefresh = checkbox.checked;
  toggle.classList.toggle('active', forceRefresh);
}
function deviceParams() {
return `host=${encodeURIComponent(devHost())}&tcp_port=${devPort()}`;
const base = `host=${encodeURIComponent(devHost())}&tcp_port=${devPort()}`;
return forceRefresh ? `${base}&force=true` : base;
}
// ── Section switching ─────────────────────────────────────────────────────────
@@ -1305,9 +1474,11 @@ async function connectUnit() {
document.getElementById('device-bar').style.display = 'flex';
document.getElementById('monitor-panel').style.display = 'flex';
document.getElementById('load-btn').disabled = eventList.length === 0;
document.getElementById('prev-btn').disabled = true;
document.getElementById('next-btn').disabled = eventList.length <= 1;
document.getElementById('load-btn').disabled = eventList.length === 0;
document.getElementById('save-btn').disabled = eventList.length === 0;
document.getElementById('download-btn').disabled = eventList.length === 0;
document.getElementById('prev-btn').disabled = true;
document.getElementById('next-btn').disabled = eventList.length <= 1;
document.getElementById('cfg-read-btn').disabled = false;
document.getElementById('cfg-write-btn').disabled = false;
document.getElementById('ch-read-btn').disabled = false;
@@ -1807,11 +1978,104 @@ async function loadWaveform() {
document.getElementById('load-btn').disabled = false;
}
// ── Persist current event to the SFM database + waveform store ──────────────
//
// Calls /device/event/{idx}/blastware_file, which on the server side:
// 1. Downloads the full waveform from the device (5A bulk stream)
// 2. Writes the Blastware-format event file into <db_dir>/waveforms/<serial>/
// 3. Writes the .a5.pkl sidecar next to it (so the file can be regenerated)
// 4. Upserts a row into seismo_relay.db `events` table (dedup'd on serial+timestamp)
//
// We discard the response body — the side effects are what we want. The
// filename comes back in the Content-Disposition header for confirmation.
// Requests the Blastware file for the currently selected event purely for
// the server-side persist; the response body is read and discarded. The
// Save button shows a busy label while the request runs and is always
// restored afterwards, on success or failure.
async function saveEventToDb() {
  if (!devHost()) {
    setStatus('Enter device host first.', 'error');
    return;
  }

  const eventIdx = currentEvent;
  const saveBtn = document.getElementById('save-btn');
  const originalLabel = saveBtn.textContent;
  saveBtn.disabled = true;
  saveBtn.textContent = '⏳ Saving…';
  setStatus(`Downloading event #${eventIdx} and saving to DB…`, 'loading');

  try {
    const resp = await fetch(`${api()}/device/event/${eventIdx}/blastware_file?${deviceParams()}`);
    if (!resp.ok) {
      // Prefer the server's JSON `detail`; fall back to the HTTP status text.
      const errBody = await resp.json().catch(() => ({}));
      throw new Error(errBody.detail || resp.statusText);
    }
    // Read the body to completion so the connection releases promptly;
    // the bytes themselves are not needed here.
    await resp.blob();
    const headerName = parseFilenameFromContentDisposition(resp.headers.get('Content-Disposition'));
    const savedName = headerName || `event ${eventIdx}`;
    setStatus(`Saved ${savedName} to database + waveform store`, 'ok');
  } catch (err) {
    setStatus(`Save error: ${err.message}`, 'error');
  } finally {
    saveBtn.disabled = false;
    saveBtn.textContent = originalLabel;
  }
}
// ── Download the event file to the user's computer ──────────────────────────
//
// Uses a transient anchor + click trick so the browser surfaces its native
// "Save As" / Downloads behaviour. Same backend endpoint as Save to DB —
// the file is also persisted to the server store as a side effect.
// Starts a browser download of the current event's Blastware file by
// clicking a temporary, invisible anchor that points at the
// blastware_file endpoint.
function downloadEventFile() {
  if (!devHost()) {
    setStatus('Enter device host first.', 'error');
    return;
  }

  const eventIdx = currentEvent;
  const fileUrl = `${api()}/device/event/${eventIdx}/blastware_file?${deviceParams()}`;
  setStatus(`Downloading event #${eventIdx}…`, 'loading');

  // A throw-away <a> element is attached, clicked and removed again. This
  // relies on the server answering with Content-Disposition: attachment so
  // the browser saves the file instead of navigating to it.
  const link = document.createElement('a');
  link.style.display = 'none';
  link.href = fileUrl;
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  // Completion of a browser download is not observable from here, so a
  // soft confirmation is shown shortly after the click; failures surface
  // through the browser's own download UI.
  const confirm = () => setStatus(`Download started for event #${eventIdx} (also saved server-side)`, 'ok');
  setTimeout(confirm, 250);
}
// Extracts the filename from a Content-Disposition header value.
//
// Handles both RFC 6266 forms: `attachment; filename="M529LKIQ.7M0W"` and
// `filename*=UTF-8''…` (percent-encoded). Returns null when the header is
// missing/empty or carries no filename parameter.
function parseFilenameFromContentDisposition(header) {
  if (!header) return null;
  const m = /filename\*?=(?:UTF-8'')?["']?([^"';]+)["']?/i.exec(header);
  if (!m) return null;
  try {
    return decodeURIComponent(m[1]);
  } catch (err) {
    // A plain `filename="…"` value is not percent-encoded, so a literal
    // "%" (e.g. "100%.bin") makes decodeURIComponent throw URIError.
    // Fall back to the raw value rather than failing the caller.
    return m[1];
  }
}
// renderWaveform consumes the `sfm.plot.v1` JSON shape:
// {
// schema: "sfm.plot.v1",
// time_axis: { sample_rate, pretrig_samples, t0_ms, dt_ms, n_samples, ... },
// channels: { Tran|Vert|Long|MicL: { unit, values, peak, peak_t_ms } },
// geo_range, geo_full_scale_ips, trigger_ms, peak_values, ...
// }
//
// All sample arrays are already in PHYSICAL UNITS (in/s for geo, psi for
// mic) — the server applied the right scaling for the unit's geo_range.
// The viewer used to multiply ADC ints by `geoAdcScale / 32767` here,
// which silently scaled every plot ~38% too low because `geoAdcScale` is
// the in/s-per-V hardware constant, not the ADC-counts-to-velocity
// factor. No scaling happens client-side now.
function renderWaveform(data) {
const sr = data.sample_rate || 1024;
const pretrig = data.pretrig_samples || 0;
const decoded = data.samples_decoded || 0;
const total = data.total_samples || decoded;
// Backward-compat shim: if we ever get the legacy shape from a stale
// cache, normalise it on the client so the viewer still works.
if (!data.schema && data.channels && Array.isArray(data.channels.Tran)) {
data = _legacyWaveformToPlotV1(data);
}
const t = data.time_axis || {};
const sr = t.sample_rate || 1024;
const pretrig = t.pretrig_samples || 0;
const total = t.total_samples || t.n_samples || 0;
const decoded = t.n_samples || 0;
const t0 = t.t0_ms ?? -(pretrig / sr * 1000);
const dt = t.dt_ms ?? (1000 / sr);
const channels = data.channels || {};
// Status bar
@@ -1819,70 +2083,83 @@ function renderWaveform(data) {
bar.innerHTML = '';
bar.className = 'ok';
const ts = data.timestamp;
bar.textContent = ts ? `Event #${data.index} — ${ts.display} ` : `Event #${data.index} `;
// Title prefers `index` (live device, 0-based slot on the unit) and
// falls back to event_id (DB lookup) when index is absent.
const eventLabel = (data.index != null) ? `#${data.index}` : (data.event_id || '');
bar.textContent = ts ? `Event ${eventLabel} — ${ts} ` : `Event ${eventLabel} `;
addPill(`${data.record_type || '?'}`);
addPill(`${sr} sps`);
addPill(`${decoded.toLocaleString()} / ${total.toLocaleString()} samples`);
addPill(`pretrig ${pretrig}`);
addPill(`${data.rectime_seconds ?? '?'} s`);
addPill(`${t.rectime_seconds ?? '?'} s`);
if (data.geo_range) addPill(`geo: ${data.geo_range} (${data.geo_full_scale_ips} in/s FS)`);
// Any record_type starting with "Waveform" is a viewable triggered
// event (the timestamp-header byte layout varies across firmware but
// doesn't change the sample stream). Only block when there's actually
// no waveform payload to plot.
const isWaveformLike = !!(data.record_type || '').match(/^Waveform/i);
if (decoded === 0) {
document.getElementById('empty-state').style.display = 'flex';
document.getElementById('empty-state').querySelector('p').textContent =
data.record_type === 'Waveform'
isWaveformLike
? 'No samples decoded — check server logs'
: `Record type "${data.record_type}" — waveform not supported yet`;
: `Record type "${data.record_type}" — not a waveform event`;
document.getElementById('charts').style.display = 'none';
Object.values(charts).forEach(c => c.destroy()); charts = {};
return;
}
const times = Array.from({length: decoded}, (_, i) => ((i - pretrig) / sr * 1000).toFixed(2));
// Time axis: explicit ms values from t0_ms + i*dt_ms. More precise
// than the old (i - pretrig) / sr * 1000 since dt_ms came from the
// server with full float precision.
const times = Array.from({length: decoded}, (_, i) => (t0 + i * dt).toFixed(2));
document.getElementById('empty-state').style.display = 'none';
const chartsDiv = document.getElementById('charts');
chartsDiv.style.display = 'flex';
chartsDiv.innerHTML = '';
Object.values(charts).forEach(c => c.destroy()); charts = {};
const micPeakPsi = data.peak_values?.micl_psi ?? null;
for (const [ch, color] of Object.entries(CHANNEL_COLORS)) {
const samples = channels[ch];
if (!samples || samples.length === 0) continue;
const chData = channels[ch];
if (!chData || !chData.values || chData.values.length === 0) continue;
const isGeo = ch !== 'Mic';
let plotData, peakLabel, yUnit, ttFmt, tickFmt;
const plotData = chData.values;
const unit = chData.unit || (ch === 'MicL' ? 'psi' : 'in/s');
const peak = chData.peak;
const peakTms = chData.peak_t_ms;
if (isGeo) {
const scale = geoAdcScale / 32767;
plotData = samples.map(s => s * scale);
// Use the device-recorded peak from the 0C waveform record — authoritative
// and matches Blastware. Computing from raw samples can catch rogue
// near-full-scale values from decoding artifacts.
const peakKey = { Tran:'tran_in_s', Vert:'vert_in_s', Long:'long_in_s' }[ch];
const devicePeak = data.peak_values?.[peakKey] ?? null;
peakLabel = devicePeak != null ? `${devicePeak.toFixed(5)} in/s` : `${Math.max(...plotData.map(Math.abs)).toFixed(5)} in/s`;
yUnit = 'in/s';
ttFmt = v => `${ch}: ${v.toFixed(5)} in/s`;
tickFmt = v => v.toFixed(4);
let peakLabel, ttFmt, tickFmt;
if (unit === 'psi') {
const peakDbl = (peak != null && peak > 0)
? 20 * Math.log10(peak / DBL_REF) : -Infinity;
peakLabel = `${peakDbl.toFixed(1)} dBL (${peak != null ? peak.toExponential(2) : '—'} psi)`;
ttFmt = v => `${v.toExponential(3)} psi`;
tickFmt = v => v.toExponential(1);
} else {
const peakCounts = Math.max(...samples.map(Math.abs));
const micScale = (micPeakPsi !== null && peakCounts > 0) ? Math.abs(micPeakPsi) / peakCounts : 1.0;
plotData = samples.map(s => s * micScale);
const peakPsi = Math.max(...plotData.map(Math.abs));
const peakDbl = peakPsi > 0 ? 20 * Math.log10(peakPsi / DBL_REF) : -Infinity;
peakLabel = `${peakDbl.toFixed(1)} dBL`;
yUnit = 'psi';
ttFmt = v => `${v.toExponential(3)} psi`;
tickFmt = v => v.toExponential(1);
peakLabel = peak != null ? `${peak.toFixed(5)} in/s` : '—';
ttFmt = v => `${ch}: ${v.toFixed(5)} in/s`;
tickFmt = v => v.toFixed(4);
}
// Downsample for display when the chart would otherwise have to
// rasterise tens of thousands of points. Uses every-Nth — fine for
// monthly-summary glance work; analysis tools should use the .h5 file.
const MAX_PTS = 4000;
let rTimes = times, rData = plotData;
let rTimes = times, rData = plotData, peakPlotIdx = -1;
if (plotData.length > MAX_PTS) {
const step = Math.ceil(plotData.length / MAX_PTS);
rTimes = times.filter((_, i) => i % step === 0);
rData = plotData.filter((_, i) => i % step === 0);
// Try to keep the peak sample from being downsampled away.
if (peakTms != null) {
const exactIdx = Math.round((peakTms - t0) / dt);
if (exactIdx >= 0 && exactIdx < plotData.length) {
peakPlotIdx = Math.floor(exactIdx / step);
}
}
} else if (peakTms != null) {
peakPlotIdx = Math.round((peakTms - t0) / dt);
}
const wrap = document.createElement('div');
@@ -1910,27 +2187,94 @@ function renderWaveform(data) {
},
scales: {
x: { type: 'category', ticks: { color:'#484f58', maxTicksLimit:10, maxRotation:0, callback:(v,i) => rTimes[i]+' ms' }, grid: { color:'#21262d' } },
y: { ticks: { color:'#484f58', maxTicksLimit:5, callback: v => tickFmt(v) }, grid: { color:'#21262d' }, title: { display:true, text:yUnit, color:'#484f58', font:{size:10} } },
y: { ticks: { color:'#484f58', maxTicksLimit:5, callback: v => tickFmt(v) }, grid: { color:'#21262d' }, title: { display:true, text:unit, color:'#484f58', font:{size:10} } },
},
},
plugins: [{
id: 'triggerLine',
id: 'triggerAndPeakMarkers',
afterDraw(chart) {
const zeroIdx = rTimes.findIndex(t => parseFloat(t) >= 0);
if (zeroIdx < 0) return;
const { ctx, scales: {x, y} } = chart;
const px = x.getPixelForValue(zeroIdx);
ctx.save();
ctx.beginPath();
ctx.moveTo(px, y.top); ctx.lineTo(px, y.bottom);
ctx.strokeStyle = 'rgba(248,81,73,0.7)'; ctx.lineWidth = 1.5;
ctx.setLineDash([4, 3]); ctx.stroke(); ctx.restore();
// Trigger line at t = trigger_ms (typically 0).
const triggerMs = data.trigger_ms ?? 0;
const zeroIdx = rTimes.findIndex(s => parseFloat(s) >= triggerMs);
if (zeroIdx >= 0) {
const px = x.getPixelForValue(zeroIdx);
ctx.save();
ctx.beginPath();
ctx.moveTo(px, y.top); ctx.lineTo(px, y.bottom);
ctx.strokeStyle = 'rgba(248,81,73,0.7)'; ctx.lineWidth = 1.5;
ctx.setLineDash([4, 3]); ctx.stroke(); ctx.restore();
}
// Peak marker (dot at the channel's peak sample).
if (peakPlotIdx >= 0 && peakPlotIdx < rData.length) {
const px = x.getPixelForValue(peakPlotIdx);
const py = y.getPixelForValue(rData[peakPlotIdx]);
ctx.save();
ctx.beginPath();
ctx.arc(px, py, 3.2, 0, Math.PI * 2);
ctx.fillStyle = color;
ctx.strokeStyle = '#0d1117';
ctx.lineWidth = 1.5;
ctx.fill(); ctx.stroke();
ctx.restore();
}
},
}],
});
}
}
// One-time normaliser for the legacy /device/event/{idx}/waveform shape
// (samples as int16 ADC counts in `channels.{ch}: [...]`). Bridges the
// gap if a stale cache or non-upgraded server returns the old format.
// Converts the legacy waveform payload (per-channel arrays of raw ADC
// counts in `channels.{ch}`) into the `sfm.plot.v1` shape consumed by
// renderWaveform.
//
// Geophone channels are scaled assuming the Normal range (10 in/s full
// scale over 32768 counts); the legacy payload is not consulted for a
// range here. The mic channel is scaled so that its largest absolute
// count maps to the magnitude of `peak_values.micl_psi`; without a usable
// peak the counts pass through unscaled. `peak_t_ms` is always null
// because the legacy shape carries no peak timing.
function _legacyWaveformToPlotV1(data) {
  const sr = data.sample_rate || 1024;
  const pretrig = data.pretrig_samples || 0;
  const decoded = data.samples_decoded || 0;
  const total = data.total_samples || decoded;
  const dt = 1000 / sr;
  const t0 = -pretrig * dt;

  // Apply the CORRECT scale: 10 in/s full-scale for Normal range.
  const geoFs = 10.0;
  const geoScale = geoFs / 32768;

  const ch = data.channels || {};
  const micCounts = ch.MicL || ch.Mic || [];
  const micPeak = data.peak_values?.micl_psi ?? null;
  const micPeakCounts = micCounts.reduce((m, v) => Math.max(m, Math.abs(v)), 0);
  // Use the peak's magnitude: a negative recorded peak must not flip the
  // sign of every mic sample.
  const micScale = (micPeak != null && micPeakCounts > 0)
    ? Math.abs(micPeak) / micPeakCounts
    : 1.0;

  const scaleBy = (counts, factor) => {
    if (!counts || !counts.length) return [];
    return counts.map(c => c * factor);
  };
  const geoChannel = (counts, peakKey) => ({
    unit: 'in/s',
    values: scaleBy(counts, geoScale),
    peak: data.peak_values?.[peakKey] ?? null,
    peak_t_ms: null,
  });

  return {
    schema: 'sfm.plot.v1',
    event_id: data.event_id || null,
    serial: data.serial || '',
    timestamp: data.timestamp?.display || data.timestamp || '',
    record_type: data.record_type,
    waveform_key: null,
    time_axis: {
      sample_rate: sr, pretrig_samples: pretrig, total_samples: total,
      n_samples: decoded, t0_ms: t0, dt_ms: dt,
      rectime_seconds: data.rectime_seconds || 0,
    },
    geo_range: 'normal', geo_full_scale_ips: geoFs, trigger_ms: 0,
    channels: {
      Tran: geoChannel(ch.Tran, 'tran_in_s'),
      Vert: geoChannel(ch.Vert, 'vert_in_s'),
      Long: geoChannel(ch.Long, 'long_in_s'),
      MicL: { unit: 'psi', values: scaleBy(micCounts, micScale), peak: micPeak, peak_t_ms: null },
    },
    peak_values: data.peak_values || {},
  };
}
// ── DB tabs ────────────────────────────────────────────────────────────────────
let histLoaded = false;
let unitsLoaded = false;
@@ -2032,7 +2376,9 @@ async function loadHistory() {
for (const ev of events) {
const tr = document.createElement('tr');
const pvs = ev.peak_vector_sum;
const maxPPV = Math.max(ev.tran_ppv ?? 0, ev.vert_ppv ?? 0, ev.long_ppv ?? 0);
tr.classList.add('clickable');
tr.title = 'Click to review (open sidecar editor)';
tr.dataset.eventId = ev.id;
tr.innerHTML = `
<td>${_fmtTs(ev.timestamp)}</td>
<td class="td-key">${ev.serial ?? '—'}</td>
@@ -2045,24 +2391,157 @@ async function loadHistory() {
<td class="td-text">${ev.client ?? '—'}</td>
<td class="td-dim">${ev.record_type ?? '—'}</td>
<td class="td-dim" style="font-size:10px">${ev.waveform_key ?? '—'}</td>
<td>${ev.false_trigger ? '<span class="ft-badge">FALSE</span>' : `<button class="ft-toggle-btn" onclick="toggleFalseTrigger(${ev.id}, this)" title="Flag as false trigger">Flag</button>`}</td>
<td>${ev.false_trigger ? '<span class="ft-badge">FALSE</span>' : ''}</td>
`;
tr.addEventListener('click', () => openSidecarModal(ev.id));
tbody.appendChild(tr);
}
}
async function toggleFalseTrigger(id, btn) {
btn.disabled = true;
// ── Sidecar review modal ───────────────────────────────────────────────────────
//
// Opens on row click in the History table. Loads the .sfm.json sidecar
// for the event via GET /db/events/{id}/sidecar, lets the user toggle
// false_trigger / edit notes / set reviewer, and saves via PATCH on the
// same URL. This mirrors the workflow used by the monthly vibration
// summary process — most of the rich review UX lives in Terra-View;
// this is the SFM-standalone equivalent for testing / direct edits.
let _scCurrentEventId = null;
let _scCurrentSidecar = null;
async function openSidecarModal(eventId) {
_scCurrentEventId = eventId;
_scCurrentSidecar = null;
document.getElementById('sc-status').textContent = 'Loading sidecar…';
document.getElementById('sc-status').className = 'sc-status';
document.getElementById('sc-overlay').classList.add('visible');
// Reset edit fields
document.getElementById('sc-edit-ft').checked = false;
document.getElementById('sc-edit-reviewer').value = '';
document.getElementById('sc-edit-notes').value = '';
try {
const r = await fetch(`${api()}/db/events/${id}/false_trigger?value=true`, { method: 'PATCH' });
if (!r.ok) throw new Error(r.statusText);
btn.outerHTML = '<span class="ft-badge">FALSE</span>';
const r = await fetch(`${api()}/db/events/${eventId}/sidecar`);
if (!r.ok) {
const e = await r.json().catch(() => ({}));
throw new Error(e.detail || r.statusText);
}
const data = await r.json();
_scCurrentSidecar = data;
_renderSidecar(data);
document.getElementById('sc-status').textContent = '';
} catch (e) {
btn.disabled = false;
alert(`Failed to flag: ${e.message}`);
document.getElementById('sc-status').className = 'sc-status error';
document.getElementById('sc-status').textContent = `Load failed: ${e.message}`;
}
}
// Fills the sidecar modal from a sidecar object: the read-only fields
// (event, peaks, project, source/files), the editable review inputs, and
// the raw-JSON viewer. Missing blocks or values render as "—".
function _renderSidecar(data) {
  const ev  = data.event || {};
  const pv  = data.peak_values || {};
  const pi  = data.project_info || {};
  const bw  = data.blastware || {};
  const src = data.source || {};
  const rev = data.review || {};

  const el = (id) => document.getElementById(id);
  const put = (id, text) => { el(id).textContent = text; };

  // Geophone peaks: 5 decimal places in in/s.
  const fmtPpv = (v) => {
    if (v == null) return '—';
    return Number(v).toFixed(5) + ' in/s';
  };
  // Mic peak: dBL relative to DBL_REF, with the psi value in parentheses.
  const fmtMic = (v) => {
    if (v == null || v <= 0) return '—';
    const dbl = 20 * Math.log10(v / DBL_REF);
    return `${dbl.toFixed(1)} dBL (${v.toExponential(2)} psi)`;
  };

  put('sc-title', `Event — ${bw.filename || ev.waveform_key || 'unknown'}`);

  // Event
  put('sc-f-serial', ev.serial || '—');
  put('sc-f-ts', ev.timestamp || '—');
  put('sc-f-rt', ev.record_type || '—');
  put('sc-f-sr', `${ev.sample_rate ?? '—'}${ev.sample_rate ? ' sps' : ''}`);
  put('sc-f-key', ev.waveform_key || '—');

  // Peaks
  put('sc-f-tran', fmtPpv(pv.transverse));
  put('sc-f-vert', fmtPpv(pv.vertical));
  put('sc-f-long', fmtPpv(pv.longitudinal));
  put('sc-f-pvs', fmtPpv(pv.vector_sum));
  put('sc-f-mic', fmtMic(pv.mic_psi));

  // Project
  put('sc-f-project', pi.project || '—');
  put('sc-f-client', pi.client || '—');
  put('sc-f-operator', pi.operator || '—');
  put('sc-f-loc', pi.sensor_location || '—');

  // Source / files
  put('sc-f-bw', bw.filename || '—');
  put('sc-f-bwsize', bw.filesize != null ? `${bw.filesize} bytes` : '—');
  put('sc-f-sha', bw.sha256 || '—');
  put('sc-f-src', src.kind || '—');
  put('sc-f-cap', src.captured_at || '—');

  // Editable review inputs
  el('sc-edit-ft').checked = !!rev.false_trigger;
  el('sc-edit-reviewer').value = rev.reviewer || '';
  el('sc-edit-notes').value = rev.notes || '';

  put('sc-raw-json', JSON.stringify(data, null, 2));
}
// Hides the sidecar modal overlay and forgets which event/sidecar it was
// showing.
function closeSidecarModal() {
  const overlay = document.getElementById('sc-overlay');
  overlay.classList.remove('visible');
  _scCurrentEventId = null;
  _scCurrentSidecar = null;
}
// Overlay click handler: closes the modal only when the click target is
// the dimmed backdrop itself, not something inside the modal.
function onSidecarOverlayClick(e) {
  const clickedBackdrop = e.target.id === 'sc-overlay';
  if (!clickedBackdrop) return;
  closeSidecarModal();
}
// Saves the review fields of the open sidecar modal via
// PATCH /db/events/{id}/sidecar, re-renders the modal from the server's
// response, refreshes the History table and auto-closes the modal.
//
// The event id is captured before the request is sent. If the modal has
// been closed or switched to another event by the time the response
// arrives, the stale response must not overwrite what is now shown, and
// the delayed auto-close must not dismiss the other event's modal.
async function saveSidecarReview() {
  const eventId = _scCurrentEventId;
  if (!eventId) return;
  const btn = document.getElementById('sc-save-btn');
  const status = document.getElementById('sc-status');
  btn.disabled = true;
  status.className = 'sc-status';
  status.textContent = 'Saving…';

  const review = {
    false_trigger: document.getElementById('sc-edit-ft').checked,
    // An empty reviewer field is sent as null rather than "".
    reviewer: document.getElementById('sc-edit-reviewer').value.trim() || null,
    notes: document.getElementById('sc-edit-notes').value,
  };

  const stillShowing = () => _scCurrentEventId === eventId;

  try {
    const r = await fetch(`${api()}/db/events/${eventId}/sidecar`, {
      method: 'PATCH',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ review }),
    });
    if (!r.ok) {
      const e = await r.json().catch(() => ({}));
      throw new Error(e.detail || r.statusText);
    }
    const updated = await r.json();
    if (stillShowing()) {
      _scCurrentSidecar = updated;
      _renderSidecar(updated);
      status.className = 'sc-status ok';
      status.textContent = 'Saved.';
    }
    // Refresh the History table so the false_trigger badge reflects the change.
    if (typeof loadHistory === 'function') loadHistory();
    // Re-check when the timer fires: the user may have opened another
    // event during the 600 ms delay.
    setTimeout(() => { if (stillShowing()) closeSidecarModal(); }, 600);
  } catch (e) {
    status.className = 'sc-status error';
    status.textContent = `Save failed: ${e.message}`;
  } finally {
    btn.disabled = false;
  }
}
// Keyboard shortcut: Escape dismisses the sidecar modal while it is visible.
document.addEventListener('keydown', (evt) => {
  if (evt.key !== 'Escape') return;
  const overlay = document.getElementById('sc-overlay');
  if (overlay.classList.contains('visible')) closeSidecarModal();
});
// ── Units tab ──────────────────────────────────────────────────────────────────
async function loadUnits() {
unitsLoaded = true;
@@ -2224,5 +2703,81 @@ document.getElementById('api-base').value = window.location.origin;
document.getElementById(id)?.addEventListener('keydown', e => { if (e.key === 'Enter') connectUnit(); });
});
</script>
<!-- ════════════════════════════════════════════════════════════════
Sidecar review modal (Database events table → row click)
═══════════════════════════════════════════════════════════════════ -->
<div class="sc-overlay" id="sc-overlay" onclick="onSidecarOverlayClick(event)">
<div class="sc-modal" id="sc-modal">
<div class="sc-header">
<h3 id="sc-title">Event</h3>
<button class="sc-close" onclick="closeSidecarModal()">×</button>
</div>
<div class="sc-body">
<div class="sc-section">
<h4>Event</h4>
<dl class="sc-grid">
<dt>Serial</dt> <dd id="sc-f-serial"></dd>
<dt>Timestamp</dt> <dd id="sc-f-ts"></dd>
<dt>Record type</dt> <dd id="sc-f-rt"></dd>
<dt>Sample rate</dt> <dd id="sc-f-sr"></dd>
<dt>Waveform key</dt> <dd id="sc-f-key"></dd>
</dl>
</div>
<div class="sc-section">
<h4>Peaks</h4>
<dl class="sc-grid">
<dt>Tran</dt> <dd id="sc-f-tran"></dd>
<dt>Vert</dt> <dd id="sc-f-vert"></dd>
<dt>Long</dt> <dd id="sc-f-long"></dd>
<dt>PVS</dt> <dd id="sc-f-pvs"></dd>
<dt>Mic</dt> <dd id="sc-f-mic"></dd>
</dl>
</div>
<div class="sc-section">
<h4>Project</h4>
<dl class="sc-grid">
<dt>Project</dt> <dd id="sc-f-project"></dd>
<dt>Client</dt> <dd id="sc-f-client"></dd>
<dt>Operator</dt> <dd id="sc-f-operator"></dd>
<dt>Location</dt> <dd id="sc-f-loc"></dd>
</dl>
</div>
<div class="sc-section">
<h4>Source / files</h4>
<dl class="sc-grid">
<dt>BW filename</dt> <dd id="sc-f-bw"></dd>
<dt>BW filesize</dt> <dd id="sc-f-bwsize"></dd>
<dt>BW sha256</dt> <dd id="sc-f-sha"></dd>
<dt>Source kind</dt> <dd id="sc-f-src"></dd>
<dt>Captured at</dt> <dd id="sc-f-cap"></dd>
</dl>
</div>
<div class="sc-section">
<h4>Review (editable)</h4>
<div class="sc-row">
<input type="checkbox" id="sc-edit-ft" />
<label for="sc-edit-ft">False trigger</label>
</div>
<div class="sc-row">
<label for="sc-edit-reviewer" style="min-width:60px">Reviewer</label>
<input type="text" id="sc-edit-reviewer" placeholder="e.g. brian" />
</div>
<label for="sc-edit-notes" style="font-size:11px;color:var(--text-mute)">Notes</label>
<textarea id="sc-edit-notes" placeholder="e.g. truck thump near sensor 14:23 — false trigger"></textarea>
</div>
<details class="sc-raw">
<summary>Raw sidecar JSON (read-only peek)</summary>
<pre id="sc-raw-json"></pre>
</details>
</div>
<div class="sc-footer">
<span class="sc-status" id="sc-status"></span>
<button class="btn btn-ghost" onclick="closeSidecarModal()">Cancel</button>
<button class="btn" id="sc-save-btn" onclick="saveSidecarReview()">Save</button>
</div>
</div>
</div>
</body>
</html>
+446
View File
@@ -0,0 +1,446 @@
"""
sfm/waveform_store.py — On-disk store for Blastware-format event files.
Layout (flat per-serial, four files per event):
<root>/<serial>/<filename> event file (BW-readable binary)
<root>/<serial>/<filename>.a5.pkl pickled list of A5 S3Frame dicts
<root>/<serial>/<filename>.h5 clean waveform arrays (HDF5)
<root>/<serial>/<filename>.sfm.json modern sidecar (peaks, project,
review state, extensions)
`<filename>` is whatever `minimateplus.blastware_file.blastware_filename`
produces for the event. The extension is NOT a fixed type tag — it
encodes the event timestamp (`AB0T` format).
Roles:
- BW binary: what Blastware reads. Untouched. The user-facing review
waveform viewer.
- .a5.pkl: regenerative source. Lets the BW binary be rebuilt
byte-for-byte if the encoder changes. Never delete.
- .h5: clean per-channel waveform arrays in physical units (in/s for
geo, psi for mic) plus event metadata. Canonical format for
downstream analysis tools and the `/device/event/{idx}/waveform`
endpoint's plot-JSON output.
- .sfm.json: small, queryable metadata + review state. SQL
`events.false_trigger` is a derived index kept in sync via
`patch_sidecar()`.
"""
from __future__ import annotations
import datetime
import logging
import pickle
import shutil
from pathlib import Path
from typing import Optional
from minimateplus import event_file_io
from minimateplus.blastware_file import blastware_filename, write_blastware_file
from minimateplus.framing import S3Frame
from minimateplus.models import Event
from sfm import event_hdf5
log = logging.getLogger("sfm.waveform_store")
A5_PICKLE_VERSION = 1
def _frame_to_dict(f: S3Frame) -> dict:
return {
"sub": f.sub,
"page_hi": f.page_hi,
"page_lo": f.page_lo,
"data": bytes(f.data),
"chk_byte": f.chk_byte,
"checksum_valid": f.checksum_valid,
}
def _dict_to_frame(d: dict) -> S3Frame:
    """Rebuild an S3Frame from the dict form produced by `_frame_to_dict`.

    `checksum_valid` and `chk_byte` are optional in the dict and default to
    True and 0 respectively; the other four keys are required.
    """
    checksum_ok = d.get("checksum_valid", True)
    chk = d.get("chk_byte", 0)
    payload = bytes(d["data"])
    return S3Frame(
        sub=d["sub"],
        page_hi=d["page_hi"],
        page_lo=d["page_lo"],
        data=payload,
        checksum_valid=checksum_ok,
        chk_byte=chk,
    )
class WaveformStore:
    """
    Persistent store for Blastware-format waveform files + their A5 source frames.

    Thread safety: write_blastware_file is single-shot; concurrent saves of the
    *same* filename would race, but the filename encodes second-resolution
    timestamps + serial, so collisions across threads/processes are vanishingly
    unlikely in practice.
    """

    def __init__(self, root: str | Path) -> None:
        """Open (creating if needed) the store rooted at `root`."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        log.info("WaveformStore root=%s", self.root)

    # ── path helpers ────────────────────────────────────────────────────────────

    def _serial_dir(self, serial: str, *, create: bool = True) -> Path:
        """Return <root>/<serial>.

        With `create=True` (the default, used by write paths) the directory
        is created if missing. Read-only lookups pass `create=False` so that
        probing for a non-existent event does not litter the store with
        empty per-serial directories.
        """
        d = self.root / serial
        if create:
            d.mkdir(parents=True, exist_ok=True)
        return d

    def paths_for(
        self, serial: str, filename: str, *, create: bool = True,
    ) -> tuple[Path, Path]:
        """Return (blastware_path, a5_pickle_path) for a given serial+filename.

        For the sidecar path use `sidecar_path_for()` — kept separate so
        existing callers don't need to unpack a 3-tuple.

        `create` controls whether the per-serial directory is created.
        """
        d = self._serial_dir(serial, create=create)
        return d / filename, d / f"{filename}.a5.pkl"

    def sidecar_path_for(
        self, serial: str, filename: str, *, create: bool = True,
    ) -> Path:
        """Return absolute path to the .sfm.json sidecar for a given event."""
        return self._serial_dir(serial, create=create) / f"{filename}.sfm.json"

    def hdf5_path_for(
        self, serial: str, filename: str, *, create: bool = True,
    ) -> Path:
        """Return absolute path to the .h5 clean-waveform file for a given event."""
        return self._serial_dir(serial, create=create) / f"{filename}.h5"

    def open_blastware(self, serial: str, filename: str) -> Optional[Path]:
        """Return absolute path to an existing event file or None."""
        bw_path, _ = self.paths_for(serial, filename, create=False)
        return bw_path if bw_path.exists() else None

    @staticmethod
    def _unlink_if_exists(path: Path) -> None:
        """Delete `path`, ignoring the case where it is already absent."""
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    @staticmethod
    def _preserved_blocks(
        sidecar_path: Path, context: str,
    ) -> tuple[Optional[dict], Optional[dict]]:
        """Return (review, extensions) from an existing sidecar, if readable.

        Both are None when there is no sidecar yet or it cannot be read; an
        unreadable sidecar is logged (prefixed with `context`) and will be
        overwritten by the caller.
        """
        if not sidecar_path.exists():
            return None, None
        try:
            old = event_file_io.read_sidecar(sidecar_path)
        except Exception as exc:
            log.warning(
                "%s: existing sidecar at %s unreadable (%s); overwriting",
                context, sidecar_path, exc,
            )
            return None, None
        return old.get("review"), old.get("extensions")

    # ── save / load ─────────────────────────────────────────────────────────────

    def save(
        self,
        ev: Event,
        serial: str,
        a5_frames: list[S3Frame],
        *,
        source_kind: str = "sfm-live",
        geo_range: str = "normal",
    ) -> dict:
        """
        Write all four event-file artifacts for one event:
          - <filename>            BW binary
          - <filename>.a5.pkl     raw A5 frame pickle
          - <filename>.h5         clean waveform (HDF5)
          - <filename>.sfm.json   modern sidecar (metadata + review)

        Returns a record dict suitable for persisting alongside the DB row:
            {
                "filename": "M529LKIQ.7M0W",
                "filesize": 8708,
                "sha256": "a1b2c3...",
                "a5_pickle_filename": "M529LKIQ.7M0W.a5.pkl",
                "hdf5_filename": "M529LKIQ.7M0W.h5",
                "sidecar_filename": "M529LKIQ.7M0W.sfm.json",
            }
        `hdf5_filename` is None when the best-effort HDF5 write failed.

        `source_kind` flows into `sidecar.source.kind` — callers should
        pass "sfm-live" (default) for the live endpoint and "sfm-ach" for
        the ACH ingestion path. BW-imported events use save_imported_bw()
        instead.

        `geo_range` controls the ADC-counts → in/s scaling in the HDF5
        file ("normal" = 10 in/s FS, "sensitive" = 1.25 in/s FS).
        Defaults to "normal" — callers with compliance-config access
        should pass the actual unit setting so the saved samples are in
        the right units.

        Idempotent: if the event file already exists, it is overwritten
        with the freshly-encoded version (same bytes for the same
        a5_frames) and the sidecar's review block is preserved across
        re-saves.

        Raises ValueError when `a5_frames` is empty or `serial` is falsy.
        """
        if not a5_frames:
            raise ValueError("WaveformStore.save: a5_frames is empty")
        if not serial:
            raise ValueError("WaveformStore.save: serial is required")

        filename = blastware_filename(ev, serial)
        bw_path, a5_path = self.paths_for(serial, filename)
        sidecar_path = self.sidecar_path_for(serial, filename)
        hdf5_path = self.hdf5_path_for(serial, filename)

        # 1. encode the event file (defensive unlink prevents trailing-byte
        #    leaks from a previous larger file on synced/odd filesystems).
        self._unlink_if_exists(bw_path)
        write_blastware_file(ev, a5_frames, bw_path)
        filesize = bw_path.stat().st_size
        sha256 = event_file_io.file_sha256(bw_path)

        # 2. write the .a5.pkl sidecar
        self._unlink_if_exists(a5_path)
        payload = {
            "version": A5_PICKLE_VERSION,
            "frames": [_frame_to_dict(f) for f in a5_frames],
        }
        with a5_path.open("wb") as fp:
            pickle.dump(payload, fp, protocol=pickle.HIGHEST_PROTOCOL)

        # 3. write the .h5 clean-waveform file (samples in physical units).
        #    Best-effort: a write failure shouldn't sink the rest of the save
        #    (the HDF5 can be regenerated later from the .a5.pkl).
        hdf5_filename: Optional[str] = None
        try:
            event_hdf5.write_event_hdf5(
                hdf5_path, ev,
                serial=serial,
                geo_range=geo_range,
                source_kind=source_kind,
            )
            hdf5_filename = hdf5_path.name
        except Exception as exc:
            log.warning(
                "save: HDF5 write failed for %s: %s — continuing without .h5",
                hdf5_path, exc,
            )

        # 4. write the .sfm.json sidecar. Preserve any existing review
        #    block + extensions across re-saves so user edits aren't lost
        #    when the same event is re-downloaded (e.g. via Force refresh).
        existing_review, existing_extensions = self._preserved_blocks(
            sidecar_path, "save",
        )
        sidecar = event_file_io.event_to_sidecar_dict(
            ev,
            serial=serial,
            blastware_filename=filename,
            blastware_filesize=filesize,
            blastware_sha256=sha256,
            source_kind=source_kind,
            a5_pickle_filename=a5_path.name,
            review=existing_review,
            extensions=existing_extensions,
        )
        event_file_io.write_sidecar(sidecar_path, sidecar)

        log.info(
            "WaveformStore.save serial=%s filename=%s filesize=%d frames=%d "
            "h5=%s sidecar=%s",
            serial, filename, filesize, len(a5_frames),
            hdf5_filename or "(skipped)", sidecar_path.name,
        )
        return {
            "filename": filename,
            "filesize": filesize,
            "sha256": sha256,
            "a5_pickle_filename": a5_path.name,
            "hdf5_filename": hdf5_filename,
            "sidecar_filename": sidecar_path.name,
        }

    def save_imported_bw(
        self,
        bw_bytes: bytes,
        source_path: Path,
        *,
        serial_hint: Optional[str] = None,
    ) -> tuple[Event, dict]:
        """
        Ingest a Blastware event file produced by an external tool
        (Blastware's own ACH, manual download, etc.) where the source A5
        frames aren't available.

        Workflow:
          1. Parse the bytes via event_file_io.read_blastware_file (writes
             a temp file to do that, since the parser takes a path).
          2. Use serial_hint if given, else resolve the serial from the BW
             filename (`<P><serial3>...`). Falls back to "UNKNOWN".
          3. Copy the BW bytes verbatim into <root>/<serial>/<filename>.
          4. Write the .h5 clean-waveform file (best-effort).
          5. Write the .sfm.json sidecar with source.kind = "bw-import"
             and a5_pickle_filename = None. Does NOT write a .a5.pkl
             (no A5 source available; byte-for-byte regeneration not
             possible — the on-disk BW file IS the byte-for-byte source).
             An existing sidecar's review block and extensions are kept,
             matching save().

        Returns (event, record_dict) so callers can both insert into
        SeismoDb and surface the parsed Event.
        """
        # Stash the bytes to a temp path so read_blastware_file (path-based)
        # can parse without us duplicating its logic.
        import tempfile

        with tempfile.NamedTemporaryFile(suffix=".bw", delete=False) as tmp:
            tmp.write(bw_bytes)
            tmp_path = Path(tmp.name)
        try:
            ev = event_file_io.read_blastware_file(tmp_path)
        finally:
            self._unlink_if_exists(tmp_path)

        # Resolve serial. blastware_filename derives a 4-char prefix from
        # the numeric serial (e.g. BE11529 → M529); we go the other way
        # via the source filename if a hint wasn't given.
        serial = serial_hint or _serial_from_bw_filename(source_path.name) or "UNKNOWN"

        # Use the source filename verbatim — it already encodes timestamp
        # + record type per BW's AB0T scheme, and we want to preserve it
        # so the file BW knows about can be opened back in BW.
        filename = source_path.name
        bw_path = self._serial_dir(serial) / filename

        # 1. copy bytes
        bw_path.write_bytes(bw_bytes)
        filesize = bw_path.stat().st_size
        sha256 = event_file_io.file_sha256(bw_path)

        # 2. write the .h5 clean-waveform file from the parsed Event.
        #    Note: peaks here are computed from raw samples (the BW file
        #    doesn't carry the device-authoritative 0C peaks). Best-effort.
        hdf5_path = self.hdf5_path_for(serial, filename)
        hdf5_filename: Optional[str] = None
        try:
            event_hdf5.write_event_hdf5(
                hdf5_path, ev,
                serial=serial,
                geo_range="normal",  # BW file doesn't carry the range; assume Normal
                source_kind="bw-import",
            )
            hdf5_filename = hdf5_path.name
        except Exception as exc:
            log.warning(
                "save_imported_bw: HDF5 write failed for %s: %s — continuing",
                hdf5_path, exc,
            )

        # 3. write sidecar with source.kind = bw-import. Review edits AND
        #    extensions survive a re-import of the same file, as in save().
        sidecar_path = self.sidecar_path_for(serial, filename)
        existing_review, existing_extensions = self._preserved_blocks(
            sidecar_path, "save_imported_bw",
        )
        sidecar = event_file_io.event_to_sidecar_dict(
            ev,
            serial=serial,
            blastware_filename=filename,
            blastware_filesize=filesize,
            blastware_sha256=sha256,
            source_kind="bw-import",
            a5_pickle_filename=None,
            review=existing_review,
            extensions=existing_extensions,
        )
        event_file_io.write_sidecar(sidecar_path, sidecar)

        log.info(
            "WaveformStore.save_imported_bw serial=%s filename=%s filesize=%d "
            "h5=%s (no .a5.pkl — A5 source unavailable for BW-imported files)",
            serial, filename, filesize, hdf5_filename or "(skipped)",
        )
        return ev, {
            "filename": filename,
            "filesize": filesize,
            "sha256": sha256,
            "a5_pickle_filename": None,
            "hdf5_filename": hdf5_filename,
            "sidecar_filename": sidecar_path.name,
        }

    def load_a5(self, serial: str, filename: str) -> Optional[list[S3Frame]]:
        """
        Re-hydrate the pickled A5 frame stream for a stored event.

        Returns None if the .a5.pkl is missing, cannot be unpickled, or does
        not have the expected {"frames": [...]} shape (the latter two are
        logged as warnings).
        """
        _, a5_path = self.paths_for(serial, filename, create=False)
        if not a5_path.exists():
            return None
        try:
            with a5_path.open("rb") as fp:
                # NOTE(security): pickle.load can execute arbitrary code.
                # Only files written by save() into this store belong here;
                # never point this at externally supplied .a5.pkl files.
                payload = pickle.load(fp)
        except Exception as exc:
            # Truncated/corrupt pickles surface as various exception types;
            # treat them like a missing file, as load_sidecar() does.
            log.warning("WaveformStore.load_a5: unreadable sidecar at %s: %s", a5_path, exc)
            return None
        if not isinstance(payload, dict) or "frames" not in payload:
            log.warning("WaveformStore.load_a5: malformed sidecar at %s", a5_path)
            return None
        return [_dict_to_frame(d) for d in payload["frames"]]

    # ── modern .sfm.json sidecar accessors ──────────────────────────────────────

    def load_sidecar(self, serial: str, filename: str) -> Optional[dict]:
        """Return the parsed .sfm.json sidecar dict, or None if missing/unreadable."""
        path = self.sidecar_path_for(serial, filename, create=False)
        if not path.exists():
            return None
        try:
            return event_file_io.read_sidecar(path)
        except Exception as exc:
            log.warning("load_sidecar: failed to read %s: %s", path, exc)
            return None

    def patch_sidecar(
        self,
        serial: str,
        filename: str,
        *,
        review: Optional[dict] = None,
        extensions: Optional[dict] = None,
        reviewer_now: bool = True,
    ) -> Optional[dict]:
        """
        JSON-merge-patch the .sfm.json sidecar's review/extensions blocks.

        Delegates to event_file_io.patch_sidecar. Returns the new full dict,
        or None if the sidecar doesn't exist.
        """
        path = self.sidecar_path_for(serial, filename, create=False)
        if not path.exists():
            return None
        return event_file_io.patch_sidecar(
            path,
            review=review,
            extensions=extensions,
            reviewer_now=reviewer_now,
        )
# ── helpers ─────────────────────────────────────────────────────────────────────
def _serial_from_bw_filename(name: str) -> Optional[str]:
"""
Reverse of `blastware_filename`'s serial-prefix encoding.
BW filename format (V10.72): `<P><serial3><stem4>.<ext>`
where P = chr(ord('B') + floor(serial // 1000))
and serial3 = f"{serial % 1000:03d}".
Examples (from CLAUDE.md verification archive):
P036... BE14036 H907... BE6907
M529... BE11529 T003... BE18003
Returns the inferred BE-prefix serial (e.g. "BE11529") or None when
the filename doesn't match the expected pattern.
"""
if not name:
return None
# First letter encodes the thousands group; next 3 chars encode the
# last 3 digits of the serial.
base = name.split(".", 1)[0]
if len(base) < 4 or not base[0].isalpha() or not base[1:4].isdigit():
return None
prefix_letter = base[0].upper()
if prefix_letter < "B":
return None
thousands = ord(prefix_letter) - ord("B")
serial_num = thousands * 1000 + int(base[1:4])
return f"BE{serial_num}"
+209
View File
@@ -0,0 +1,209 @@
"""
test_cache_invalidation.py — verify post-erase key-reuse correctness.
The device's event-key counter resets to 0x01110000 after every memory erase,
so a bare-key dedup (the old behaviour) silently treats a freshly-recorded
event 0 as if it were the previously-downloaded one. These tests exercise
the (key, timestamp)-based eviction logic in:
- bridges/ach_server.py (state-file migration + force flag)
- sfm/live_cache.py (LiveCache.set_events / set_waveform)
Run:
python tests/test_cache_invalidation.py
"""
from __future__ import annotations
import json
import os
import sys
import tempfile
from pathlib import Path
try:
import pytest
except ImportError:
pytest = None # type: ignore
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# ── ACH state migration ───────────────────────────────────────────────────────
def test_ach_state_legacy_migration(tmp_path: Path):
    """
    Legacy v1 state with a `downloaded_keys` list is migrated on _load_state
    to the v2 `downloaded_events` dict. All legacy keys come back with empty
    timestamps so the (key, ts) compare in get_events() always falls through
    to a fresh download.
    """
    from bridges.ach_server import _load_state

    legacy_keys = ["01110000", "0111245a"]
    legacy_unit = {
        "downloaded_keys": legacy_keys,
        "max_downloaded_key": "0111245a",
        "last_seen": "2026-04-11T01:04:36",
        "serial": "BE11529",
        "peer": "63.43.212.232:51920",
    }
    state_file = tmp_path / "ach_state.json"
    state_file.write_text(json.dumps({"BE11529": legacy_unit}))

    unit = _load_state(state_file)["BE11529"]

    # The v1 list is gone, replaced by a key -> "" mapping.
    assert "downloaded_keys" not in unit
    assert unit["downloaded_events"] == {
        "01110000": "",
        "0111245a": "",
    }
    # max_downloaded_key is preserved verbatim
    assert unit["max_downloaded_key"] == "0111245a"
def test_ach_state_v2_passes_through(tmp_path: Path):
    """A v2 state file is returned verbatim — no migration touches it."""
    from bridges.ach_server import _load_state

    downloaded = {
        "01110000": "2026-04-15T14:23:45",
        "0111245a": "2026-04-16T09:01:12",
    }
    v2 = {
        "BE11529": {
            "downloaded_events": downloaded,
            "max_downloaded_key": "0111245a",
            "serial": "BE11529",
        },
    }
    state_file = tmp_path / "ach_state.json"
    state_file.write_text(json.dumps(v2))

    reloaded = _load_state(state_file)

    assert reloaded["BE11529"]["downloaded_events"] == v2["BE11529"]["downloaded_events"]
def test_ach_state_missing_returns_empty(tmp_path: Path):
    """Nonexistent state path → empty dict (not an error)."""
    from bridges.ach_server import _load_state

    missing = tmp_path / "absent.json"
    state = _load_state(missing)
    assert state == {}
# ── _LiveCache eviction ───────────────────────────────────────────────────────
def _ev(index: int, key: str, ts: str) -> dict:
return {"index": index, "waveform_key": key, "timestamp": ts}
def test_live_cache_set_events_no_eviction_when_keys_match():
    """No flush when incoming events match the cached (key, ts) at each index."""
    from sfm.live_cache import LiveCache as _LiveCache

    def listing() -> list:
        # Fresh dicts on every call, so the cache never sees a shared object.
        return [
            _ev(0, "01110000", "2026-04-15T14:23:45"),
            _ev(1, "0111245a", "2026-04-16T09:01:12"),
        ]

    conn = "tcp:1.2.3.4:12345"
    cache = _LiveCache()
    cache.set_events(conn, 2, listing())
    cache.set_waveform(conn, 0, _ev(0, "01110000", "2026-04-15T14:23:45"))

    # Same events again — must not flush.
    cache.set_events(conn, 2, listing())

    assert cache._events[conn][0] == 2
    assert (conn, 0) in cache._waveforms
def test_live_cache_set_events_flushes_on_post_erase_collision():
    """
    Index 0 keeps the same key (01110000 reuses) but the timestamp differs —
    device was erased + re-recorded → flush all events + waveforms for the
    device.
    """
    from sfm.live_cache import LiveCache as _LiveCache

    conn = "tcp:1.2.3.4:12345"
    reused_key = "01110000"
    before_erase = "2026-04-15T14:23:45"
    after_erase = "2026-05-06T12:34:56"
    cache = _LiveCache()

    # First "session": index 0 key=01110000 ts=2026-04-15.
    cache.set_events(conn, 1, [_ev(0, reused_key, before_erase)])
    cache.set_waveform(conn, 0, _ev(0, reused_key, before_erase))
    assert (conn, 0) in cache._waveforms

    # Second "session" after erase: index 0 still key=01110000 but new ts.
    cache.set_events(conn, 1, [_ev(0, reused_key, after_erase)])

    # The prior session's waveform must be gone, while the new events list
    # is what the cache now holds.
    assert (conn, 0) not in cache._waveforms
    assert cache._events[conn][1][0]["timestamp"] == "2026-05-06T12:34:56"
def test_live_cache_set_waveform_flushes_on_mismatch():
    """set_waveform alone should also evict when (key, ts) differs."""
    from sfm.live_cache import LiveCache as _LiveCache

    conn = "tcp:1.2.3.4:12345"
    cache = _LiveCache()
    seeded = (
        (0, "01110000", "2026-04-15T14:23:45"),
        (1, "0111245a", "2026-04-16T09:01:12"),
    )
    for idx, key, ts in seeded:
        cache.set_waveform(conn, idx, _ev(idx, key, ts))

    # Index 0 swap: same key, new timestamp.
    cache.set_waveform(conn, 0, _ev(0, "01110000", "2026-05-06T12:34:56"))

    # Index 1's stale waveform must be flushed — keeping it would mix eras.
    assert (conn, 1) not in cache._waveforms
    # The newly-inserted index 0 entry is what's there.
    assert cache._waveforms[(conn, 0)]["timestamp"] == "2026-05-06T12:34:56"
def test_live_cache_partial_signature_does_not_flush():
    """
    If incoming event lacks waveform_key OR timestamp, we cannot prove a
    mismatch → eviction must NOT trigger. Avoids spurious flushes from
    legacy / partial event shapes.

    A second index is cached up front so that a spurious device-wide flush
    would actually be observable.
    """
    from sfm.live_cache import LiveCache as _LiveCache

    c = _LiveCache()
    conn = "tcp:1.2.3.4:12345"
    c.set_waveform(conn, 0, _ev(0, "01110000", "2026-04-15T14:23:45"))
    c.set_waveform(conn, 1, _ev(1, "0111245a", "2026-04-16T09:01:12"))

    # Incoming entry missing the timestamp — cannot prove a mismatch.
    c.set_waveform(conn, 0, {"index": 0, "waveform_key": "01110000"})

    # The index-0 row is overwritten in place...
    assert (conn, 0) in c._waveforms
    # ...and the unrelated index-1 waveform must survive (no flush).
    assert (conn, 1) in c._waveforms
    assert c._waveforms[(conn, 1)]["timestamp"] == "2026-04-16T09:01:12"
if __name__ == "__main__":
    if pytest is not None:
        # Propagate pytest's exit status so `python tests/...py` fails in
        # CI/shell when a test fails, matching the fallback runner below.
        sys.exit(int(pytest.main([__file__, "-v"])))

    # Fallback when pytest is not installed: run every module-level test_*
    # callable, supplying a temporary directory for `tmp_path`.
    import inspect
    import traceback as _tb

    passed = failed = 0
    for _name, _fn in sorted(globals().items()):
        if not _name.startswith("test_") or not callable(_fn):
            continue
        try:
            _sig = inspect.signature(_fn)
            if "tmp_path" in _sig.parameters:
                with tempfile.TemporaryDirectory() as _td:
                    _fn(Path(_td))
            else:
                _fn()
            print(f"PASS {_name}")
            passed += 1
        except Exception:
            print(f"FAIL {_name}")
            _tb.print_exc()
            failed += 1
    print(f"\n{passed} passed, {failed} failed")
    sys.exit(0 if failed == 0 else 1)
+348
View File
@@ -0,0 +1,348 @@
"""
test_event_file_io.py — sidecar write/read/patch round-trips,
WaveformStore sidecar integration, and the BW-import path.
Run:
python tests/test_event_file_io.py
"""
from __future__ import annotations
import json
import os
import sys
import tempfile
from pathlib import Path
try:
import pytest
except ImportError:
pytest = None # type: ignore
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from minimateplus import event_file_io
from minimateplus.framing import S3Frame
from minimateplus.models import Event, Timestamp
# ── Fixtures shared with test_waveform_store.py ───────────────────────────────
def _make_synthetic_event() -> tuple[Event, list[S3Frame]]:
    """Same shape as tests/test_waveform_store.py — minimum viable Event +
    A5 stream that makes write_blastware_file emit a parseable file.

    STRT is exactly 21 bytes; rectime_seconds lands at byte 18 to match
    `_decode_a5_waveform`'s expected layout (which is also what
    `read_blastware_file()` reads back)."""
    rectime = 3
    key4 = bytes.fromhex("01110000")

    strt_buf = bytearray(21)
    strt_buf[0:4] = b"STRT"
    strt_buf[4:6] = b"\xff\xfe"
    strt_buf[6:10] = key4   # end_key (per data[23:27] in CLAUDE.md)
    strt_buf[10:14] = key4  # start_key (per data[27:31])
    strt_buf[18] = rectime
    strt = bytes(strt_buf)

    def _a5(page_hi: int, page_lo: int, data: bytes) -> S3Frame:
        # Every synthetic frame is an A5 frame with a valid, zero checksum byte.
        return S3Frame(sub=0xA5, page_hi=page_hi, page_lo=page_lo, data=data,
                       checksum_valid=True, chk_byte=0x00)

    probe = _a5(0x10, 0x00, bytes(7) + strt + bytes(32))
    sample = _a5(0x00, 0x10, bytes(7) + bytes(0x0200))

    # Build a valid 26-byte footer (0e 08 + ts1 + ts2 + 6 const + 2 crc)
    # and embed it at the END of the terminator's contribution so
    # write_blastware_file finds the real `0e 08` marker rather than
    # falling back to slicing the last 26 bytes of zero garbage.
    # ts byte order: [day][month][year_HI][year_LO][0x00][hour][min][sec]
    ts1 = bytes([6, 5, 0x07, 0xea, 0, 12, 34, 56])  # 2026-05-06 12:34:56
    ts2 = bytes([6, 5, 0x07, 0xea, 0, 12, 35, 6])   # ts1 + ~10s
    footer = b"\x0e\x08" + ts1 + ts2 + b"\x00\x01\x00\x02\x00\x00" + b"\x00\x00"
    assert len(footer) == 26
    # 11 prefix + 38 pad + 26 footer = 75
    term = _a5(0x00, 0x00, bytes(11) + bytes(38) + footer)

    frames = [probe, sample, term]

    ev = Event(index=0)
    ev._waveform_key = key4
    ev.timestamp = Timestamp(
        raw=b"", flag=0x10, year=2026, unknown_byte=0,
        month=5, day=6, hour=12, minute=34, second=56,
    )
    ev.rectime_seconds = rectime
    ev.record_type = "Waveform"
    # The event gets its own list object, separate from the one returned.
    ev._a5_frames = list(frames)
    return ev, frames
# ── Sidecar write/read round-trip ─────────────────────────────────────────────
def test_event_to_sidecar_dict_shape():
    """event_to_sidecar_dict emits the expected sections and default review/extensions."""
    ev, _ = _make_synthetic_event()
    sha = "abcd" * 16

    d = event_file_io.event_to_sidecar_dict(
        ev,
        serial="BE11529",
        blastware_filename="M529LKIQ.7M0W",
        blastware_filesize=1024,
        blastware_sha256=sha,
        source_kind="sfm-live",
        a5_pickle_filename="M529LKIQ.7M0W.a5.pkl",
    )

    assert d["schema_version"] == event_file_io.SCHEMA_VERSION
    assert d["kind"] == event_file_io.SIDECAR_KIND

    event_block = d["event"]
    assert event_block["serial"] == "BE11529"
    assert event_block["timestamp"] == "2026-05-06T12:34:56"
    assert event_block["waveform_key"] == "01110000"

    assert d["blastware"]["sha256"] == sha
    assert d["source"]["kind"] == "sfm-live"

    # With no review/extensions passed in, the defaults are filled in.
    default_review = {
        "false_trigger": False, "reviewer": None,
        "reviewed_at": None, "notes": "",
    }
    assert d["review"] == default_review
    assert d["extensions"] == {}
def test_sidecar_write_and_read_round_trip(tmp_path: Path):
    """A sidecar written with write_sidecar reads back with the same blocks."""
    ev, _ = _make_synthetic_event()
    sidecar_file = tmp_path / "M529LKIQ.7M0W.sfm.json"
    src = event_file_io.event_to_sidecar_dict(
        ev,
        serial="BE11529",
        blastware_filename="M529LKIQ.7M0W",
        blastware_filesize=1024,
        blastware_sha256="x" * 64,
        source_kind="sfm-ach",
    )

    event_file_io.write_sidecar(sidecar_file, src)
    loaded = event_file_io.read_sidecar(sidecar_file)

    for block in ("event", "blastware"):
        assert loaded[block] == src[block]
    assert loaded["source"]["kind"] == "sfm-ach"
def test_sidecar_rejects_unsupported_schema_version(tmp_path: Path):
    """read_sidecar raises ValueError mentioning schema_version for a newer schema."""
    future_doc = {
        "schema_version": event_file_io.SCHEMA_VERSION + 1,
        "kind": event_file_io.SIDECAR_KIND,
    }
    path = tmp_path / "future.sfm.json"
    path.write_text(json.dumps(future_doc))

    # Plain try/except/else rather than pytest.raises, because pytest is an
    # optional import in this file.
    try:
        event_file_io.read_sidecar(path)
    except ValueError as exc:
        assert "schema_version" in str(exc)
    else:
        raise AssertionError("read_sidecar should have rejected unsupported version")
def test_sidecar_extensions_survive_round_trip(tmp_path: Path):
    """Forward-compat: unknown keys inside `extensions` survive a r/w cycle."""
    ev, _ = _make_synthetic_event()
    vendor_blob = {"vendor.acme.gps": {"lat": 40.7, "lon": -74.0}}
    sidecar = event_file_io.event_to_sidecar_dict(
        ev,
        serial="BE11529",
        blastware_filename="X",
        blastware_filesize=0,
        blastware_sha256="",
        source_kind="sfm-live",
        extensions=vendor_blob,
    )
    target = tmp_path / "x.sfm.json"

    event_file_io.write_sidecar(target, sidecar)
    back = event_file_io.read_sidecar(target)

    assert back["extensions"]["vendor.acme.gps"]["lat"] == 40.7
def test_sidecar_patch_review_stamps_reviewed_at(tmp_path: Path):
    """Patching the review block applies the edits, stamps reviewed_at, and persists."""
    ev, _ = _make_synthetic_event()
    path = tmp_path / "patch.sfm.json"
    initial = event_file_io.event_to_sidecar_dict(
        ev,
        serial="BE11529",
        blastware_filename="X",
        blastware_filesize=0,
        blastware_sha256="",
        source_kind="sfm-live",
    )
    event_file_io.write_sidecar(path, initial)

    edits = {"false_trigger": True, "notes": "truck thump", "reviewer": "brian"}
    new = event_file_io.patch_sidecar(path, review=edits)

    review = new["review"]
    assert review["false_trigger"] is True
    assert review["notes"] == "truck thump"
    assert review["reviewer"] == "brian"
    assert review["reviewed_at"], "reviewed_at must be auto-stamped"

    # The change must be on disk too, not just in the returned dict.
    on_disk = event_file_io.read_sidecar(path)
    assert on_disk["review"]["false_trigger"] is True
# ── WaveformStore integration ─────────────────────────────────────────────────
def test_waveform_store_save_writes_sidecar(tmp_path: Path):
    """WaveformStore.save writes a sidecar whose fields agree with the returned record."""
    from sfm.waveform_store import WaveformStore

    ev, frames = _make_synthetic_event()
    store = WaveformStore(tmp_path / "waveforms")

    rec = store.save(ev, serial="BE11529", a5_frames=frames, source_kind="sfm-live")

    assert rec["sidecar_filename"].endswith(".sfm.json")
    assert rec["sha256"] and len(rec["sha256"]) == 64

    sc = store.load_sidecar("BE11529", rec["filename"])
    assert sc is not None
    bw_block, source_block = sc["blastware"], sc["source"]
    assert bw_block["filename"] == rec["filename"]
    assert bw_block["sha256"] == rec["sha256"]
    assert source_block["kind"] == "sfm-live"
    # The .a5.pkl reference should match the actual filename on disk.
    assert source_block["a5_pickle_filename"] == rec["a5_pickle_filename"]
def test_waveform_store_save_preserves_review_across_resave(tmp_path: Path):
    """Re-saving the same event must preserve a user's prior review edits."""
    from sfm.waveform_store import WaveformStore

    serial = "BE11529"
    ev, frames = _make_synthetic_event()
    store = WaveformStore(tmp_path / "waveforms")
    first = store.save(ev, serial=serial, a5_frames=frames)

    # User flips false_trigger and adds a note.
    user_edits = {"false_trigger": True, "notes": "hello"}
    store.patch_sidecar(serial, first["filename"], review=user_edits)

    # A second save (e.g. Force refresh re-download) must keep those edits.
    store.save(ev, serial=serial, a5_frames=frames)

    review = store.load_sidecar(serial, first["filename"])["review"]
    assert review["false_trigger"] is True
    assert review["notes"] == "hello"
def test_waveform_store_patch_sidecar_returns_none_when_missing(tmp_path: Path):
    """patch_sidecar on an event with no sidecar returns None."""
    from sfm.waveform_store import WaveformStore

    store = WaveformStore(tmp_path / "waveforms")
    result = store.patch_sidecar("BE99999", "no.such.W", review={"notes": "x"})
    assert result is None
# ── DB integration: sidecar_filename column + update_event_review ─────────────
def test_seismodb_persists_sidecar_filename_and_review_sync(tmp_path: Path):
    """insert_events stores sidecar_filename; update_event_review syncs false_trigger."""
    from sfm.database import SeismoDb

    ev, _ = _make_synthetic_event()
    db = SeismoDb(tmp_path / "seismo_relay.db")
    rec = {
        "filename": "M529LKIQ.7M0W",
        "filesize": 8708,
        "a5_pickle_filename": "M529LKIQ.7M0W.a5.pkl",
        "sidecar_filename": "M529LKIQ.7M0W.sfm.json",
    }
    records_by_key = {ev._waveform_key.hex(): rec}

    inserted, _ = db.insert_events(
        [ev], serial="BE11529", waveform_records=records_by_key,
    )
    assert inserted == 1

    row = db.query_events(serial="BE11529")[0]
    assert row["sidecar_filename"] == rec["sidecar_filename"]
    event_id = row["id"]

    # update_event_review keeps false_trigger column in sync with sidecar.
    assert db.update_event_review(event_id, {"false_trigger": True}) is True
    assert db.get_event(event_id)["false_trigger"] == 1

    # Empty review block (no false_trigger key) → no-op but row exists.
    assert db.update_event_review(event_id, {"notes": "x"}) is True
# ── BW-file reader (read_blastware_file) ─────────────────────────────────────
def test_read_blastware_file_round_trip(tmp_path: Path):
    """write → read → key/timestamp/rectime survive."""
    from minimateplus.blastware_file import write_blastware_file, blastware_filename

    ev, frames = _make_synthetic_event()
    bw_path = tmp_path / blastware_filename(ev, "BE11529")
    write_blastware_file(ev, frames, bw_path)

    parsed = event_file_io.read_blastware_file(bw_path)

    assert parsed._waveform_key == ev._waveform_key
    assert parsed.rectime_seconds == ev.rectime_seconds

    # Timestamp lands via the footer; year/month/day/hour/min/sec all survive.
    assert parsed.timestamp is not None
    for field in ("year", "month", "day", "hour", "minute", "second"):
        assert getattr(parsed.timestamp, field) == getattr(ev.timestamp, field)

    # No A5 source recoverable.
    assert parsed._a5_frames is None

    # Peaks computed from samples (synthetic = zero samples → zero peaks).
    peaks = parsed.peak_values
    assert peaks is not None
    assert peaks.peak_vector_sum == 0.0
def test_save_imported_bw_round_trip(tmp_path: Path):
    """Importing a BW file stores a byte-identical copy plus a ``bw-import`` sidecar."""
    from minimateplus.blastware_file import blastware_filename, write_blastware_file
    from sfm.waveform_store import WaveformStore

    # Write the BW file outside the store directory first.
    event, a5_frames = _make_synthetic_event()
    bw_name = blastware_filename(event, "BE11529")
    external_file = tmp_path / bw_name
    write_blastware_file(event, a5_frames, external_file)

    store = WaveformStore(tmp_path / "waveforms")
    _, record = store.save_imported_bw(
        external_file.read_bytes(), source_path=external_file,
    )

    assert record["filename"] == bw_name
    # BW imports have no A5 frames to pickle.
    assert record["a5_pickle_filename"] is None

    sidecar = store.load_sidecar("BE11529", bw_name)
    assert sidecar is not None
    provenance = sidecar["source"]
    assert provenance["kind"] == "bw-import"
    assert provenance["a5_pickle_filename"] is None

    # The import is a plain copy, so stored bytes must equal the source bytes.
    stored_file = store.open_blastware("BE11529", bw_name)
    assert stored_file is not None
    assert stored_file.read_bytes() == external_file.read_bytes()
if __name__ == "__main__":
    # Script entry point: prefer pytest when it is installed, otherwise fall
    # back to a tiny built-in runner so the file can still be executed.
    if pytest is not None:
        # Propagate pytest's exit status; without sys.exit() the script
        # would exit 0 even when tests fail.
        sys.exit(pytest.main([__file__, "-v"]))
    else:
        import inspect
        import traceback as _tb

        passed = failed = 0
        # sorted() snapshots globals() up front, so the loop variables
        # created below do not disturb the iteration.
        for _name, _fn in sorted(globals().items()):
            if not _name.startswith("test_") or not callable(_fn):
                continue
            try:
                _sig = inspect.signature(_fn)
                if "tmp_path" in _sig.parameters:
                    # Stand-in for pytest's tmp_path fixture.
                    with tempfile.TemporaryDirectory() as _td:
                        _fn(Path(_td))
                else:
                    _fn()
                print(f"PASS {_name}")
                passed += 1
            except Exception:
                # Top-level boundary: report the failure and keep going.
                print(f"FAIL {_name}")
                _tb.print_exc()
                failed += 1
        print(f"\n{passed} passed, {failed} failed")
        sys.exit(0 if failed == 0 else 1)
+296
View File
@@ -0,0 +1,296 @@
"""
test_event_hdf5.py HDF5 codec round-trip + plot.v1 JSON shape sanity.
Run:
python tests/test_event_hdf5.py
"""
from __future__ import annotations
import os
import sys
import tempfile
from pathlib import Path
try:
import pytest
except ImportError:
pytest = None # type: ignore
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from minimateplus.framing import S3Frame
from minimateplus.models import Event, PeakValues, ProjectInfo, Timestamp
from sfm import event_hdf5
# ── Fixtures ──────────────────────────────────────────────────────────────────
def _make_event_with_samples(n: int = 256) -> Event:
    """Build an Event carrying synthetic int16 ADC samples on all four channels.

    Channel content:
      - Tran: ramp from -16384 to +16383 (peak 5 in/s for Normal range)
      - Vert: a single full-scale spike (32767) at index ``n // 2``
        (peak = 10 in/s)
      - Long: zeros
      - MicL: small ramp from 0 up to 5000

    Peak values are set on the event the way the device's 0C record would
    supply them — used by the HDF5 writer for the mic per-count factor.
    """
    # Ramp denominator; max() keeps the n == 1 case from dividing by zero.
    span = max(n - 1, 1)

    tran = [int((i / span) * 32767 - 16384) for i in range(n)]
    mic = [int((i / span) * 5000) for i in range(n)]
    long_ = [0] * n
    vert = [0] * n
    if n:
        vert[n // 2] = 32767

    ev = Event(index=0)
    ev._waveform_key = bytes.fromhex("01110000")
    ev.timestamp = Timestamp(
        raw=b"",
        flag=0x10,
        year=2026,
        unknown_byte=0,
        month=5,
        day=7,
        hour=10,
        minute=0,
        second=0,
    )
    ev.record_type = "Waveform"
    ev.sample_rate = 1024
    ev.pretrig_samples = n // 4
    ev.total_samples = n
    ev.rectime_seconds = n / 1024.0
    ev.raw_samples = {"Tran": tran, "Vert": vert, "Long": long_, "MicL": mic}
    ev.peak_values = PeakValues(
        tran=5.0,
        vert=10.0,
        long=0.0,
        peak_vector_sum=10.0,
        micl=0.001,
    )
    ev.project_info = ProjectInfo(
        project="TestProj",
        client="TestClient",
        operator="brian",
        sensor_location="loc-A",
    )
    return ev
# ── HDF5 round-trip ───────────────────────────────────────────────────────────
def test_hdf5_round_trip_preserves_metadata(tmp_path: Path):
    """Event metadata written to HDF5 is readable again from the file attrs."""
    event = _make_event_with_samples()
    path = tmp_path / "test.h5"
    event_hdf5.write_event_hdf5(path, event, serial="BE11529", geo_range="normal")

    attrs = event_hdf5.read_event_hdf5(path)["attrs"]

    # Attributes that must come back exactly as written.
    exact = {
        "schema_version": event_hdf5.SCHEMA_VERSION,
        "kind": event_hdf5.HDF5_KIND,
        "serial": "BE11529",
        "waveform_key": "01110000",
        "sample_rate": 1024,
        "pretrig_samples": 64,
        "geo_range": "normal",
        "geo_full_scale_ips": 10.0,
        "project": "TestProj",
        "client": "TestClient",
        "operator": "brian",
    }
    for key, want in exact.items():
        assert attrs[key] == want, key

    # Float attrs may round-trip with tiny precision noise.
    for key, want in (("peak_tran_ips", 5.0), ("peak_vert_ips", 10.0)):
        assert abs(attrs[key] - want) < 1e-6, key
def test_hdf5_samples_in_physical_units_normal_range(tmp_path: Path):
    """Normal range: the full-scale Vert spike is stored as ~10 in/s.

    The fixture puts 32767 ADC counts on Vert, so with a 10 in/s full scale
    the stored sample should be 10 * 32767/32768 in/s.
    """
    path = tmp_path / "n.h5"
    event_hdf5.write_event_hdf5(
        path, _make_event_with_samples(), serial="BE11529", geo_range="normal",
    )

    vert = event_hdf5.read_event_hdf5(path)["samples"]["Vert"]

    assert vert.dtype.name == "float32"
    # Largest magnitude on the channel is essentially full scale.
    assert max(abs(v) for v in vert) > 9.99
    # The spike at n//2 carried 32767 ADC counts.
    expected_peak = 10.0 * 32767 / 32768
    assert abs(max(vert) - expected_peak) < 1e-3
def test_hdf5_samples_in_physical_units_sensitive_range(tmp_path: Path):
    """Sensitive range: the same full-scale Vert spike is stored as ~1.250 in/s."""
    path = tmp_path / "s.h5"
    event_hdf5.write_event_hdf5(
        path, _make_event_with_samples(), serial="BE11529", geo_range="sensitive",
    )

    vert = event_hdf5.read_event_hdf5(path)["samples"]["Vert"]

    expected_peak = 1.250 * 32767 / 32768
    assert abs(max(vert) - expected_peak) < 1e-4
def test_hdf5_includes_int16_samples(tmp_path: Path):
    """The HDF5 read result also exposes the samples as int16 arrays."""
    path = tmp_path / "i.h5"
    event_hdf5.write_event_hdf5(path, _make_event_with_samples(), serial="BE11529")

    raw = event_hdf5.read_event_hdf5(path)["samples_int16"]

    assert raw is not None
    assert "Tran" in raw
    assert raw["Vert"].dtype.name == "int16"
def test_hdf5_rejects_unsupported_schema(tmp_path: Path):
    """Reading a file whose schema_version is 99 raises a ValueError naming it."""
    import h5py

    path = tmp_path / "future.h5"
    # Hand-write a file with the right kind but an unknown schema version.
    with h5py.File(path, "w") as handle:
        handle.attrs["schema_version"] = 99
        handle.attrs["kind"] = event_hdf5.HDF5_KIND

    # Explicit try/except instead of pytest.raises, so the test also runs
    # under the standalone runner when pytest is not installed.
    try:
        event_hdf5.read_event_hdf5(path)
    except ValueError as exc:
        message = str(exc)
    else:
        raise AssertionError("read_event_hdf5 should reject unsupported schema_version")

    assert "schema_version" in message
# ── plot.v1 JSON shape ────────────────────────────────────────────────────────
def test_event_to_plot_json_shape():
    """event_to_plot_json returns the sfm.plot.v1 layout with physical-unit values."""
    payload = event_hdf5.event_to_plot_json(
        _make_event_with_samples(), serial="BE11529", geo_range="normal",
    )

    # Top-level fields.
    assert payload["schema"] == "sfm.plot.v1"
    assert payload["serial"] == "BE11529"
    assert payload["geo_range"] == "normal"
    assert payload["geo_full_scale_ips"] == 10.0
    assert payload["trigger_ms"] == 0.0

    # Time axis: 256 samples at 1024 Hz with 64 pre-trigger samples.
    axis = payload["time_axis"]
    assert axis["sample_rate"] == 1024
    assert axis["pretrig_samples"] == 64
    assert axis["n_samples"] == 256
    # t0_ms = -pretrig * dt_ms = -64 * (1000/1024) = -62.5
    assert abs(axis["t0_ms"] - (-64 * 1000 / 1024)) < 1e-3
    assert abs(axis["dt_ms"] - (1000 / 1024)) < 1e-6

    # Every channel is present with a unit and the expected keys.
    chans = payload["channels"]
    for name in ("Tran", "Vert", "Long", "MicL"):
        assert name in chans, f"missing channel: {name}"
        entry = chans[name]
        assert entry["unit"] in ("in/s", "psi")
        for required in ("values", "peak", "peak_t_ms"):
            assert required in entry

    # Values are in physical units: Vert peak ≈ 10 in/s.
    assert max(chans["Vert"]["values"]) > 9.99
def test_event_to_plot_json_peak_t_ms_locates_dirac():
    """Vert's peak_t_ms points at the full-scale spike.

    The spike sits at sample n//2, so the expected time relative to the
    trigger is (n//2 - pretrig) * dt_ms.
    """
    payload = event_hdf5.event_to_plot_json(
        _make_event_with_samples(n=256), serial="BE11529",
    )
    vert_peak_time = payload["channels"]["Vert"]["peak_t_ms"]

    expected = (128 - 64) * (1000 / 1024)  # = 62.5 ms
    assert abs(vert_peak_time - expected) < 1e-2
def test_plot_json_from_hdf5_round_trip(tmp_path: Path):
    """Plot JSON built from the .h5 file matches plot JSON built from the Event."""
    event = _make_event_with_samples()
    path = tmp_path / "rt.h5"
    event_hdf5.write_event_hdf5(path, event, serial="BE11529", geo_range="normal")

    j_disk = event_hdf5.plot_json_from_hdf5(path, event_id="abc-123")
    j_mem = event_hdf5.event_to_plot_json(
        event, serial="BE11529", geo_range="normal", event_id="abc-123",
    )

    # Top-level shape parity.
    shared_keys = (
        "schema", "serial", "geo_range", "geo_full_scale_ips",
        "trigger_ms", "record_type", "waveform_key", "event_id",
    )
    for k in shared_keys:
        assert j_disk.get(k) == j_mem.get(k), f"mismatch on {k}"

    for axis_key in ("sample_rate", "n_samples"):
        assert j_disk["time_axis"][axis_key] == j_mem["time_axis"][axis_key]

    # Sample values must match within float32 precision.
    for ch in ("Tran", "Vert", "Long", "MicL"):
        from_disk = j_disk["channels"][ch]["values"]
        from_mem = j_mem["channels"][ch]["values"]
        assert len(from_disk) == len(from_mem)
        if not from_disk:
            continue
        mx = max(abs(x - y) for x, y in zip(from_disk, from_mem))
        assert mx < 1e-3, f"{ch}: max diff {mx}"
# ── WaveformStore integration with HDF5 ───────────────────────────────────────
def _make_synthetic_event_for_save() -> tuple[Event, list[S3Frame]]:
    """Return a sample-bearing Event plus a 3-frame A5 stream for WaveformStore.save.

    The Event comes from _make_event_with_samples(n=128); the frames are a
    minimal probe + sample + terminator stream attached to it so the BW
    write path has something to encode.
    """
    ev = _make_event_with_samples(n=128)
    key4 = ev._waveform_key
    # rectime is written as a single byte; a fractional duration truncates
    # to 0, so fall back to 1.
    rectime = int(ev.rectime_seconds or 0) or 1

    # 21-byte STRT record: tag, 0xFFFE, the key twice, four zero bytes,
    # rectime at offset 18, two trailing zero bytes.
    # NOTE(review): the fixture in test_waveform_store.py builds a 22-byte
    # STRT with rectime at offset 21 — confirm which layout the encoder reads.
    strt = (
        b"STRT"
        + b"\xff\xfe"
        + key4
        + key4
        + bytes(4)
        + bytes([rectime])
        + bytes(2)
    )

    # Probe frame: 7-byte prefix, the STRT record, then 32 zero bytes.
    probe = S3Frame(
        sub=0xA5, page_hi=0x10, page_lo=0x00,
        data=bytes(7) + strt + bytes(32),
        checksum_valid=True, chk_byte=0x00,
    )
    # Sample frame: 7-byte prefix followed by 0x0200 zero bytes.
    sample = S3Frame(
        sub=0xA5, page_hi=0x00, page_lo=0x10,
        data=bytes(7) + bytes(0x0200),
        checksum_valid=True, chk_byte=0x00,
    )
    # Footer bytes carried by the terminator frame.
    footer = (
        b"\x0e\x08"
        + bytes([7, 5, 0x07, 0xea, 0, 10, 0, 0])
        + bytes([7, 5, 0x07, 0xea, 0, 10, 0, 1])
        + b"\x00\x01\x00\x02\x00\x00\x00\x00"
    )
    term = S3Frame(
        sub=0xA5, page_hi=0x00, page_lo=0x00,
        data=bytes(11) + bytes(38) + footer,
        checksum_valid=True, chk_byte=0x00,
    )

    # The event and the caller each get their own list object.
    ev._a5_frames = [probe, sample, term]
    return ev, [probe, sample, term]
def test_waveform_store_save_emits_hdf5(tmp_path: Path):
    """WaveformStore.save reports an hdf5_filename and writes a readable .h5 file."""
    from sfm.waveform_store import WaveformStore

    event, a5_frames = _make_synthetic_event_for_save()
    store = WaveformStore(tmp_path / "waveforms")
    record = store.save(event, serial="BE11529", a5_frames=a5_frames, geo_range="normal")

    assert record["hdf5_filename"], "hdf5_filename should be present in save() record"
    h5_file = store.hdf5_path_for("BE11529", record["filename"])
    assert h5_file.exists(), "WaveformStore.save should produce a .h5 file"

    # The stored attrs should reflect what was passed to save().
    attrs = event_hdf5.read_event_hdf5(h5_file)["attrs"]
    assert attrs["serial"] == "BE11529"
    assert attrs["geo_range"] == "normal"
if __name__ == "__main__":
    # Script entry point: prefer pytest when it is installed, otherwise fall
    # back to a tiny built-in runner so the file can still be executed.
    if pytest is not None:
        # Propagate pytest's exit status; without sys.exit() the script
        # would exit 0 even when tests fail.
        sys.exit(pytest.main([__file__, "-v"]))
    else:
        import inspect
        import traceback as _tb

        passed = failed = 0
        # sorted() snapshots globals() up front, so the loop variables
        # created below do not disturb the iteration.
        for _name, _fn in sorted(globals().items()):
            if not _name.startswith("test_") or not callable(_fn):
                continue
            try:
                _sig = inspect.signature(_fn)
                if "tmp_path" in _sig.parameters:
                    # Stand-in for pytest's tmp_path fixture.
                    with tempfile.TemporaryDirectory() as _td:
                        _fn(Path(_td))
                else:
                    _fn()
                print(f"PASS {_name}")
                passed += 1
            except Exception:
                # Top-level boundary: report the failure and keep going.
                print(f"FAIL {_name}")
                _tb.print_exc()
                failed += 1
        print(f"\n{passed} passed, {failed} failed")
        sys.exit(0 if failed == 0 else 1)
+302
View File
@@ -0,0 +1,302 @@
"""
test_waveform_store.py unit tests for sfm/waveform_store.py and the
SeismoDb columns + insert_events upsert path that the store depends on.
These tests exercise the *store + DB plumbing* in isolation they do not
re-test write_blastware_file (covered separately) and do not require a live
device or a wire capture.
Run:
python -m pytest tests/test_waveform_store.py -v
"""
from __future__ import annotations
import os
import sys
import datetime
from pathlib import Path
try:
import pytest
except ImportError: # allow running standalone without pytest installed
pytest = None # type: ignore
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from minimateplus.framing import S3Frame
from minimateplus.models import Event, Timestamp
# ── Test fixtures ──────────────────────────────────────────────────────────────
def _make_synthetic_event() -> tuple[Event, list[S3Frame]]:
    """
    Return a minimal Event together with a 3-frame A5 stream that satisfies
    write_blastware_file's STRT-extraction path.

    Frame 0 (probe):  carries a STRT record at the canonical position so
                      write_blastware_file finds it without falling back.
    Frame 1 (sample): 0x0200 zero bytes at page_key=0x0010 (sample marker).
    Frame 2 (TERM):   page_key=0x0000 marks the terminator.
    """
    waveform_key = bytes.fromhex("01110000")
    rectime = 3

    # STRT record: tag, 0xFFFE, the key twice, seven zero bytes, rectime.
    strt_record = b"".join([
        b"STRT",
        b"\xff\xfe",
        waveform_key,
        waveform_key,
        bytes(7),
        bytes([rectime]),
    ])

    def _a5(page_hi: int, page_lo: int, payload: bytes) -> S3Frame:
        # Every frame in this fixture is sub 0xA5 with a valid zero checksum.
        return S3Frame(sub=0xA5, page_hi=page_hi, page_lo=page_lo, data=payload,
                       checksum_valid=True, chk_byte=0x00)

    # Probe payload: 7 zero bytes, then STRT (blastware_file._strip looks for
    # STRT in data[7:]), then 32 zero bytes of fake body so reconstruction
    # has something to slice.
    probe = _a5(0x10, 0x00, bytes(7) + strt_record + bytes(32))
    sample = _a5(0x00, 0x10, bytes(7) + bytes(0x0200))
    term = _a5(0x00, 0x00, bytes(7) + bytes(64))

    ev = Event(index=0)
    ev._waveform_key = waveform_key
    ev.timestamp = Timestamp(
        raw=b"", flag=0x10,
        year=2026, unknown_byte=0, month=5, day=6,
        hour=12, minute=34, second=56,
    )
    ev.rectime_seconds = rectime
    ev.record_type = "Waveform"
    # The event and the caller each get their own list object.
    ev._a5_frames = [probe, sample, term]
    return ev, [probe, sample, term]
# ── Frame round-trip ───────────────────────────────────────────────────────────
def test_frame_dict_round_trip():
    """Every S3Frame field survives _frame_to_dict followed by _dict_to_frame."""
    from sfm.waveform_store import _dict_to_frame, _frame_to_dict

    original = S3Frame(
        sub=0xA5,
        page_hi=0x12,
        page_lo=0x34,
        data=b"\x10\x02\x00\xab\xcd",
        checksum_valid=False,
        chk_byte=0x42,
    )
    restored = _dict_to_frame(_frame_to_dict(original))

    for field in ("sub", "page_hi", "page_lo", "data", "checksum_valid", "chk_byte"):
        assert getattr(restored, field) == getattr(original, field), field
# ── Store save/load round-trip ─────────────────────────────────────────────────
def test_waveform_store_save_load_round_trip(tmp_path: Path):
    """save() writes the BW file and the A5 pickle; load_a5() returns equal frames."""
    from sfm.waveform_store import WaveformStore

    store = WaveformStore(tmp_path / "waveforms")
    event, a5_frames = _make_synthetic_event()
    record = store.save(event, serial="BE11529", a5_frames=a5_frames)
    bw_name = record["filename"]

    # The returned record describes the files that were written.
    assert bw_name.startswith("M529")
    assert record["filesize"] > 0
    assert record["a5_pickle_filename"] == bw_name + ".a5.pkl"

    # The BW binary exists on disk with the reported size.
    bw_file = store.open_blastware("BE11529", bw_name)
    assert bw_file is not None
    assert bw_file.exists()
    assert bw_file.stat().st_size == record["filesize"]

    # The pickled A5 frames load back frame for frame.
    reloaded = store.load_a5("BE11529", bw_name)
    assert reloaded is not None
    assert len(reloaded) == len(a5_frames)
    for expected, actual in zip(a5_frames, reloaded):
        for field in ("sub", "page_hi", "page_lo", "data"):
            assert getattr(actual, field) == getattr(expected, field), field
def test_waveform_store_missing_returns_none(tmp_path: Path):
    """Both lookups return None when the requested entry was never stored."""
    from sfm.waveform_store import WaveformStore

    store = WaveformStore(tmp_path / "waveforms")
    for lookup in (store.open_blastware, store.load_a5):
        assert lookup("BE99999", "no_such.7M0W") is None
def test_waveform_store_idempotent_save(tmp_path: Path):
    """Saving the same event twice yields the same filename and identical file bytes."""
    from sfm.waveform_store import WaveformStore

    store = WaveformStore(tmp_path / "waveforms")
    ev, frames = _make_synthetic_event()

    rec1 = store.save(ev, serial="BE11529", a5_frames=frames)
    bw_path = store.open_blastware("BE11529", rec1["filename"])
    # open_blastware returns None for a missing entry; fail with a clear
    # assertion here rather than an AttributeError on read_bytes() below.
    assert bw_path is not None, "first save() did not produce a Blastware file"
    bytes1 = bw_path.read_bytes()

    # Second save of the same event; re-read the same path afterwards.
    rec2 = store.save(ev, serial="BE11529", a5_frames=frames)
    bytes2 = bw_path.read_bytes()

    assert rec1["filename"] == rec2["filename"]
    assert bytes1 == bytes2
# ── DB integration ────────────────────────────────────────────────────────────
def test_seismodb_persists_waveform_columns(tmp_path: Path):
    """insert_events stores the waveform record fields when waveform_records is given."""
    from sfm.database import SeismoDb

    db = SeismoDb(tmp_path / "seismo_relay.db")
    event, _ = _make_synthetic_event()
    record = {
        "filename": "M529LKIQ.7M0W",
        "filesize": 8708,
        "a5_pickle_filename": "M529LKIQ.7M0W.a5.pkl",
    }

    # Waveform records are keyed by the hex form of the event's waveform key.
    inserted, skipped = db.insert_events(
        [event],
        serial="BE11529",
        waveform_records={event._waveform_key.hex(): record},
    )
    assert inserted == 1
    assert skipped == 0

    rows = db.query_events(serial="BE11529")
    assert len(rows) == 1
    stored = rows[0]

    # DB column name -> key in the waveform record it was filled from.
    column_sources = (
        ("blastware_filename", "filename"),
        ("blastware_filesize", "filesize"),
        ("a5_pickle_filename", "a5_pickle_filename"),
    )
    for column, source_key in column_sources:
        assert stored[column] == record[source_key], column

    # Fetching by id returns the same filename.
    fetched = db.get_event(stored["id"])
    assert fetched is not None
    assert fetched["blastware_filename"] == record["filename"]
def test_seismodb_dedup_upserts_waveform_fields(tmp_path: Path):
    """A deduplicated re-insert still refreshes the row's waveform fields."""
    from sfm.database import SeismoDb

    db = SeismoDb(tmp_path / "seismo_relay.db")
    event, _ = _make_synthetic_event()

    # First insert carries no waveform record, so the filename column is empty.
    db.insert_events([event], serial="BE11529")
    before = db.query_events(serial="BE11529")[0]
    assert before["blastware_filename"] is None

    # Second insert of the same event, now with a waveform record attached.
    record = {
        "filename": "M529LKIQ.7M0W",
        "filesize": 4242,
        "a5_pickle_filename": "M529LKIQ.7M0W.a5.pkl",
    }
    inserted, skipped = db.insert_events(
        [event],
        serial="BE11529",
        waveform_records={event._waveform_key.hex(): record},
    )
    # Counted as a duplicate rather than a new row...
    assert inserted == 0
    assert skipped == 1

    # ...yet the existing row now carries the waveform fields.
    after = db.query_events(serial="BE11529")[0]
    assert after["blastware_filename"] == record["filename"]
    assert after["blastware_filesize"] == 4242
def test_seismodb_migration_adds_columns(tmp_path: Path):
    """An existing DB without the new columns gets them added on init."""
    import sqlite3

    db_path = tmp_path / "old.db"

    # Build a "v0" events table without the new columns and seed one row.
    # The connection is closed explicitly: sqlite3's connection context
    # manager only commits or rolls back, it does not close, which would
    # leave a handle on the file open while SeismoDb opens it below.
    conn = sqlite3.connect(str(db_path))
    try:
        conn.executescript("""
CREATE TABLE events (
id TEXT PRIMARY KEY,
serial TEXT NOT NULL,
waveform_key TEXT NOT NULL,
session_id TEXT,
timestamp TEXT,
tran_ppv REAL,
vert_ppv REAL,
long_ppv REAL,
peak_vector_sum REAL,
mic_ppv REAL,
project TEXT,
client TEXT,
operator TEXT,
sensor_location TEXT,
sample_rate INTEGER,
record_type TEXT,
false_trigger INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
UNIQUE(serial, timestamp)
);
INSERT INTO events
(id, serial, waveform_key, timestamp)
VALUES
('legacy-id', 'BE11529', '01110000',
'2026-04-01T12:00:00');
""")
        # Same commit the `with` block performed on a clean exit.
        conn.commit()
    finally:
        conn.close()

    # Initialise SeismoDb against the old DB — migration should run.
    from sfm.database import SeismoDb

    db = SeismoDb(db_path)
    rows = db.query_events(serial="BE11529")

    # The legacy row is still there and now exposes the new columns.
    assert len(rows) == 1
    assert rows[0]["blastware_filename"] is None
    assert "blastware_filesize" in rows[0]
    assert "a5_pickle_filename" in rows[0]
if __name__ == "__main__":
    # Script entry point: prefer pytest when it is installed, otherwise fall
    # back to a tiny built-in runner so the file can still be executed.
    if pytest is not None:
        # Propagate pytest's exit status; without sys.exit() the script
        # would exit 0 even when tests fail.
        sys.exit(pytest.main([__file__, "-v"]))
    else:
        # Standalone runner — does not require pytest.
        import inspect
        import tempfile
        import traceback as _tb

        passed = failed = 0
        # sorted() snapshots globals() up front, so the loop variables
        # created below do not disturb the iteration.
        for _name, _fn in sorted(globals().items()):
            if not _name.startswith("test_") or not callable(_fn):
                continue
            try:
                _sig = inspect.signature(_fn)
                if "tmp_path" in _sig.parameters:
                    # Stand-in for pytest's tmp_path fixture.
                    with tempfile.TemporaryDirectory() as _td:
                        _fn(Path(_td))
                else:
                    _fn()
                print(f"PASS {_name}")
                passed += 1
            except Exception:
                # Top-level boundary: report the failure and keep going.
                print(f"FAIL {_name}")
                _tb.print_exc()
                failed += 1
        print(f"\n{passed} passed, {failed} failed")
        sys.exit(0 if failed == 0 else 1)