feat: v0.15.0

### Added

- **Layered event storage architecture.**  Each event now lands as four
  files in the per-serial waveform store, each with a clear role:

  - `<filename>` — the Blastware-readable binary (BW file).  Untouched.
  - `<filename>.a5.pkl` — the raw 5A frames (regenerative source).
  - `<filename>.h5` — clean per-channel waveform arrays in physical
    units (in/s for geo, psi for mic) plus event metadata (HDF5 with
    gzip compression).  This is the canonical format for downstream
    analysis tools.
  - `<filename>.sfm.json` — the modern review/metadata sidecar (peaks,
    project, source provenance, review state, extensions).

  SQLite (`seismo_relay.db`) is the searchable index over all four.

- **Plot-ready waveform JSON (`sfm.plot.v1`).**  The `/device/event/{idx}/waveform`
  and `/db/events/{id}/waveform.json` endpoints now return samples in
  physical units with explicit time-axis metadata, peak markers, and
  per-channel unit hints — no more guessing the ADC-to-velocity scale
  client-side.  The webapp waveform viewer was rewritten to consume
  this shape.

- **In-app waveform viewer accuracy fix.**  The standalone SFM webapp
  viewer was scaling geophone amplitudes by `geoAdcScale / 32767`
  (≈ 6.206 / 32767), where `geoAdcScale = 6.206053` is the device's
  *in/s per V* hardware constant — not the ADC-counts-to-velocity
  factor.  This silently scaled every plot ~38% too low for Normal-range
  geophones (the correct full-scale is 10.0 in/s, or 1.25 in/s for
  Sensitive).  Conversion is now done server-side using the geo_range
  from compliance config; the client just plots.

- New `sfm/event_hdf5.py` module: `write_event_hdf5()`,
  `read_event_hdf5()`, plus a plot-JSON helper.
- Backfill script extended to also emit `.h5` for existing events.

### Dependencies

- Added `h5py>=3.10` and `numpy>=1.24` for the HDF5 storage layer.
- Added `python-multipart>=0.0.7` (required by FastAPI for the
  `/db/import/blastware_file` endpoint introduced in this release).
This commit is contained in:
2026-05-08 04:39:51 +00:00
parent 9afa3484f4
commit c641d5fc10
14 changed files with 3511 additions and 177 deletions
+269 -66
View File
@@ -45,7 +45,7 @@ from typing import Optional
# FastAPI / Pydantic
try:
from fastapi import Body, FastAPI, HTTPException, Query
from fastapi import Body, FastAPI, File, HTTPException, Query, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
from pydantic import BaseModel
@@ -64,6 +64,7 @@ from minimateplus.models import CallHomeConfig, ComplianceConfig, DeviceInfo, Ev
from minimateplus.transport import TcpTransport, DEFAULT_TCP_PORT
from minimateplus.blastware_file import write_blastware_file, blastware_filename
from minimateplus.client import _decode_a5_metadata_into, _decode_a5_waveform
from sfm import event_hdf5
from sfm.cache import SFMCache, get_cache
from sfm.database import SeismoDb
from sfm.live_cache import LiveCache as _LiveCache
@@ -732,26 +733,33 @@ def device_event_waveform(
detail=f"Event index {index} not found on device",
)
raw = getattr(ev, "raw_samples", None) or {}
samples_decoded = len(raw.get("Tran", []))
# Backfill from compliance_config: sample_rate, record_time, and
# derived total_samples. These are user-set authoritative values; the
# corresponding STRT-derived guesses in `_decode_a5_waveform` can be
# off (e.g. rectime used to read the 0x46 record-type marker = 70s).
cc = info.compliance_config
if cc:
if ev.sample_rate is None and cc.sample_rate:
ev.sample_rate = cc.sample_rate
if cc.record_time:
ev.rectime_seconds = cc.record_time
if ev.sample_rate and ev.rectime_seconds:
derived = int(round(ev.sample_rate * ev.rectime_seconds))
if (ev.total_samples is None
or ev.total_samples > derived * 2
or ev.total_samples < derived // 4):
ev.total_samples = derived
geo_range = getattr(cc, "geo_range", None) if cc else None
# Resolve sample_rate from compliance config if not on the event itself
sample_rate = ev.sample_rate
if sample_rate is None and info.compliance_config:
sample_rate = info.compliance_config.sample_rate
result = {
"index": ev.index,
"record_type": ev.record_type,
"timestamp": _serialise_timestamp(ev.timestamp),
"total_samples": ev.total_samples,
"pretrig_samples": ev.pretrig_samples,
"rectime_seconds": ev.rectime_seconds,
"samples_decoded": samples_decoded,
"sample_rate": sample_rate,
"peak_values": _serialise_peak_values(ev.peak_values),
"channels": raw,
}
# Build the plot.v1 JSON: samples in physical units (in/s for geo, psi
# for mic), explicit time axis, peak markers — the shape clients should
# consume directly without doing any ADC scaling.
serial = getattr(info, "serial", None) or ""
result = event_hdf5.event_to_plot_json(
ev, serial=serial,
geo_range=geo_range or "normal",
index=index,
)
cache.set_waveform(conn_key, index, result)
return result
@@ -781,8 +789,9 @@ def device_event_blastware_file(
triggered events; histogram requires recording_mode
to be populated from compliance config)
Performs: POLL startup → get_events(full_waveform=False, extra_chunks=1,
stop_after_index=index) → write_blastware_file() → FileResponse.
Performs: POLL startup → get_events(full_waveform=True,
stop_after_index=index) → write_blastware_file() → FileResponse +
persistent store + DB upsert.
"""
log.info(
"GET /device/event/%d/blastware_file port=%s host=%s force=%s",
@@ -790,19 +799,19 @@ def device_event_blastware_file(
)
# `force` always re-downloads from the device. This endpoint already
# never short-circuits via cache, so `force` is reserved for parity with
# the other live endpoints and to suppress the post-download persist
# (see end of handler) when the caller wants a fetch-only escape hatch.
# the other live endpoints.
try:
def _do():
with _build_client(port, baud, host, tcp_port, timeout=120.0) as client:
info = client.connect()
# Under v0.14.0 BW-exact 5A walk, the chunk loop is bounded by
# the event end_offset extracted from STRT. No more
# stop_after_metadata / extra_chunks gymnastics — these
# kwargs are now no-ops.
# full_waveform=True pulls the complete 5A stream so the
# client populates STRT-derived fields (total_samples,
# pretrig_samples, rectime_seconds) AND raw_samples on the
# Event. Required for the .h5 + .sfm.json sidecar to be
# filled in correctly — without it, those land as nulls.
events = client.get_events(
full_waveform=False,
full_waveform=True,
stop_after_index=index,
)
matching = [ev for ev in events if ev.index == index]
@@ -861,7 +870,34 @@ def device_event_blastware_file(
# queryable via /db/events afterwards (matches the ACH ingestion path).
if serial != "UNKNOWN" and ev._waveform_key is not None:
try:
rec = _get_store().save(ev, serial=serial, a5_frames=a5_frames)
cc = info.compliance_config
# Backfill authoritative compliance-config values onto the
# Event before persisting. These supersede whatever
# _decode_a5_waveform read from the STRT bytes (some of which
# have ambiguous semantics — e.g. STRT[20] is rectime but
# STRT[8:10] / STRT[16:18] are device-specific scratch fields
# that aren't reliable sample/pretrig counts).
if cc:
if ev.sample_rate is None and cc.sample_rate:
ev.sample_rate = cc.sample_rate
if cc.record_time:
# record_time from compliance is authoritative — the
# user-set value the device followed when recording.
ev.rectime_seconds = cc.record_time
# Derive total_samples from sample_rate × rectime when
# we can; the STRT-derived value can land at a buffer-
# offset rather than a sample count.
if ev.sample_rate and ev.rectime_seconds:
derived = int(round(ev.sample_rate * ev.rectime_seconds))
if (ev.total_samples is None
or ev.total_samples > derived * 2
or ev.total_samples < derived // 4):
ev.total_samples = derived
geo_range = getattr(cc, "geo_range", None) if cc else None
rec = _get_store().save(
ev, serial=serial, a5_frames=a5_frames,
geo_range=geo_range if geo_range is not None else "normal",
)
_get_db().insert_events(
[ev],
serial=serial,
@@ -1412,34 +1448,50 @@ def db_event_blastware_file(event_id: str) -> FileResponse:
@app.get("/db/events/{event_id}/waveform.json")
def db_event_waveform_json(event_id: str) -> dict:
"""
Return the decoded raw_samples + metadata for a stored event in the
same JSON shape as `/device/event/{index}/waveform`.
Return the plot-ready JSON (`sfm.plot.v1`) for a stored event.
Reads the `.a5.pkl` sidecar from the store, rebuilds an Event in
memory, runs the standard A5 decoders, and serialises the result.
Resolution order (cheapest first):
1. If `<filename>.h5` exists, serve it via `plot_json_from_hdf5`.
Samples are already in physical units; no decode work needed.
2. Else if `<filename>.a5.pkl` exists, replay the A5 decoders to
rebuild an Event and serialise via `event_to_plot_json`.
3. Else 404 — the event has no waveform data on disk.
The shape is identical regardless of source, so clients (the SFM
webapp, Terra-View, etc.) consume the same `sfm.plot.v1` payload.
"""
row = _get_db().get_event(event_id)
if row is None:
raise HTTPException(status_code=404, detail=f"Event {event_id} not found")
serial = row.get("serial")
filename = row.get("blastware_filename")
a5_name = row.get("a5_pickle_filename")
if not serial or not filename or not a5_name:
if not serial or not filename:
raise HTTPException(
status_code=404,
detail=f"Event {event_id} has no event file in the store",
)
store = _get_store()
# Path 1: HDF5 (canonical clean format).
h5_path = store.hdf5_path_for(serial, filename)
if h5_path.exists():
try:
return event_hdf5.plot_json_from_hdf5(h5_path, event_id=event_id)
except Exception as exc:
log.warning("HDF5 read failed (%s); falling back to A5 path", exc)
# Path 2: A5 pickle replay.
a5_frames = store.load_a5(serial, filename)
if not a5_frames:
raise HTTPException(
status_code=404,
detail=(
f"Event {event_id} has no A5 sidecar in the store. "
"Re-download via the live endpoint to populate."
f"Event {event_id} has no waveform data on disk "
"(no .h5 and no .a5.pkl). Run the backfill script or "
"re-download via the live endpoint to populate."
),
)
a5_frames = _get_store().load_a5(serial, filename)
if not a5_frames:
raise HTTPException(
status_code=410,
detail=f"A5 sidecar missing or unreadable: {a5_name}",
)
# Rebuild a minimal Event from DB fields, then decode A5 onto it.
ev = Event(index=-1)
try:
_decode_a5_metadata_into(a5_frames, ev)
@@ -1451,27 +1503,178 @@ def db_event_waveform_json(event_id: str) -> dict:
log.error("db_event_waveform_json: waveform decode failed: %s", exc, exc_info=True)
raise HTTPException(status_code=500, detail=f"Waveform decode failed: {exc}") from exc
raw = getattr(ev, "raw_samples", None) or {}
samples_decoded = len(raw.get("Tran", []))
return {
"event_id": event_id,
"serial": serial,
"record_type": ev.record_type or row.get("record_type"),
"timestamp": _serialise_timestamp(ev.timestamp) or row.get("timestamp"),
"total_samples": ev.total_samples,
"pretrig_samples": ev.pretrig_samples,
"rectime_seconds": ev.rectime_seconds,
"samples_decoded": samples_decoded,
"sample_rate": ev.sample_rate or row.get("sample_rate"),
"peak_values": _serialise_peak_values(ev.peak_values) or {
"transverse": row.get("tran_ppv"),
"vertical": row.get("vert_ppv"),
"longitudinal": row.get("long_ppv"),
"vector_sum": row.get("peak_vector_sum"),
"mic": row.get("mic_ppv"),
},
"channels": raw,
}
# Carry over fields from the DB row when the A5 replay didn't fill them.
if ev.sample_rate is None and row.get("sample_rate"):
ev.sample_rate = row.get("sample_rate")
return event_hdf5.event_to_plot_json(
ev, serial=serial, geo_range="normal", event_id=event_id,
)
# ── /db/events/{id}/sidecar — modern .sfm.json review/metadata accessors ──────
class SidecarPatchBody(BaseModel):
"""Body for PATCH /db/events/{id}/sidecar.
JSON-merge-patch semantics: only the keys you include get updated.
`review` is the editable block for monthly-summary workflows
(false_trigger flag, reviewer notes, etc.); `extensions` is the
forward-compat namespace for vendor / future fields.
"""
review: Optional[dict] = None
extensions: Optional[dict] = None
@app.get("/db/events/{event_id}/sidecar")
def db_event_sidecar(event_id: str) -> dict:
"""
Return the .sfm.json sidecar for a stored event. 404 if the event
is unknown or has no sidecar in the store (events ingested before
the sidecar feature landed will show this until backfilled).
"""
row = _get_db().get_event(event_id)
if row is None:
raise HTTPException(status_code=404, detail=f"Event {event_id} not found")
serial = row.get("serial")
filename = row.get("blastware_filename")
if not serial or not filename:
raise HTTPException(
status_code=404,
detail=f"Event {event_id} has no event file in the store",
)
sidecar = _get_store().load_sidecar(serial, filename)
if sidecar is None:
raise HTTPException(
status_code=404,
detail=(
f"No .sfm.json sidecar on disk for {filename}. "
"Run scripts/backfill_sidecars.py to generate one."
),
)
return sidecar
@app.patch("/db/events/{event_id}/sidecar")
def db_event_sidecar_patch(event_id: str, body: SidecarPatchBody) -> dict:
"""
JSON-merge-patch the sidecar's `review` and/or `extensions` blocks.
The sidecar JSON is the source of truth for review state. When
`review.false_trigger` is updated, the SQL `events.false_trigger`
column is kept in sync as a derived index for fast filtering.
Returns the new full sidecar. 404 if the event or sidecar is missing.
"""
row = _get_db().get_event(event_id)
if row is None:
raise HTTPException(status_code=404, detail=f"Event {event_id} not found")
serial = row.get("serial")
filename = row.get("blastware_filename")
if not serial or not filename:
raise HTTPException(
status_code=404,
detail=f"Event {event_id} has no event file in the store",
)
if not (body.review or body.extensions):
raise HTTPException(
status_code=400,
detail="PATCH body must include `review` and/or `extensions`",
)
new_sidecar = _get_store().patch_sidecar(
serial, filename,
review=body.review,
extensions=body.extensions,
)
if new_sidecar is None:
raise HTTPException(
status_code=404,
detail=f"No .sfm.json sidecar on disk for {filename}",
)
# Mirror false_trigger from review block into the SQL index column.
if body.review is not None:
_get_db().update_event_review(event_id, new_sidecar.get("review", {}))
return new_sidecar
# ── /db/import/blastware_file — ingest BW-only event files ────────────────────
@app.post("/db/import/blastware_file")
async def db_import_blastware_file(
files: list[UploadFile] = File(...),
serial: Optional[str] = Query(None, description="Optional serial-number hint (e.g. BE11529); falls back to the BW filename's encoded prefix when omitted"),
) -> dict:
"""
Multipart upload of one or more Blastware event file binaries
(typically produced by Blastware's own ACH). For each file:
1. Parse the bytes via WaveformStore.save_imported_bw — produces
a parsed Event + copies the file into the persistent store +
writes a .sfm.json sidecar with source.kind = "bw-import".
2. Upsert a row into `events` (dedup'd on serial+timestamp).
Response includes per-file outcomes so the caller can see which
landed cleanly and which failed (e.g. malformed file, unknown
serial, etc.).
"""
store = _get_store()
db = _get_db()
results: list[dict] = []
for upload in files:
try:
content = await upload.read()
except Exception as exc:
results.append({
"filename": upload.filename, "status": "error",
"detail": f"read failed: {exc}",
})
continue
try:
ev, rec = store.save_imported_bw(
content,
source_path=Path(upload.filename or "imported.bw"),
serial_hint=serial,
)
inserted, skipped = db.insert_events(
[ev],
serial=(serial or _serial_from_event(ev) or "UNKNOWN"),
waveform_records={
ev._waveform_key.hex(): rec
if ev._waveform_key else None
} if ev._waveform_key else None,
)
results.append({
"filename": upload.filename,
"status": "ok",
"stored_filename": rec["filename"],
"filesize": rec["filesize"],
"sha256": rec["sha256"],
"inserted": inserted,
"skipped": skipped,
})
except Exception as exc:
log.error("import failed for %s: %s", upload.filename, exc, exc_info=True)
results.append({
"filename": upload.filename, "status": "error",
"detail": str(exc),
})
return {"count": len(results), "results": results}
def _serial_from_event(ev) -> Optional[str]:
"""Fallback serial resolver — currently relies on the BW filename
decoder via WaveformStore.save_imported_bw, so this is just a
placeholder for future enhancement (e.g. inferring from project_info)."""
return None
@app.get("/db/units/{serial}/waveforms.zip")