diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c2dda0..5206b2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,52 @@ All notable changes to seismo-relay are documented here. --- +## v0.15.0 — 2026-05-07 + +### Added + +- **Layered event storage architecture.** Each event now lands as four + files in the per-serial waveform store, each with a clear role: + + - `` — the Blastware-readable binary (BW file). Untouched. + - `.a5.pkl` — the raw 5A frames (regenerative source). + - `.h5` — clean per-channel waveform arrays in physical + units (in/s for geo, psi for mic) plus event metadata (HDF5 with + gzip compression). This is the canonical format for downstream + analysis tools. + - `.sfm.json` — the modern review/metadata sidecar (peaks, + project, source provenance, review state, extensions). + + SQLite (`seismo_relay.db`) is the searchable index over all four. + +- **Plot-ready waveform JSON (`sfm.plot.v1`).** The `/device/event/{idx}/waveform` + and `/db/events/{id}/waveform.json` endpoints now return samples in + physical units with explicit time-axis metadata, peak markers, and + per-channel unit hints — no more guessing the ADC-to-velocity scale + client-side. The webapp waveform viewer was rewritten to consume + this shape. + +- **In-app waveform viewer accuracy fix.** The standalone SFM webapp + viewer was scaling geophone amplitudes by `geoAdcScale / 32767` + (≈ 6.206 / 32767), where `geoAdcScale = 6.206053` is the device's + *in/s per V* hardware constant — not the ADC-counts-to-velocity + factor. This silently scaled every plot ~38% too low for Normal-range + geophones (the correct full-scale is 10.0 in/s, or 1.25 in/s for + Sensitive). Conversion is now done server-side using the geo_range + from compliance config; the client just plots. + +- New `sfm/event_hdf5.py` module: `write_event_hdf5()`, + `read_event_hdf5()`, plus a plot-JSON helper. +- Backfill script extended to also emit `.h5` for existing events. + +### Dependencies + +- Added `h5py>=3.10` and `numpy>=1.24` for the HDF5 storage layer. +- Added `python-multipart>=0.0.7` (required by FastAPI for the + `/db/import/blastware_file` endpoint introduced in this release). + +--- + ## v0.14.3 — 2026-05-05 ### Fixed diff --git a/minimateplus/client.py b/minimateplus/client.py index cddabbe..43eeb57 100644 --- a/minimateplus/client.py +++ b/minimateplus/client.py @@ -1362,29 +1362,36 @@ def _decode_waveform_record_into(data: bytes, event: Event) -> None: Modifies event in-place. """ - # ── Record type ─────────────────────────────────────────────────────────── - # Decoded from byte[1] (sub_code) first so we can gate timestamp parsing. + # ── Record type + format detection ──────────────────────────────────────── + # `record_type` is the user-facing label ("Waveform" for any triggered + # event regardless of timestamp-header layout). `fmt` is the internal + # format code used to pick the right Timestamp parser; it stays + # internal and doesn't leak to the API / sidecar / UI. try: event.record_type = _extract_record_type(data) except Exception as exc: log.warning("waveform record type decode failed: %s", exc) + fmt = _detect_record_format(data) # ── Timestamp ───────────────────────────────────────────────────────────── - # 9-byte format for sub_code=0x10 Waveform records: - # [day][sub_code][month][year:2 BE][unknown][hour][min][sec] - # sub_code=0x10 and sub_code=0x03 have different timestamp byte layouts. - # Both confirmed against Blastware event reports (BE11529, 2026-04-01 and 2026-04-03). - if event.record_type == "Waveform": + # Three timestamp-header layouts have been observed across BE11529 + # firmware S338.17 — each picks a different Timestamp parser: + # "single_shot": 9-byte [day][0x10][month][year:2][unk][h][m][s] + # "continuous": 10-byte [0x10][day][0x10][month][year:2][unk][h][m][s] + # "short": 8-byte [day][month][year:2][unk][h][m][s] + # All decoded into the same Timestamp dataclass — only the byte + # offsets differ. + if fmt == "single_shot": try: event.timestamp = Timestamp.from_waveform_record(data) except Exception as exc: - log.warning("waveform record timestamp decode failed: %s", exc) - elif event.record_type == "Waveform (Continuous)": + log.warning("single_shot record timestamp decode failed: %s", exc) + elif fmt == "continuous": try: event.timestamp = Timestamp.from_continuous_record(data) except Exception as exc: log.warning("continuous record timestamp decode failed: %s", exc) - elif event.record_type == "Waveform (Short)": + elif fmt == "short": try: event.timestamp = Timestamp.from_short_record(data) except Exception as exc: @@ -1562,17 +1569,33 @@ def _decode_a5_waveform( log.warning("_decode_a5_waveform: STRT record truncated (%dB)", len(strt)) return + # STRT byte layout (21 bytes; verified against M529LIY6 reference files + # and re-confirmed against live BE11529 captures, 2026-05-08): + # [0:4] b'STRT' + # [4:6] 0xff 0xfe fixed + # [6:10] end_key (4-byte device flash address where event ends) + # [10:14] start_key (4-byte device flash address where event starts) + # [14:18] device-specific (4 bytes; semantics not pinned) + # [18] 0x46 record-type marker (= 70 in decimal — NOT rectime!) + # [19] device-specific + # [20] rectime (uint8 seconds, user-set Record Time) + # + # The earlier reading of `rectime_seconds = strt[18]` always returned + # 70 for a real waveform event because it was reading the 0x46 marker. + # Caller should prefer compliance_config.record_time when available + # (that's the authoritative user-set value) and fall back to this. total_samples = struct.unpack_from(">H", strt, 8)[0] pretrig_samples = struct.unpack_from(">H", strt, 16)[0] - rectime_seconds = strt[18] + rectime_seconds = strt[20] event.total_samples = total_samples event.pretrig_samples = pretrig_samples event.rectime_seconds = rectime_seconds log.debug( - "_decode_a5_waveform: STRT total_samples=%d pretrig=%d rectime=%ds", - total_samples, pretrig_samples, rectime_seconds, + "_decode_a5_waveform: STRT total_samples=%d pretrig=%d rectime=%ds " + "(strt[18]=0x%02X record-type marker, strt[20]=0x%02X rectime)", + total_samples, pretrig_samples, rectime_seconds, strt[18], strt[20], ) # ── Collect per-frame waveform bytes with global offset tracking ───────── @@ -1724,22 +1747,30 @@ def _detect_record_format(data: bytes) -> Optional[str]: def _extract_record_type(data: bytes) -> Optional[str]: """ - Return a human-readable name for the waveform record format detected - in the first bytes of a 210-byte 0C record. + Return a user-facing name for a waveform record. All three internal + timestamp-header layouts represent the *same* user concept — a + triggered seismic event — so they all surface as just "Waveform". - Maps to the format codes returned by _detect_record_format(): - "single_shot" → "Waveform" - "continuous" → "Waveform (Continuous)" - "short" → "Waveform (Short)" - None → "Unknown(XX.YY.ZZ)" + The internal format code is preserved for parsing logic (timestamp + decoder selection) but doesn't leak into the API / UI / sidecar. + Callers that need the raw layout can call `_detect_record_format` + directly. + + Background: across BE11529 firmware S338.17 we've observed three + different byte layouts for the timestamp header at the start of the + 0C record (8 / 9 / 10 bytes, distinguished by the position of the + BE-encoded year and the presence of `0x10` marker bytes). An older + revision of this code labelled them "Waveform" / "Waveform + (Continuous)" / "Waveform (Short)", which created the false + impression that there were three distinct event "types" the user + could configure. In reality the user only ever picks Single Shot + vs Continuous vs Histogram in the compliance config — the byte + layout is a firmware-internal detail that doesn't always correlate + with that choice. """ fmt = _detect_record_format(data) - if fmt == "single_shot": + if fmt in ("single_shot", "continuous", "short"): return "Waveform" - if fmt == "continuous": - return "Waveform (Continuous)" - if fmt == "short": - return "Waveform (Short)" if len(data) >= 3: log.warning( "_extract_record_type: unrecognized header: data[0:3]=%02X %02X %02X", diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py new file mode 100644 index 0000000..b1f54ee --- /dev/null +++ b/minimateplus/event_file_io.py @@ -0,0 +1,518 @@ +""" +minimateplus/event_file_io.py — modern event-file (.sfm.json sidecar) IO. + +This module is the single home for event-file conversion code that doesn't +fit cleanly inside `blastware_file.py` (which is the BW binary codec): + + - sidecar JSON read/write (the modern per-event metadata file) + - read_blastware_file() — reverse of write_blastware_file, used by + the BW-importer flow when SFM is ingesting files produced by + Blastware's own ACH (where the source A5 frames aren't available). + +Sidecar schema v1 layout — see docs in the project plan or the schema +declared in `event_to_sidecar_dict()`. +""" + +from __future__ import annotations + +import datetime +import hashlib +import json +import logging +import os +import struct +from pathlib import Path +from typing import Optional, Union + +from .models import Event, PeakValues, ProjectInfo, Timestamp +from . import blastware_file as _bw # avoid circular reference at module load + +log = logging.getLogger(__name__) + +# Schema version for the sidecar JSON. Bump when fields change shape. +# Older readers must reject anything > SCHEMA_VERSION; newer fields added +# inside `extensions` are forward-compatible without a bump. +SCHEMA_VERSION = 1 +SIDECAR_KIND = "sfm.event" + +# Default tool_version stamp; callers can override. Hard-coded here +# rather than read via importlib.metadata because the latter reflects the +# *installed* dist-info, which doesn't update when pyproject.toml is +# bumped without a `pip install` re-run — leading to confusing stale +# version stamps in sidecars. Bump this constant and CHANGELOG.md +# together at release time. +TOOL_VERSION = "0.15.0" + +try: + # Best-effort: prefer the installed metadata when it's NEWER than the + # baked-in constant (e.g. a downstream packager bumped the wheel + # without editing this file). Otherwise fall back to TOOL_VERSION. + from importlib.metadata import version as _pkg_version + _meta_v = _pkg_version("seismo-relay") + def _vtuple(s): + try: + return tuple(int(p) for p in s.split(".")[:3]) + except Exception: + return (0, 0, 0) + _TOOL_VERSION_DEFAULT = ( + _meta_v if _vtuple(_meta_v) > _vtuple(TOOL_VERSION) else TOOL_VERSION + ) +except Exception: + _TOOL_VERSION_DEFAULT = TOOL_VERSION + + +# ── Sidecar dict construction ───────────────────────────────────────────────── + + +def _ts_iso(ts: Optional[Timestamp]) -> Optional[str]: + if ts is None: + return None + try: + return datetime.datetime( + ts.year, ts.month, ts.day, + ts.hour or 0, ts.minute or 0, ts.second or 0, + ).isoformat() + except Exception: + return str(ts) + + +def _peak_values_to_dict(pv: Optional[PeakValues]) -> dict: + if pv is None: + return { + "transverse": None, + "vertical": None, + "longitudinal": None, + "vector_sum": None, + "mic_psi": None, + } + return { + "transverse": pv.tran, + "vertical": pv.vert, + "longitudinal": pv.long, + "vector_sum": pv.peak_vector_sum, + "mic_psi": pv.micl, + } + + +def _project_info_to_dict(pi: Optional[ProjectInfo]) -> dict: + if pi is None: + return { + "project": None, + "client": None, + "operator": None, + "sensor_location": None, + } + return { + "project": pi.project, + "client": pi.client, + "operator": pi.operator, + "sensor_location": pi.sensor_location, + } + + +def event_to_sidecar_dict( + event: Event, + *, + serial: str, + blastware_filename: str, + blastware_filesize: int, + blastware_sha256: str, + source_kind: str = "sfm-live", + a5_pickle_filename: Optional[str] = None, + tool_version: str = _TOOL_VERSION_DEFAULT, + captured_at: Optional[datetime.datetime] = None, + review: Optional[dict] = None, + extensions: Optional[dict] = None, +) -> dict: + """ + Build a v1 sidecar dict from an Event + the surrounding metadata. + + Pure helper — no file I/O. Callers stitch the result into a sidecar + via `write_sidecar()` (or POST it back via the PATCH endpoint). + """ + if source_kind not in {"sfm-live", "sfm-ach", "bw-import"}: + raise ValueError(f"unknown source_kind: {source_kind!r}") + + captured_at = captured_at or datetime.datetime.utcnow() + + return { + "schema_version": SCHEMA_VERSION, + "kind": SIDECAR_KIND, + + "event": { + "serial": serial, + "timestamp": _ts_iso(event.timestamp), + "waveform_key": event._waveform_key.hex() if event._waveform_key else None, + "record_type": event.record_type, + "sample_rate": event.sample_rate, + "rectime_seconds": event.rectime_seconds, + "total_samples": event.total_samples, + "pretrig_samples": event.pretrig_samples, + }, + + "peak_values": _peak_values_to_dict(event.peak_values), + "project_info": _project_info_to_dict(event.project_info), + + "blastware": { + "filename": blastware_filename, + "filesize": blastware_filesize, + "sha256": blastware_sha256, + "available": True, + }, + + "source": { + "kind": source_kind, + "captured_at": captured_at.isoformat() + "Z" if captured_at.tzinfo is None else captured_at.isoformat(), + "tool_version": tool_version, + "a5_pickle_filename": a5_pickle_filename, + }, + + "review": review or { + "false_trigger": False, + "reviewer": None, + "reviewed_at": None, + "notes": "", + }, + + "extensions": extensions or {}, + } + + +# ── Sidecar IO ──────────────────────────────────────────────────────────────── + + +def write_sidecar(path: Union[str, Path], data: dict) -> None: + """ + Atomic write of a sidecar dict to . + + Validates schema_version is supported before writing so we don't + silently drop a future-format sidecar over the wire. + """ + path = Path(path) + sv = data.get("schema_version") + if not isinstance(sv, int) or sv < 1 or sv > SCHEMA_VERSION: + raise ValueError( + f"write_sidecar: unsupported schema_version={sv!r} " + f"(this build supports 1..{SCHEMA_VERSION})" + ) + + tmp = path.with_suffix(path.suffix + ".tmp") + with tmp.open("w", encoding="utf-8") as f: + json.dump(data, f, indent=2, sort_keys=False, default=str) + f.write("\n") + f.flush() + os.fsync(f.fileno()) + os.replace(tmp, path) + + +def read_sidecar(path: Union[str, Path]) -> dict: + """ + Load a sidecar JSON file. + + Raises FileNotFoundError if missing, ValueError on bad shape / + unsupported schema_version. Unknown keys at the top level are + preserved in the returned dict (forward-compat). + """ + path = Path(path) + with path.open("r", encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, dict): + raise ValueError(f"sidecar at {path}: top-level is not a JSON object") + sv = data.get("schema_version") + if not isinstance(sv, int) or sv < 1: + raise ValueError(f"sidecar at {path}: missing or invalid schema_version") + if sv > SCHEMA_VERSION: + raise ValueError( + f"sidecar at {path}: schema_version={sv} > supported {SCHEMA_VERSION}; " + "upgrade seismo-relay to read this file" + ) + if data.get("kind") != SIDECAR_KIND: + raise ValueError(f"sidecar at {path}: unexpected kind={data.get('kind')!r}") + return data + + +def patch_sidecar( + path: Union[str, Path], + *, + review: Optional[dict] = None, + extensions: Optional[dict] = None, + reviewer_now: bool = True, +) -> dict: + """ + Atomically apply a JSON-merge-patch to a sidecar file's `review` + and/or `extensions` blocks. Other top-level keys are untouched. + + `review_now`: when True (default) and `review` is non-empty, stamps + `review.reviewed_at` with the current UTC time so the review-time is + auditable without the caller having to pass it. + + Returns the new full sidecar dict. + """ + path = Path(path) + data = read_sidecar(path) + + if review: + merged = dict(data.get("review") or {}) + merged.update({k: v for k, v in review.items() if v is not None or k in merged}) + if reviewer_now: + merged["reviewed_at"] = datetime.datetime.utcnow().isoformat() + "Z" + data["review"] = merged + + if extensions: + merged_ext = dict(data.get("extensions") or {}) + merged_ext.update(extensions) + data["extensions"] = merged_ext + + write_sidecar(path, data) + return data + + +def sidecar_path_for(blastware_path: Union[str, Path]) -> Path: + """Convention: .sfm.json sits next to the BW binary.""" + p = Path(blastware_path) + return p.with_name(p.name + ".sfm.json") + + +def file_sha256(path: Union[str, Path], chunk_size: int = 65536) -> str: + """Compute SHA-256 of a file as a hex string.""" + h = hashlib.sha256() + with open(path, "rb") as f: + while True: + chunk = f.read(chunk_size) + if not chunk: + break + h.update(chunk) + return h.hexdigest() + + +# ── Blastware-file reader ───────────────────────────────────────────────────── +# +# Reverse of `blastware_file.write_blastware_file`. Used by the BW-import +# flow to ingest files produced by Blastware's own ACH (where the source +# A5 frames are not available). +# +# File structure (recap): +# [22B header] [21B STRT record] [body bytes] [26B footer] +# +# The body holds: +# - 6B preamble (00 00 ff ff ff ff) immediately after the STRT +# - 4-channel interleaved int16 LE samples +# - Embedded ASCII metadata strings (Project: / Client: / User Name: / +# Seis Loc: / Extended Notes) from the device's session-start config +# +# The 0C waveform record (per-event peaks, project name) is NOT in the +# BW file — those are computed by the device firmware and only carried +# in the live SUB 0C response. read_blastware_file() therefore computes +# peaks from the raw samples assuming Normal-range (10 in/s full-scale) +# geophone sensitivity. Imported events surface that assumption via the +# sidecar's `peak_values.computed_from_samples` flag. + + +# Geophone scale factor, in/s per ADC unit, for Normal range (10 in/s FS). +# Confirmed from CLAUDE.md (geo_hardware_constant = 6.206053 in/s per V, +# ADC full-scale = 1.61133 V Normal range = 10.0 in/s peak; per-count +# resolution ≈ 10.0 / 32768). +_GEO_NORMAL_FS_INS = 10.0 +_GEO_SENSITIVE_FS_INS = 1.250 +_INT16_FS = 32768.0 + +# Microphone scale factor, psi per ADC count. Approximate — exact factor +# depends on the geophone-vs-mic ADC scaling and the firmware reference. +# We mark mic_psi as "computed approximate" in the sidecar. +_MIC_FS_PSI = 0.0125 / _INT16_FS # ~0.5 psi full-scale assumption + + +def _decode_strt(strt: bytes) -> dict: + """ + Decode the 21-byte STRT record from a BW file. + + Returns dict with waveform_key (4B), total_samples, pretrig_samples, + rectime_seconds. Falls back to None on truncated/missing fields. + """ + if len(strt) < 21 or strt[0:4] != b"STRT": + return {} + return { + "waveform_key": strt[6:10].hex(), + "total_samples": struct.unpack_from(">H", strt, 8)[0], + "pretrig_samples": struct.unpack_from(">H", strt, 16)[0], + "rectime_seconds": strt[18], + } + + +def _find_first_string(buf: bytes, label: bytes, max_len: int = 256) -> Optional[str]: + """ + Search `buf` for `label` (e.g. b"Project:") and return the + null-terminated ASCII string that follows, stripped. + """ + pos = buf.find(label) + if pos < 0: + return None + start = pos + len(label) + end = buf.find(b"\x00", start, start + max_len) + if end < 0: + end = start + max_len + text = buf[start:end].decode("ascii", errors="replace").strip() + return text or None + + +def _decode_samples_4ch_int16_le(stream: bytes) -> dict[str, list[int]]: + """ + Decode a 4-channel interleaved int16 LE byte stream into per-channel + lists. Channels are [Tran, Vert, Long, Mic] = [ch0, ch1, ch2, ch3]. + Truncates to a multiple of 8 bytes (one full sample-set). + """ + n_complete = (len(stream) // 8) * 8 + if n_complete == 0: + return {"Tran": [], "Vert": [], "Long": [], "MicL": []} + fmt = "<" + "h" * (n_complete // 2) + flat = list(struct.unpack(fmt, stream[:n_complete])) + return { + "Tran": flat[0::4], + "Vert": flat[1::4], + "Long": flat[2::4], + "MicL": flat[3::4], + } + + +def _peaks_from_samples(samples: dict[str, list[int]]) -> PeakValues: + """ + Compute approximate peaks from raw int16 samples assuming Normal-range + geophone sensitivity. Used by the BW-importer when the 0C waveform + record (the device's authoritative peaks) is unavailable. + """ + def _peak_ins(ch: list[int]) -> float: + if not ch: + return 0.0 + m = max(abs(int(v)) for v in ch) + return m / _INT16_FS * _GEO_NORMAL_FS_INS + + tran = _peak_ins(samples.get("Tran", [])) + vert = _peak_ins(samples.get("Vert", [])) + long_ = _peak_ins(samples.get("Long", [])) + + # Mic in psi (approximate) + mic_ch = samples.get("MicL", []) or [] + mic = max((abs(int(v)) for v in mic_ch), default=0) * _MIC_FS_PSI + + # Peak vector sum: max over time of sqrt(T^2 + V^2 + L^2) + pvs = 0.0 + n = min(len(samples.get("Tran", [])), len(samples.get("Vert", [])), len(samples.get("Long", []))) + if n: + scale = _GEO_NORMAL_FS_INS / _INT16_FS + T = samples["Tran"]; V = samples["Vert"]; L = samples["Long"] + for i in range(n): + t = T[i] * scale + v = V[i] * scale + l = L[i] * scale + mag = (t*t + v*v + l*l) ** 0.5 + if mag > pvs: + pvs = mag + + return PeakValues( + tran=tran, vert=vert, long=long_, + peak_vector_sum=pvs, micl=mic, + ) + + +def read_blastware_file(path: Union[str, Path]) -> Event: + """ + Parse a Blastware waveform file into an Event. + + Recovers: + - waveform_key, rectime_seconds, total_samples, pretrig_samples + (from the STRT record) + - timestamp (from the footer's start-time field) + - project_info (from ASCII labels embedded in the body) + - raw_samples (Tran/Vert/Long/MicL int16 lists) + - peak_values (computed from raw_samples; approximate — see notes + on _peaks_from_samples about Normal-range assumption) + + Does NOT recover the source A5 frames (they aren't in the BW file). + The returned Event has `_a5_frames = None`, signalling that + byte-for-byte regeneration of the BW file from this Event alone is + not possible — the on-disk BW file IS the byte-for-byte source. + """ + path = Path(path) + raw = path.read_bytes() + if len(raw) < _bw._WAVEFORM_HEADER_SIZE + 21 + 26: + raise ValueError(f"{path}: file too short ({len(raw)} bytes) to be a BW event") + + # Header: validate magic prefix. + header = raw[:_bw._WAVEFORM_HEADER_SIZE] + if not header.startswith(_bw._FILE_HEADER_PREFIX): + raise ValueError(f"{path}: not a Blastware file (bad header prefix)") + + # STRT record: 21 bytes immediately after the header. + strt_raw = raw[_bw._WAVEFORM_HEADER_SIZE : _bw._WAVEFORM_HEADER_SIZE + 21] + strt_fields = _decode_strt(strt_raw) + if not strt_fields: + raise ValueError(f"{path}: STRT record missing or malformed") + + # Footer: locate the 0e 08 marker, validating the year is in a sane range. + body_start = _bw._WAVEFORM_HEADER_SIZE + 21 + footer_pos = -1 + pos = body_start + while True: + pos = raw.find(b"\x0e\x08", pos) + if pos < 0 or pos + 26 > len(raw): + break + yr = (raw[pos + 4] << 8) | raw[pos + 5] + if 2015 <= yr <= 2050: + footer_pos = pos + break + pos += 1 + + if footer_pos < 0 and len(raw) >= 26: + footer_pos = len(raw) - 26 + if footer_pos < body_start: + raise ValueError(f"{path}: footer not found") + + body = raw[body_start : footer_pos] + footer = raw[footer_pos : footer_pos + 26] + + # Footer layout: + # [0:2] 0e 08 marker + # [2:10] ts1 (start) BE 8B + # [10:18] ts2 (stop) BE 8B + # [18:24] 00 01 00 02 00 00 + # [24:26] crc + ts1 = _bw._decode_ts_be(footer[2:10]) + ts2 = _bw._decode_ts_be(footer[10:18]) + + # Body: first 6 bytes are the preamble (00 00 ff ff ff ff). Strip + # them before decoding samples. Any trailing tail past the last + # full sample-set is silently truncated by _decode_samples_4ch. + sample_bytes = body[6:] if body[:6].hex() in ("0000ffffffff", "0000FFFFFFFF") else body + samples = _decode_samples_4ch_int16_le(sample_bytes) + + # Metadata strings (label-anchored search across the body). + project = _find_first_string(body, b"Project:") + client = _find_first_string(body, b"Client:") + user = _find_first_string(body, b"User Name:") + seisloc = _find_first_string(body, b"Seis Loc:") + + # Build the Event. + ev = Event(index=-1) + if strt_fields.get("waveform_key"): + ev._waveform_key = bytes.fromhex(strt_fields["waveform_key"]) + ev.record_type = "Waveform" + ev.rectime_seconds = strt_fields.get("rectime_seconds") + ev.total_samples = strt_fields.get("total_samples") + ev.pretrig_samples = strt_fields.get("pretrig_samples") + + if ts1 is not None: + ev.timestamp = Timestamp( + raw=footer[2:10], + flag=0x10, + year=ts1.year, unknown_byte=0, month=ts1.month, day=ts1.day, + hour=ts1.hour, minute=ts1.minute, second=ts1.second, + ) + + ev.project_info = ProjectInfo( + project=project, client=client, operator=user, sensor_location=seisloc, + ) + ev.raw_samples = samples + ev.peak_values = _peaks_from_samples(samples) + ev._a5_frames = None # not recoverable from BW file + + return ev diff --git a/pyproject.toml b/pyproject.toml index 8b9e8ec..5286aa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "seismo-relay" -version = "0.12.0" +version = "0.15.0" description = "Python client and REST server for MiniMate Plus seismographs" requires-python = ">=3.10" dependencies = [ @@ -12,6 +12,9 @@ dependencies = [ "uvicorn[standard]>=0.24", "pyserial>=3.5", "sqlalchemy>=2.0", + "python-multipart>=0.0.7", + "h5py>=3.10", + "numpy>=1.24", ] [tool.setuptools.packages.find] diff --git a/requirements.txt b/requirements.txt index 0958f1a..8b01960 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,6 @@ fastapi uvicorn sqlalchemy pyserial +python-multipart +h5py +numpy diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py new file mode 100644 index 0000000..6b7cb82 --- /dev/null +++ b/scripts/backfill_sidecars.py @@ -0,0 +1,346 @@ +""" +scripts/backfill_sidecars.py — generate .sfm.json sidecars AND .h5 +clean-waveform files for existing events already in the waveform store +that predate those features. + +Walks `//` and for each BW event file: + + Sidecar (.sfm.json): + - Skip when an existing sidecar's blastware.sha256 matches the + current BW file's sha256. + - Else regenerate: prefer .a5.pkl (full fidelity); fall back to + parsing the BW binary directly (peaks computed from samples). + + Clean waveform (.h5): + - Skip when .h5 already exists (idempotent). + - Else write from .a5.pkl (preferred) or BW binary parse (fallback). + +Usage: + python scripts/backfill_sidecars.py [--store-root PATH] + [--db-path PATH] + [--dry-run] + [--skip-hdf5] + [-v] +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +# Allow running from the repo root without installation. +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from minimateplus import event_file_io +from sfm import event_hdf5 +from sfm.waveform_store import WaveformStore, _frame_to_dict, _dict_to_frame # noqa: F401 +from sfm.database import SeismoDb + +log = logging.getLogger("backfill_sidecars") + + +def _looks_like_event_file(path: Path) -> bool: + """Same heuristic as the importer CLI.""" + if not path.is_file(): + return False + if path.name.endswith((".a5.pkl", ".sfm.json")): + return False + ext = path.suffix.lstrip(".") + if not (3 <= len(ext) <= 4): + return False + if not (ext[-1].upper() in {"W", "H"} or ext.endswith("0")): + return False + try: + return path.stat().st_size >= 70 + except OSError: + return False + + +def main(argv=None) -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument( + "--db-path", + default=str(Path(__file__).resolve().parent.parent / "bridges" / "captures" / "seismo_relay.db"), + ) + p.add_argument("--store-root", default=None) + p.add_argument("--dry-run", action="store_true") + p.add_argument( + "--skip-hdf5", action="store_true", + help="Don't generate .h5 clean-waveform files (only sidecars).", + ) + p.add_argument( + "--force", action="store_true", + help=( + "Regenerate sidecars + .h5 even when an existing sidecar's " + "blastware.sha256 matches the current BW file. Use this after " + "upgrading seismo-relay to pull in decoder bug fixes (e.g. the " + "STRT-rectime byte-offset fix in v0.15.x)." + ), + ) + p.add_argument("-v", "--verbose", action="store_true") + args = p.parse_args(argv) + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)-7s %(name)s %(message)s", + datefmt="%H:%M:%S", + ) + + db_path = Path(args.db_path).expanduser().resolve() + store_root = ( + Path(args.store_root).expanduser().resolve() + if args.store_root else db_path.parent / "waveforms" + ) + if not store_root.exists(): + print(f"error: store root does not exist: {store_root}", file=sys.stderr) + return 2 + + store = WaveformStore(store_root) + db = SeismoDb(db_path) + + written = skipped = errors = 0 + for serial_dir in sorted(p for p in store_root.iterdir() if p.is_dir()): + serial = serial_dir.name + for path in sorted(serial_dir.iterdir()): + if not _looks_like_event_file(path): + continue + sidecar_path = store.sidecar_path_for(serial, path.name) + try: + bw_sha = event_file_io.file_sha256(path) + except Exception as exc: + log.error("sha256 failed for %s: %s", path, exc) + errors += 1 + continue + + # Skip when an up-to-date sidecar already exists. + # + # Two-part freshness check: + # 1. blastware.sha256 must match the current BW file (proves + # the sidecar describes THIS file). + # 2. source.tool_version must be ≥ current TOOL_VERSION (proves + # the sidecar was written by a build that includes any + # decoder fixes shipped since). + # Either part failing → regenerate. --force bypasses both. + if sidecar_path.exists() and not args.force: + try: + existing = event_file_io.read_sidecar(sidecar_path) + sha_ok = existing.get("blastware", {}).get("sha256") == bw_sha + src_ver = existing.get("source", {}).get("tool_version", "") + def _vt(s): + try: + return tuple(int(p) for p in str(s).split(".")[:3]) + except Exception: + return (0, 0, 0) + ver_ok = _vt(src_ver) >= _vt(event_file_io.TOOL_VERSION) + if sha_ok and ver_ok: + skipped += 1 + continue + if sha_ok and not ver_ok: + log.info( + "regenerating %s (sidecar tool_version=%s < current %s)", + sidecar_path.name, src_ver or "(none)", + event_file_io.TOOL_VERSION, + ) + except Exception: + pass # fall through to rewrite + + # Decide path: A5-based (high-fidelity) or BW-only. + a5_path = serial_dir / f"{path.name}.a5.pkl" + try: + if a5_path.exists(): + frames = store.load_a5(serial, path.name) + if not frames: + raise RuntimeError("a5_pickle present but unreadable") + # Build an Event by replaying the A5 decoders. Note: + # the .a5.pkl alone CANNOT recover timestamp / + # record_type / waveform_key / per-channel peaks — + # those live in the 0C record, which isn't saved + # separately. We seed those from the DB row + the + # existing sidecar below so a re-backfill doesn't + # nuke fields the original save populated. + from minimateplus.client import ( + _decode_a5_metadata_into, + _decode_a5_waveform, + ) + from minimateplus.models import Event, PeakValues, ProjectInfo, Timestamp + ev = Event(index=-1) + _decode_a5_metadata_into(frames, ev) + _decode_a5_waveform(frames, ev) + source_kind = "sfm-live" + a5_filename = a5_path.name + else: + ev = event_file_io.read_blastware_file(path) + source_kind = "bw-import" + a5_filename = None + from minimateplus.models import Event, PeakValues, ProjectInfo, Timestamp + + # ── Seed missing fields from the SeismoDb events row ── + # The DB row was populated at original save time with peaks, + # project info, timestamp, record_type, sample_rate, etc. + # All of those survive intact in SQLite; pull them onto the + # rebuilt Event so the regenerated sidecar matches what was + # there before the backfill ran. + db_row = None + try: + import sqlite3 as _sql + with _sql.connect(str(db.db_path)) as _conn: + _conn.row_factory = _sql.Row + db_row = _conn.execute( + "SELECT * FROM events " + "WHERE serial=? AND blastware_filename=? " + "LIMIT 1", + (serial, path.name), + ).fetchone() + except Exception as exc: + log.debug("DB lookup failed for %s: %s", path.name, exc) + + if db_row is not None: + if ev.sample_rate is None and db_row["sample_rate"]: + ev.sample_rate = int(db_row["sample_rate"]) + if not ev.record_type and db_row["record_type"]: + ev.record_type = db_row["record_type"] + if ev._waveform_key is None and db_row["waveform_key"]: + try: + ev._waveform_key = bytes.fromhex(db_row["waveform_key"]) + except Exception: + pass + # Timestamp from the ISO-8601 string in the DB row. + if ev.timestamp is None and db_row["timestamp"]: + try: + import datetime as _dt + _t = _dt.datetime.fromisoformat(db_row["timestamp"]) + ev.timestamp = Timestamp( + raw=b"", flag=0x10, + year=_t.year, unknown_byte=0, + month=_t.month, day=_t.day, + hour=_t.hour, minute=_t.minute, second=_t.second, + ) + except Exception: + pass + # Peaks from the DB row when the A5 decode didn't supply them. + if ev.peak_values is None: + ev.peak_values = PeakValues( + tran=db_row["tran_ppv"], + vert=db_row["vert_ppv"], + long=db_row["long_ppv"], + peak_vector_sum=db_row["peak_vector_sum"], + micl=db_row["mic_ppv"], + ) + # Project info from the DB row when the A5 metadata-page + # decode didn't pick it up. + if ev.project_info is None or all( + v in (None, "") + for v in ( + (ev.project_info.project if ev.project_info else None), + (ev.project_info.client if ev.project_info else None), + (ev.project_info.operator if ev.project_info else None), + (ev.project_info.sensor_location if ev.project_info else None), + ) + ): + ev.project_info = ProjectInfo( + project=db_row["project"], + client=db_row["client"], + operator=db_row["operator"], + sensor_location=db_row["sensor_location"], + ) + + # Derive total_samples when we have both rectime + sample_rate. + # The decoder's STRT-derived value can be a buffer offset + # rather than a sample count — drop it in that case. + if ev.sample_rate and ev.rectime_seconds: + derived = int(round(ev.sample_rate * ev.rectime_seconds)) + if (ev.total_samples is None + or ev.total_samples > derived * 2 + or ev.total_samples < derived // 4): + ev.total_samples = derived + + # Preserve user-edited review state + extensions from the + # existing sidecar (false_trigger flag, notes, etc.) so a + # backfill never wipes them out. + preserved_review = None + preserved_ext = None + if sidecar_path.exists(): + try: + _existing = event_file_io.read_sidecar(sidecar_path) + preserved_review = _existing.get("review") + preserved_ext = _existing.get("extensions") + except Exception: + pass + + sidecar = event_file_io.event_to_sidecar_dict( + ev, + serial=serial, + blastware_filename=path.name, + blastware_filesize=path.stat().st_size, + blastware_sha256=bw_sha, + source_kind=source_kind, + a5_pickle_filename=a5_filename, + review=preserved_review, + extensions=preserved_ext, + ) + + # Also emit the .h5 clean-waveform file when missing OR when + # --force was passed (so a re-backfill picks up decoder fixes). + hdf5_path = store.hdf5_path_for(serial, path.name) + hdf5_filename = hdf5_path.name if hdf5_path.exists() else None + hdf5_action = "kept" + need_h5 = not args.skip_hdf5 and (args.force or not hdf5_path.exists()) + if need_h5: + if args.dry_run: + hdf5_action = "would (re)write" + else: + try: + event_hdf5.write_event_hdf5( + hdf5_path, ev, + serial=serial, + geo_range="normal", + source_kind=source_kind, + ) + hdf5_filename = hdf5_path.name + hdf5_action = "rewrote" if hdf5_path.exists() else "wrote" + except Exception as exc: + log.warning("HDF5 write failed for %s: %s", path.name, exc) + hdf5_action = "FAILED" + + if args.dry_run: + print(f" [DRY ] would write {sidecar_path.name} " + f"+ .h5 ({hdf5_action}) source={source_kind}") + written += 1 + continue + + event_file_io.write_sidecar(sidecar_path, sidecar) + + # Best-effort: keep the SQL row's sidecar_filename in sync + # by upserting via insert_events (it dedups on serial+ts). + try: + db.insert_events( + [ev], serial=serial, + waveform_records=( + {ev._waveform_key.hex(): { + "filename": path.name, + "filesize": path.stat().st_size, + "a5_pickle_filename": a5_filename, + "sidecar_filename": sidecar_path.name, + }} + if ev._waveform_key else None + ), + ) + except Exception as exc: + log.warning("DB upsert failed for %s: %s", path.name, exc) + + print(f" [OK ] {path.name} → {sidecar_path.name} " + f"+ h5 ({hdf5_action}) source={source_kind}") + written += 1 + + except Exception as exc: + log.error("backfill failed for %s: %s", path, exc, exc_info=args.verbose) + errors += 1 + + print(f"\nDone. written={written} skipped(uptodate)={skipped} errors={errors}") + return 0 if errors == 0 else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/sfm/database.py b/sfm/database.py index 7f0d7dc..8c492f5 100644 --- a/sfm/database.py +++ b/sfm/database.py @@ -84,6 +84,7 @@ CREATE TABLE IF NOT EXISTS events ( blastware_filename TEXT, -- event file within waveform store; extension is per-event (AB0T encodes timestamp) blastware_filesize INTEGER, -- bytes; NULL if no event file saved a5_pickle_filename TEXT, -- ".a5.pkl" sidecar + sidecar_filename TEXT, -- ".sfm.json" review/metadata sidecar created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')), UNIQUE(serial, timestamp) ); @@ -196,6 +197,7 @@ class SeismoDb: ("blastware_filename", "TEXT"), ("blastware_filesize", "INTEGER"), ("a5_pickle_filename", "TEXT"), + ("sidecar_filename", "TEXT"), ): if col not in existing_cols: log.info("_migrate: events ADD COLUMN %s %s", col, ddl) @@ -346,8 +348,9 @@ class SeismoDb: tran_ppv, vert_ppv, long_ppv, peak_vector_sum, mic_ppv, project, client, operator, sensor_location, sample_rate, record_type, - blastware_filename, blastware_filesize, a5_pickle_filename) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + blastware_filename, blastware_filesize, + a5_pickle_filename, sidecar_filename) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( self._new_id(), serial, key, session_id, ts, @@ -365,6 +368,7 @@ class SeismoDb: rec.get("filename"), rec.get("filesize"), rec.get("a5_pickle_filename"), + rec.get("sidecar_filename"), ), ) inserted += 1 @@ -379,13 +383,15 @@ class SeismoDb: UPDATE events SET blastware_filename = ?, blastware_filesize = ?, - a5_pickle_filename = ? + a5_pickle_filename = ?, + sidecar_filename = ? WHERE serial = ? AND timestamp = ? """, ( rec.get("filename"), rec.get("filesize"), rec.get("a5_pickle_filename"), + rec.get("sidecar_filename"), serial, ts, ), @@ -449,6 +455,36 @@ class SeismoDb: ) return cur.rowcount > 0 + def update_event_review(self, event_id: str, review: dict) -> bool: + """ + Sync derived index columns from a sidecar's `review` block. + + Currently the only derived index is `events.false_trigger` — kept + in sync so `/db/events?false_trigger=true` queries don't have to + scan every sidecar JSON on disk. The sidecar JSON itself remains + the source of truth for the full review state. + + Returns True when the row exists, False otherwise. No-op fields + (review without `false_trigger`) leave the column untouched. + """ + if not isinstance(review, dict): + return False + if "false_trigger" not in review: + # Nothing derived to update; just confirm the row exists. + with self._connect() as conn: + row = conn.execute( + "SELECT 1 FROM events WHERE id=?", (event_id,), + ).fetchone() + return row is not None + + flag = 1 if review.get("false_trigger") else 0 + with self._connect() as conn: + cur = conn.execute( + "UPDATE events SET false_trigger=? WHERE id=?", + (flag, event_id), + ) + return cur.rowcount > 0 + # ── Monitor log ─────────────────────────────────────────────────────────── def insert_monitor_log( diff --git a/sfm/event_hdf5.py b/sfm/event_hdf5.py new file mode 100644 index 0000000..aa6e6f0 --- /dev/null +++ b/sfm/event_hdf5.py @@ -0,0 +1,530 @@ +""" +sfm/event_hdf5.py — HDF5 codec for the canonical "clean waveform" file. + +Layout written to `.h5`: + + / + ├─ samples/ + │ ├─ Tran (float32, in/s) shape: (N,) + │ ├─ Vert (float32, in/s) shape: (N,) + │ ├─ Long (float32, in/s) shape: (N,) + │ └─ MicL (float32, psi) shape: (N,) + ├─ samples_int16/ (optional) + │ ├─ Tran (int16, raw ADC counts) shape: (N,) + │ └─ ... per channel (only when present in the source) + └─ root attrs (event metadata): + schema_version int = 1 + kind str = "sfm.event.hdf5" + serial str + waveform_key str (8-hex) + timestamp str (ISO-8601) + record_type str + sample_rate int (sps) + pretrig_samples int + total_samples int + rectime_seconds float + geo_range str "normal" | "sensitive" + geo_full_scale_ips float (10.0 or 1.250) + project str + client str + operator str + sensor_location str + peak_tran_ips float (from 0C; authoritative) + peak_vert_ips float + peak_long_ips float + peak_pvs_ips float + peak_mic_psi float + tool_version str + captured_at str (ISO-8601 UTC) + source_kind str "sfm-live" | "sfm-ach" | "bw-import" + +Why HDF5 and not just JSON for the canonical clean format: + - Native float32 arrays (no base64 dance, no per-value JSON parsing). + - Per-dataset gzip compression — sample arrays compress 3-5×. + - Cross-language: h5py (Python), HDF5.jl (Julia), io.netcdf (R), etc. + Analysis pipelines don't have to know anything about Blastware. + - Self-describing via attributes; future fields don't break readers. + +The plot-ready `sfm.plot.v1` JSON returned by the REST endpoints is +derived from this HDF5 (or computed on-the-fly when no .h5 exists yet). +""" + +from __future__ import annotations + +import datetime +import logging +from pathlib import Path +from typing import Optional, Union + +import h5py +import numpy as np + +from minimateplus.event_file_io import TOOL_VERSION as _DEFAULT_TOOL_VERSION +from minimateplus.models import Event + +log = logging.getLogger(__name__) + +SCHEMA_VERSION = 1 +HDF5_KIND = "sfm.event.hdf5" + +# Geophone full-scale velocity per range (in/s). Confirmed in CLAUDE.md +# from 4-20-26 captures: Normal=0x00 → 10 in/s, Sensitive=0x01 → 1.25 in/s. +_GEO_FS_BY_RANGE = { + "normal": 10.000, + "sensitive": 1.2500, + 0: 10.000, + 1: 1.2500, +} +_INT16_FS = 32768.0 + +# Default mic conversion: ADC count → psi. Approximate; exact factor +# depends on firmware reference voltage and mic sensitivity, neither of +# which is independently confirmed. We try to refine it from the device- +# reported peak when available (peak_mic_psi / max_abs_int16). +_MIC_DEFAULT_FS_PSI = 0.0125 # ≈ 0.5 psi at full scale (rough) + + +def _resolve_geo_full_scale(geo_range) -> float: + """Map a geo_range value (string or int from compliance config) to the + full-scale velocity in in/s. Defaults to Normal range (10.0) when the + value is unknown — same default as Blastware itself.""" + if geo_range is None: + return _GEO_FS_BY_RANGE["normal"] + if isinstance(geo_range, str): + return _GEO_FS_BY_RANGE.get(geo_range.lower(), _GEO_FS_BY_RANGE["normal"]) + return _GEO_FS_BY_RANGE.get(int(geo_range), _GEO_FS_BY_RANGE["normal"]) + + +def _normalise_range(geo_range) -> str: + """Return 'normal' or 'sensitive' (string) regardless of input form.""" + if isinstance(geo_range, str): + v = geo_range.lower() + if v in ("normal", "sensitive"): + return v + return "normal" + if geo_range == 1: + return "sensitive" + return "normal" + + +def _ts_iso(ts) -> str: + if ts is None: + return "" + try: + return datetime.datetime( + ts.year, ts.month, ts.day, + ts.hour or 0, ts.minute or 0, ts.second or 0, + ).isoformat() + except Exception: + return str(ts) + + +def _samples_to_float( + samples_int16: list[int], + full_scale: float, +) -> np.ndarray: + """Convert int16 ADC counts → float32 physical units. + + Uses _INT16_FS=32768 (not 32767) so that a count of -32768 maps to + exactly -full_scale and +32767 maps to ~+full_scale * 32767/32768. + Matches the device firmware's documented mapping (see CLAUDE.md + geo_hardware_constant rationale). + """ + if not samples_int16: + return np.array([], dtype=np.float32) + arr = np.asarray(samples_int16, dtype=np.int32) # int32 to avoid overflow during scale + return (arr.astype(np.float32) * (full_scale / _INT16_FS)).astype(np.float32) + + +def _mic_scale_factor( + samples_int16: list[int], + peak_mic_psi: Optional[float], +) -> float: + """Resolve the per-count psi factor for the microphone channel. + + When the device reports a peak mic value via the 0C record, we + back-solve the per-count factor from `peak_psi / max(|samples|)` so + the plotted waveform peaks land exactly at the device-reported value. + Otherwise fall back to the rough _MIC_DEFAULT_FS_PSI estimate. + """ + if peak_mic_psi is not None and peak_mic_psi > 0 and samples_int16: + max_count = max(abs(int(v)) for v in samples_int16) or 1 + return float(peak_mic_psi) / float(max_count) + return _MIC_DEFAULT_FS_PSI / _INT16_FS + + +def write_event_hdf5( + path: Union[str, Path], + event: Event, + *, + serial: str, + geo_range = "normal", + source_kind: str = "sfm-live", + tool_version: Optional[str] = None, + captured_at: Optional[datetime.datetime] = None, + include_int16: bool = True, +) -> dict: + """ + Persist a decoded Event as an HDF5 file with samples in physical units. + + Returns a small summary dict suitable for logging: + {"path": Path, "n_samples": int, "geo_full_scale_ips": float} + """ + path = Path(path) + raw = event.raw_samples or {} + pv = event.peak_values + pi = event.project_info + + geo_fs = _resolve_geo_full_scale(geo_range) + geo_range_str = _normalise_range(geo_range) + captured_at = captured_at or datetime.datetime.utcnow() + tool_version = tool_version or _DEFAULT_TOOL_VERSION + + # Per-channel float32 arrays in physical units. + geo_arrays = {} + for ch in ("Tran", "Vert", "Long"): + geo_arrays[ch] = _samples_to_float(raw.get(ch, []), geo_fs) + + # Mic channel — the per-count factor is resolved from the device-reported + # peak when available so the plot peaks the BW value exactly. + mic_int16 = raw.get("MicL", []) + mic_factor = _mic_scale_factor( + mic_int16, + getattr(pv, "micl", None) if pv else None, + ) + if mic_int16: + mic_arr = (np.asarray(mic_int16, dtype=np.int32).astype(np.float32) * mic_factor).astype(np.float32) + else: + mic_arr = np.array([], dtype=np.float32) + + n_samples = max( + (len(geo_arrays[ch]) for ch in geo_arrays), + default=0, + ) + + # Atomic write: temp file + os.replace. + tmp = path.with_suffix(path.suffix + ".tmp") + with h5py.File(tmp, "w") as f: + # Root attrs — event-level metadata. + attrs = f.attrs + attrs["schema_version"] = SCHEMA_VERSION + attrs["kind"] = HDF5_KIND + attrs["serial"] = serial or "" + attrs["waveform_key"] = event._waveform_key.hex() if event._waveform_key else "" + attrs["timestamp"] = _ts_iso(event.timestamp) + attrs["record_type"] = event.record_type or "" + attrs["sample_rate"] = int(event.sample_rate or 0) + attrs["pretrig_samples"] = int(event.pretrig_samples or 0) + attrs["total_samples"] = int(event.total_samples or n_samples) + attrs["rectime_seconds"] = float(event.rectime_seconds or 0.0) + attrs["geo_range"] = geo_range_str + attrs["geo_full_scale_ips"] = float(geo_fs) + attrs["project"] = (pi.project if pi else "") or "" + attrs["client"] = (pi.client if pi else "") or "" + attrs["operator"] = (pi.operator if pi else "") or "" + attrs["sensor_location"] = (pi.sensor_location if pi else "") or "" + attrs["peak_tran_ips"] = float(pv.tran if pv and pv.tran is not None else 0.0) + attrs["peak_vert_ips"] = float(pv.vert if pv and pv.vert is not None else 0.0) + attrs["peak_long_ips"] = float(pv.long if pv and pv.long is not None else 0.0) + attrs["peak_pvs_ips"] = float(pv.peak_vector_sum if pv and pv.peak_vector_sum is not None else 0.0) + attrs["peak_mic_psi"] = float(pv.micl if pv and pv.micl is not None else 0.0) + attrs["tool_version"] = tool_version or "" + attrs["captured_at"] = captured_at.isoformat() + "Z" if captured_at.tzinfo is None else captured_at.isoformat() + attrs["source_kind"] = source_kind + + # /samples — physical-units float32 (the primary data). + sgrp = f.create_group("samples") + for ch, arr in geo_arrays.items(): + sgrp.create_dataset( + ch, data=arr, dtype="float32", + compression="gzip", compression_opts=4, shuffle=True, + ) + sgrp.create_dataset( + "MicL", data=mic_arr, dtype="float32", + compression="gzip", compression_opts=4, shuffle=True, + ) + + # /samples_int16 — optional raw ADC counts (preserved for analysis + # tools that want pre-conversion data). Cheap to include. + if include_int16: + igrp = f.create_group("samples_int16") + for ch in ("Tran", "Vert", "Long", "MicL"): + vals = raw.get(ch, []) + if vals: + igrp.create_dataset( + ch, data=np.asarray(vals, dtype=np.int16), + compression="gzip", compression_opts=4, shuffle=True, + ) + igrp.attrs["mic_psi_per_count"] = float(mic_factor) + + import os + os.replace(tmp, path) + + log.info( + "write_event_hdf5: %s n_samples=%d geo_fs=%.3f filesize=%d", + path, n_samples, geo_fs, path.stat().st_size, + ) + return { + "path": path, + "n_samples": n_samples, + "geo_full_scale_ips": geo_fs, + } + + +def read_event_hdf5(path: Union[str, Path]) -> dict: + """ + Load an event HDF5 into a plain dict (no Event reconstruction — + callers that want an Event can use the data directly). + + Returns: + { + "schema_version": int, + "kind": str, + "attrs": dict[str, …], # all root attributes + "samples": { # float32 lists in physical units + "Tran": ndarray, "Vert": ndarray, "Long": ndarray, "MicL": ndarray, + }, + "samples_int16": {…} or None, + "mic_psi_per_count": float | None, + } + + Raises FileNotFoundError if missing, ValueError on bad shape / + unsupported schema_version. + """ + path = Path(path) + with h5py.File(path, "r") as f: + attrs = {k: _h5_attr_value(v) for k, v in f.attrs.items()} + sv = attrs.get("schema_version", 0) + if not isinstance(sv, int) or sv < 1 or sv > SCHEMA_VERSION: + raise ValueError( + f"{path}: unsupported HDF5 schema_version={sv} " + f"(this build supports 1..{SCHEMA_VERSION})" + ) + if attrs.get("kind") != HDF5_KIND: + raise ValueError(f"{path}: kind != {HDF5_KIND!r} (got {attrs.get('kind')!r})") + + samples = {} + for ch in ("Tran", "Vert", "Long", "MicL"): + ds = f.get(f"samples/{ch}") + samples[ch] = np.asarray(ds[()]) if ds is not None else np.array([], dtype=np.float32) + + samples_int16 = None + mic_psi = None + igrp = f.get("samples_int16") + if igrp is not None: + samples_int16 = {} + for ch in ("Tran", "Vert", "Long", "MicL"): + ds = igrp.get(ch) + if ds is not None: + samples_int16[ch] = np.asarray(ds[()]) + mic_attr = igrp.attrs.get("mic_psi_per_count") + if mic_attr is not None: + mic_psi = float(mic_attr) + + return { + "schema_version": sv, + "kind": attrs.get("kind"), + "attrs": attrs, + "samples": samples, + "samples_int16": samples_int16, + "mic_psi_per_count": mic_psi, + } + + +def _h5_attr_value(v): + """Convert an h5py attribute value to a plain Python type.""" + if isinstance(v, bytes): + return v.decode("utf-8", errors="replace") + if isinstance(v, np.generic): + return v.item() + return v + + +# ── Plot-ready JSON ────────────────────────────────────────────────────────── + + +def event_to_plot_json( + event: Event, + *, + serial: str, + geo_range = "normal", + event_id: Optional[str] = None, + index: Optional[int] = None, +) -> dict: + """ + Build a `sfm.plot.v1` JSON dict directly from an Event (skipping HDF5). + + Used by: + - `/device/event/{idx}/waveform` (live device path) + - The CLI / tests for in-memory conversion sanity-checks. + + Stored events go through `plot_json_from_hdf5()` so the wire format + is identical regardless of whether the data came from the live device + or the on-disk HDF5. + """ + raw = event.raw_samples or {} + pv = event.peak_values + geo_fs = _resolve_geo_full_scale(geo_range) + geo_range_str = _normalise_range(geo_range) + sr = int(event.sample_rate or 0) or 1024 + pretrig = int(event.pretrig_samples or 0) + + geo_arrays = {ch: _samples_to_float(raw.get(ch, []), geo_fs).tolist() + for ch in ("Tran", "Vert", "Long")} + mic_int16 = raw.get("MicL", []) + mic_factor = _mic_scale_factor( + mic_int16, + getattr(pv, "micl", None) if pv else None, + ) + mic_arr = [float(v) * mic_factor for v in mic_int16] if mic_int16 else [] + + n = max( + (len(geo_arrays[ch]) for ch in geo_arrays), + default=len(mic_arr), + ) + return _build_plot_dict( + n_samples=n, + sample_rate=sr, + pretrig_samples=pretrig, + total_samples=int(event.total_samples or n), + rectime_seconds=float(event.rectime_seconds or 0.0), + timestamp_iso=_ts_iso(event.timestamp), + serial=serial, + record_type=event.record_type, + waveform_key=event._waveform_key.hex() if event._waveform_key else None, + geo_range=geo_range_str, + geo_fs=geo_fs, + channels_floats={ + "Tran": geo_arrays["Tran"], + "Vert": geo_arrays["Vert"], + "Long": geo_arrays["Long"], + "MicL": mic_arr, + }, + peaks_dict={ + "tran": getattr(pv, "tran", None) if pv else None, + "vert": getattr(pv, "vert", None) if pv else None, + "long": getattr(pv, "long", None) if pv else None, + "pvs": getattr(pv, "peak_vector_sum", None) if pv else None, + "mic": getattr(pv, "micl", None) if pv else None, + }, + event_id=event_id, + index=index if index is not None else event.index, + ) + + +def plot_json_from_hdf5( + path: Union[str, Path], + *, + event_id: Optional[str] = None, + index: Optional[int] = None, +) -> dict: + """Build a `sfm.plot.v1` JSON dict from a stored .h5 file.""" + data = read_event_hdf5(path) + a = data["attrs"] + s = data["samples"] + return _build_plot_dict( + n_samples=len(s["Tran"]) if "Tran" in s else 0, + sample_rate=int(a.get("sample_rate", 1024) or 1024), + pretrig_samples=int(a.get("pretrig_samples", 0) or 0), + total_samples=int(a.get("total_samples", 0) or 0), + rectime_seconds=float(a.get("rectime_seconds", 0.0) or 0.0), + timestamp_iso=a.get("timestamp", ""), + serial=a.get("serial", ""), + record_type=a.get("record_type", ""), + waveform_key=a.get("waveform_key", "") or None, + geo_range=a.get("geo_range", "normal"), + geo_fs=float(a.get("geo_full_scale_ips", 10.0) or 10.0), + channels_floats={ + "Tran": s.get("Tran", np.array([])).tolist(), + "Vert": s.get("Vert", np.array([])).tolist(), + "Long": s.get("Long", np.array([])).tolist(), + "MicL": s.get("MicL", np.array([])).tolist(), + }, + peaks_dict={ + "tran": float(a.get("peak_tran_ips", 0.0) or 0.0) or None, + "vert": float(a.get("peak_vert_ips", 0.0) or 0.0) or None, + "long": float(a.get("peak_long_ips", 0.0) or 0.0) or None, + "pvs": float(a.get("peak_pvs_ips", 0.0) or 0.0) or None, + "mic": float(a.get("peak_mic_psi", 0.0) or 0.0) or None, + }, + event_id=event_id, + index=index, + ) + + +def _build_plot_dict( + *, + n_samples: int, + sample_rate: int, + pretrig_samples: int, + total_samples: int, + rectime_seconds: float, + timestamp_iso: str, + serial: str, + record_type: Optional[str], + waveform_key: Optional[str], + geo_range: str, + geo_fs: float, + channels_floats: dict[str, list[float]], + peaks_dict: dict[str, Optional[float]], + event_id: Optional[str], + index: Optional[int] = None, +) -> dict: + dt_ms = (1000.0 / sample_rate) if sample_rate > 0 else 0.0 + t0_ms = -pretrig_samples * dt_ms + + def _ch(unit: str, values: list[float], peak: Optional[float]) -> dict: + # Locate the peak's time within the values array (max abs). + if values: + mags = [abs(v) for v in values] + i = mags.index(max(mags)) + peak_t_ms = round(t0_ms + i * dt_ms, 4) + peak_value = peak if peak is not None else values[i] + else: + peak_t_ms = None + peak_value = peak + return { + "unit": unit, + "values": values, + "peak": peak_value, + "peak_t_ms": peak_t_ms, + } + + return { + "schema": "sfm.plot.v1", + "event_id": event_id, + "index": index, + "serial": serial, + "timestamp": timestamp_iso, + "record_type": record_type, + "waveform_key": waveform_key, + + "time_axis": { + "sample_rate": sample_rate, + "pretrig_samples": pretrig_samples, + "total_samples": total_samples or n_samples, + "n_samples": n_samples, + "t0_ms": round(t0_ms, 4), + "dt_ms": round(dt_ms, 6), + "rectime_seconds": rectime_seconds, + }, + + "geo_range": geo_range, + "geo_full_scale_ips": geo_fs, + "trigger_ms": 0.0, + + "channels": { + "Tran": _ch("in/s", channels_floats.get("Tran", []), peaks_dict.get("tran")), + "Vert": _ch("in/s", channels_floats.get("Vert", []), peaks_dict.get("vert")), + "Long": _ch("in/s", channels_floats.get("Long", []), peaks_dict.get("long")), + "MicL": _ch("psi", channels_floats.get("MicL", []), peaks_dict.get("mic")), + }, + + "peak_values": { + "transverse": peaks_dict.get("tran"), + "vertical": peaks_dict.get("vert"), + "longitudinal": peaks_dict.get("long"), + "vector_sum": peaks_dict.get("pvs"), + "mic_psi": peaks_dict.get("mic"), + }, + } diff --git a/sfm/import_bw.py b/sfm/import_bw.py new file mode 100644 index 0000000..f49097b --- /dev/null +++ b/sfm/import_bw.py @@ -0,0 +1,194 @@ +""" +sfm/import_bw.py — CLI for ingesting Blastware-format event files. + +Walks a path (file or directory), parses each recognised event-file +binary, copies it into the canonical waveform store, writes the +.sfm.json sidecar, and upserts a row in seismo_relay.db. + +Use cases: + - Migrating a Blastware ACH inbox into SFM + - One-off imports of files emailed in by field crews + - Bulk-loading historical archives + +Usage: + python -m sfm.import_bw [--serial BE11529] + [--db-path bridges/captures/seismo_relay.db] + [--store-root bridges/captures/waveforms] + [--dry-run] + [-v] + +Examples: + python -m sfm.import_bw ~/Downloads/M529LKIQ.7M0W + python -m sfm.import_bw /path/to/blastware_archive --serial BE11529 +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path +from typing import Iterator + +# Allow running from the repo root without installation. +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from sfm.database import SeismoDb +from sfm.waveform_store import WaveformStore + +log = logging.getLogger("sfm.import_bw") + + +# Blastware event-file extensions: 4-char `AB0T` (T = W or H) for ACH +# downloads, 3-char `AB0` for direct downloads. We discover candidates +# by length + last-char rather than enumerating every (A, B) pair. +def _looks_like_bw_event(path: Path) -> bool: + """Heuristic: 3-char or 4-char extension, ends with W/H/0, and the + file is at least 70 bytes (header + STRT + footer minimum).""" + if not path.is_file(): + return False + ext = path.suffix.lstrip(".") + if not (3 <= len(ext) <= 4): + return False + if not (ext[-1].upper() in {"W", "H"} or ext.endswith("0")): + return False + try: + return path.stat().st_size >= 70 + except OSError: + return False + + +def _walk(path: Path) -> Iterator[Path]: + """Yield candidate BW event-file paths under `path` (file or dir).""" + if path.is_file(): + if _looks_like_bw_event(path): + yield path + return + if path.is_dir(): + for p in sorted(path.rglob("*")): + if _looks_like_bw_event(p): + yield p + + +def main(argv: list[str] | None = None) -> int: + p = argparse.ArgumentParser( + description="Import Blastware-format event files into the SFM store + DB.", + ) + p.add_argument("path", help="File or directory to import.") + p.add_argument( + "--serial", default=None, metavar="SERIAL", + help="Override the serial-number hint (e.g. BE11529). Defaults to " + "the value decoded from each BW filename's prefix.", + ) + p.add_argument( + "--db-path", + default=str(Path(__file__).resolve().parent.parent / "bridges" / "captures" / "seismo_relay.db"), + help="Path to seismo_relay.db (default: bridges/captures/seismo_relay.db).", + ) + p.add_argument( + "--store-root", + default=None, + help="Root of the waveform store (default: /waveforms).", + ) + p.add_argument( + "--dry-run", action="store_true", + help="Parse and report per-file outcomes; don't write anything.", + ) + p.add_argument("-v", "--verbose", action="store_true", help="Debug logging.") + args = p.parse_args(argv) + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)-7s %(name)s %(message)s", + datefmt="%H:%M:%S", + ) + + src = Path(args.path).expanduser().resolve() + if not src.exists(): + print(f"error: {src} does not exist", file=sys.stderr) + return 2 + + db_path = Path(args.db_path).expanduser().resolve() + store_root = ( + Path(args.store_root).expanduser().resolve() + if args.store_root else db_path.parent / "waveforms" + ) + + db = None if args.dry_run else SeismoDb(db_path) + store = None if args.dry_run else WaveformStore(store_root) + + candidates = list(_walk(src)) + if not candidates: + print(f"No BW event-file candidates found under {src}", file=sys.stderr) + return 1 + + print(f"Importing {len(candidates)} file(s) from {src}...") + if args.dry_run: + print("(dry-run — no writes will occur)") + + ok = err = skipped = 0 + for path in candidates: + try: + bw_bytes = path.read_bytes() + except Exception as exc: + print(f" [ERR ] {path}: read failed: {exc}") + err += 1 + continue + + if args.dry_run: + # Just parse to verify integrity; don't touch DB or store. + from minimateplus import event_file_io + try: + ev = event_file_io.read_blastware_file(path) + ts = ev.timestamp and ( + f"{ev.timestamp.year}-{ev.timestamp.month:02d}-{ev.timestamp.day:02d} " + f"{ev.timestamp.hour:02d}:{ev.timestamp.minute:02d}:{ev.timestamp.second:02d}" + ) or "?" + pv = ev.peak_values + pvs = pv.peak_vector_sum if pv and pv.peak_vector_sum is not None else 0.0 + print(f" [OK ] {path.name} ts={ts} PVS={pvs:.4f}") + ok += 1 + except Exception as exc: + print(f" [ERR ] {path}: parse failed: {exc}") + err += 1 + continue + + try: + ev, rec = store.save_imported_bw( + bw_bytes, source_path=path, serial_hint=args.serial, + ) + # Resolve serial for the DB row. Prefer the hint, then the + # one decoded from the filename (already done by the store). + serial_used = args.serial or _infer_serial(path.name) or "UNKNOWN" + ins, sk = db.insert_events( + [ev], serial=serial_used, + waveform_records=( + {ev._waveform_key.hex(): rec} + if ev._waveform_key else None + ), + ) + tag = "OK " if ins else ("SKIP" if sk else "OK ") + print(f" [{tag}] {path.name} → {rec['filename']} " + f"({rec['filesize']} B, sha256={rec['sha256'][:12]}…) " + f"serial={serial_used} ins={ins} skip={sk}") + if ins: + ok += 1 + else: + skipped += 1 + except Exception as exc: + print(f" [ERR ] {path}: import failed: {exc}") + log.debug("traceback", exc_info=True) + err += 1 + + print(f"\nDone. ok={ok} skipped={skipped} errors={err}") + return 0 if err == 0 else 1 + + +def _infer_serial(filename: str): + """Reuse WaveformStore's filename → serial decoder for log output.""" + from sfm.waveform_store import _serial_from_bw_filename + return _serial_from_bw_filename(filename) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/sfm/server.py b/sfm/server.py index f6043d9..d7073ca 100644 --- a/sfm/server.py +++ b/sfm/server.py @@ -45,7 +45,7 @@ from typing import Optional # FastAPI / Pydantic try: - from fastapi import Body, FastAPI, HTTPException, Query + from fastapi import Body, FastAPI, File, HTTPException, Query, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, JSONResponse, StreamingResponse from pydantic import BaseModel @@ -64,6 +64,7 @@ from minimateplus.models import CallHomeConfig, ComplianceConfig, DeviceInfo, Ev from minimateplus.transport import TcpTransport, DEFAULT_TCP_PORT from minimateplus.blastware_file import write_blastware_file, blastware_filename from minimateplus.client import _decode_a5_metadata_into, _decode_a5_waveform +from sfm import event_hdf5 from sfm.cache import SFMCache, get_cache from sfm.database import SeismoDb from sfm.live_cache import LiveCache as _LiveCache @@ -732,26 +733,33 @@ def device_event_waveform( detail=f"Event index {index} not found on device", ) - raw = getattr(ev, "raw_samples", None) or {} - samples_decoded = len(raw.get("Tran", [])) + # Backfill from compliance_config: sample_rate, record_time, and + # derived total_samples. These are user-set authoritative values; the + # corresponding STRT-derived guesses in `_decode_a5_waveform` can be + # off (e.g. rectime used to read the 0x46 record-type marker = 70s). + cc = info.compliance_config + if cc: + if ev.sample_rate is None and cc.sample_rate: + ev.sample_rate = cc.sample_rate + if cc.record_time: + ev.rectime_seconds = cc.record_time + if ev.sample_rate and ev.rectime_seconds: + derived = int(round(ev.sample_rate * ev.rectime_seconds)) + if (ev.total_samples is None + or ev.total_samples > derived * 2 + or ev.total_samples < derived // 4): + ev.total_samples = derived + geo_range = getattr(cc, "geo_range", None) if cc else None - # Resolve sample_rate from compliance config if not on the event itself - sample_rate = ev.sample_rate - if sample_rate is None and info.compliance_config: - sample_rate = info.compliance_config.sample_rate - - result = { - "index": ev.index, - "record_type": ev.record_type, - "timestamp": _serialise_timestamp(ev.timestamp), - "total_samples": ev.total_samples, - "pretrig_samples": ev.pretrig_samples, - "rectime_seconds": ev.rectime_seconds, - "samples_decoded": samples_decoded, - "sample_rate": sample_rate, - "peak_values": _serialise_peak_values(ev.peak_values), - "channels": raw, - } + # Build the plot.v1 JSON: samples in physical units (in/s for geo, psi + # for mic), explicit time axis, peak markers — the shape clients should + # consume directly without doing any ADC scaling. + serial = getattr(info, "serial", None) or "" + result = event_hdf5.event_to_plot_json( + ev, serial=serial, + geo_range=geo_range or "normal", + index=index, + ) cache.set_waveform(conn_key, index, result) return result @@ -781,8 +789,9 @@ def device_event_blastware_file( triggered events; histogram requires recording_mode to be populated from compliance config) - Performs: POLL startup → get_events(full_waveform=False, extra_chunks=1, - stop_after_index=index) → write_blastware_file() → FileResponse. + Performs: POLL startup → get_events(full_waveform=True, + stop_after_index=index) → write_blastware_file() → FileResponse + + persistent store + DB upsert. """ log.info( "GET /device/event/%d/blastware_file port=%s host=%s force=%s", @@ -790,19 +799,19 @@ def device_event_blastware_file( ) # `force` always re-downloads from the device. This endpoint already # never short-circuits via cache, so `force` is reserved for parity with - # the other live endpoints and to suppress the post-download persist - # (see end of handler) when the caller wants a fetch-only escape hatch. + # the other live endpoints. try: def _do(): with _build_client(port, baud, host, tcp_port, timeout=120.0) as client: info = client.connect() - # Under v0.14.0 BW-exact 5A walk, the chunk loop is bounded by - # the event end_offset extracted from STRT. No more - # stop_after_metadata / extra_chunks gymnastics — these - # kwargs are now no-ops. + # full_waveform=True pulls the complete 5A stream so the + # client populates STRT-derived fields (total_samples, + # pretrig_samples, rectime_seconds) AND raw_samples on the + # Event. Required for the .h5 + .sfm.json sidecar to be + # filled in correctly — without it, those land as nulls. events = client.get_events( - full_waveform=False, + full_waveform=True, stop_after_index=index, ) matching = [ev for ev in events if ev.index == index] @@ -861,7 +870,34 @@ def device_event_blastware_file( # queryable via /db/events afterwards (matches the ACH ingestion path). if serial != "UNKNOWN" and ev._waveform_key is not None: try: - rec = _get_store().save(ev, serial=serial, a5_frames=a5_frames) + cc = info.compliance_config + # Backfill authoritative compliance-config values onto the + # Event before persisting. These supersede whatever + # _decode_a5_waveform read from the STRT bytes (some of which + # have ambiguous semantics — e.g. STRT[20] is rectime but + # STRT[8:10] / STRT[16:18] are device-specific scratch fields + # that aren't reliable sample/pretrig counts). + if cc: + if ev.sample_rate is None and cc.sample_rate: + ev.sample_rate = cc.sample_rate + if cc.record_time: + # record_time from compliance is authoritative — the + # user-set value the device followed when recording. + ev.rectime_seconds = cc.record_time + # Derive total_samples from sample_rate × rectime when + # we can; the STRT-derived value can land at a buffer- + # offset rather than a sample count. + if ev.sample_rate and ev.rectime_seconds: + derived = int(round(ev.sample_rate * ev.rectime_seconds)) + if (ev.total_samples is None + or ev.total_samples > derived * 2 + or ev.total_samples < derived // 4): + ev.total_samples = derived + geo_range = getattr(cc, "geo_range", None) if cc else None + rec = _get_store().save( + ev, serial=serial, a5_frames=a5_frames, + geo_range=geo_range if geo_range is not None else "normal", + ) _get_db().insert_events( [ev], serial=serial, @@ -1412,34 +1448,50 @@ def db_event_blastware_file(event_id: str) -> FileResponse: @app.get("/db/events/{event_id}/waveform.json") def db_event_waveform_json(event_id: str) -> dict: """ - Return the decoded raw_samples + metadata for a stored event in the - same JSON shape as `/device/event/{index}/waveform`. + Return the plot-ready JSON (`sfm.plot.v1`) for a stored event. - Reads the `.a5.pkl` sidecar from the store, rebuilds an Event in - memory, runs the standard A5 decoders, and serialises the result. + Resolution order (cheapest first): + 1. If `.h5` exists, serve it via `plot_json_from_hdf5`. + Samples are already in physical units; no decode work needed. + 2. Else if `.a5.pkl` exists, replay the A5 decoders to + rebuild an Event and serialise via `event_to_plot_json`. + 3. Else 404 — the event has no waveform data on disk. + + The shape is identical regardless of source, so clients (the SFM + webapp, Terra-View, etc.) consume the same `sfm.plot.v1` payload. """ row = _get_db().get_event(event_id) if row is None: raise HTTPException(status_code=404, detail=f"Event {event_id} not found") serial = row.get("serial") filename = row.get("blastware_filename") - a5_name = row.get("a5_pickle_filename") - if not serial or not filename or not a5_name: + if not serial or not filename: + raise HTTPException( + status_code=404, + detail=f"Event {event_id} has no event file in the store", + ) + store = _get_store() + + # Path 1: HDF5 (canonical clean format). + h5_path = store.hdf5_path_for(serial, filename) + if h5_path.exists(): + try: + return event_hdf5.plot_json_from_hdf5(h5_path, event_id=event_id) + except Exception as exc: + log.warning("HDF5 read failed (%s); falling back to A5 path", exc) + + # Path 2: A5 pickle replay. + a5_frames = store.load_a5(serial, filename) + if not a5_frames: raise HTTPException( status_code=404, detail=( - f"Event {event_id} has no A5 sidecar in the store. " - "Re-download via the live endpoint to populate." + f"Event {event_id} has no waveform data on disk " + "(no .h5 and no .a5.pkl). Run the backfill script or " + "re-download via the live endpoint to populate." ), ) - a5_frames = _get_store().load_a5(serial, filename) - if not a5_frames: - raise HTTPException( - status_code=410, - detail=f"A5 sidecar missing or unreadable: {a5_name}", - ) - # Rebuild a minimal Event from DB fields, then decode A5 onto it. ev = Event(index=-1) try: _decode_a5_metadata_into(a5_frames, ev) @@ -1451,27 +1503,178 @@ def db_event_waveform_json(event_id: str) -> dict: log.error("db_event_waveform_json: waveform decode failed: %s", exc, exc_info=True) raise HTTPException(status_code=500, detail=f"Waveform decode failed: {exc}") from exc - raw = getattr(ev, "raw_samples", None) or {} - samples_decoded = len(raw.get("Tran", [])) - return { - "event_id": event_id, - "serial": serial, - "record_type": ev.record_type or row.get("record_type"), - "timestamp": _serialise_timestamp(ev.timestamp) or row.get("timestamp"), - "total_samples": ev.total_samples, - "pretrig_samples": ev.pretrig_samples, - "rectime_seconds": ev.rectime_seconds, - "samples_decoded": samples_decoded, - "sample_rate": ev.sample_rate or row.get("sample_rate"), - "peak_values": _serialise_peak_values(ev.peak_values) or { - "transverse": row.get("tran_ppv"), - "vertical": row.get("vert_ppv"), - "longitudinal": row.get("long_ppv"), - "vector_sum": row.get("peak_vector_sum"), - "mic": row.get("mic_ppv"), - }, - "channels": raw, - } + # Carry over fields from the DB row when the A5 replay didn't fill them. + if ev.sample_rate is None and row.get("sample_rate"): + ev.sample_rate = row.get("sample_rate") + + return event_hdf5.event_to_plot_json( + ev, serial=serial, geo_range="normal", event_id=event_id, + ) + + +# ── /db/events/{id}/sidecar — modern .sfm.json review/metadata accessors ────── + + +class SidecarPatchBody(BaseModel): + """Body for PATCH /db/events/{id}/sidecar. + + JSON-merge-patch semantics: only the keys you include get updated. + `review` is the editable block for monthly-summary workflows + (false_trigger flag, reviewer notes, etc.); `extensions` is the + forward-compat namespace for vendor / future fields. + """ + review: Optional[dict] = None + extensions: Optional[dict] = None + + +@app.get("/db/events/{event_id}/sidecar") +def db_event_sidecar(event_id: str) -> dict: + """ + Return the .sfm.json sidecar for a stored event. 404 if the event + is unknown or has no sidecar in the store (events ingested before + the sidecar feature landed will show this until backfilled). + """ + row = _get_db().get_event(event_id) + if row is None: + raise HTTPException(status_code=404, detail=f"Event {event_id} not found") + serial = row.get("serial") + filename = row.get("blastware_filename") + if not serial or not filename: + raise HTTPException( + status_code=404, + detail=f"Event {event_id} has no event file in the store", + ) + sidecar = _get_store().load_sidecar(serial, filename) + if sidecar is None: + raise HTTPException( + status_code=404, + detail=( + f"No .sfm.json sidecar on disk for {filename}. " + "Run scripts/backfill_sidecars.py to generate one." + ), + ) + return sidecar + + +@app.patch("/db/events/{event_id}/sidecar") +def db_event_sidecar_patch(event_id: str, body: SidecarPatchBody) -> dict: + """ + JSON-merge-patch the sidecar's `review` and/or `extensions` blocks. + + The sidecar JSON is the source of truth for review state. When + `review.false_trigger` is updated, the SQL `events.false_trigger` + column is kept in sync as a derived index for fast filtering. + + Returns the new full sidecar. 404 if the event or sidecar is missing. + """ + row = _get_db().get_event(event_id) + if row is None: + raise HTTPException(status_code=404, detail=f"Event {event_id} not found") + serial = row.get("serial") + filename = row.get("blastware_filename") + if not serial or not filename: + raise HTTPException( + status_code=404, + detail=f"Event {event_id} has no event file in the store", + ) + + if not (body.review or body.extensions): + raise HTTPException( + status_code=400, + detail="PATCH body must include `review` and/or `extensions`", + ) + + new_sidecar = _get_store().patch_sidecar( + serial, filename, + review=body.review, + extensions=body.extensions, + ) + if new_sidecar is None: + raise HTTPException( + status_code=404, + detail=f"No .sfm.json sidecar on disk for {filename}", + ) + + # Mirror false_trigger from review block into the SQL index column. + if body.review is not None: + _get_db().update_event_review(event_id, new_sidecar.get("review", {})) + + return new_sidecar + + +# ── /db/import/blastware_file — ingest BW-only event files ──────────────────── + + +@app.post("/db/import/blastware_file") +async def db_import_blastware_file( + files: list[UploadFile] = File(...), + serial: Optional[str] = Query(None, description="Optional serial-number hint (e.g. BE11529); falls back to the BW filename's encoded prefix when omitted"), +) -> dict: + """ + Multipart upload of one or more Blastware event file binaries + (typically produced by Blastware's own ACH). For each file: + + 1. Parse the bytes via WaveformStore.save_imported_bw — produces + a parsed Event + copies the file into the persistent store + + writes a .sfm.json sidecar with source.kind = "bw-import". + 2. Upsert a row into `events` (dedup'd on serial+timestamp). + + Response includes per-file outcomes so the caller can see which + landed cleanly and which failed (e.g. malformed file, unknown + serial, etc.). + """ + store = _get_store() + db = _get_db() + results: list[dict] = [] + + for upload in files: + try: + content = await upload.read() + except Exception as exc: + results.append({ + "filename": upload.filename, "status": "error", + "detail": f"read failed: {exc}", + }) + continue + + try: + ev, rec = store.save_imported_bw( + content, + source_path=Path(upload.filename or "imported.bw"), + serial_hint=serial, + ) + inserted, skipped = db.insert_events( + [ev], + serial=(serial or _serial_from_event(ev) or "UNKNOWN"), + waveform_records={ + ev._waveform_key.hex(): rec + if ev._waveform_key else None + } if ev._waveform_key else None, + ) + results.append({ + "filename": upload.filename, + "status": "ok", + "stored_filename": rec["filename"], + "filesize": rec["filesize"], + "sha256": rec["sha256"], + "inserted": inserted, + "skipped": skipped, + }) + except Exception as exc: + log.error("import failed for %s: %s", upload.filename, exc, exc_info=True) + results.append({ + "filename": upload.filename, "status": "error", + "detail": str(exc), + }) + + return {"count": len(results), "results": results} + + +def _serial_from_event(ev) -> Optional[str]: + """Fallback serial resolver — currently relies on the BW filename + decoder via WaveformStore.save_imported_bw, so this is just a + placeholder for future enhancement (e.g. inferring from project_info).""" + return None @app.get("/db/units/{serial}/waveforms.zip") diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html index 63f15b8..17474b4 100644 --- a/sfm/sfm_webapp.html +++ b/sfm/sfm_webapp.html @@ -639,6 +639,117 @@ } .force-toggle.active .ft-dot { background: #f85149; box-shadow: 0 0 6px #f85149; } + /* ── Sidecar review modal ── */ + .sc-overlay { + position: fixed; inset: 0; + background: rgba(0,0,0,0.55); + display: none; + align-items: center; + justify-content: center; + z-index: 100; + } + .sc-overlay.visible { display: flex; } + .sc-modal { + background: var(--surface2); + border: 1px solid var(--border); + border-radius: 8px; + width: min(720px, 92vw); + max-height: 88vh; + display: flex; + flex-direction: column; + box-shadow: 0 8px 32px rgba(0,0,0,0.5); + } + .sc-header { + display: flex; align-items: center; justify-content: space-between; + padding: 14px 18px; + border-bottom: 1px solid var(--border); + } + .sc-header h3 { + margin: 0; font-size: 14px; font-weight: 600; + color: var(--text); font-family: monospace; + } + .sc-close { + background: none; border: none; cursor: pointer; + color: var(--text-mute); font-size: 18px; line-height: 1; + padding: 4px 8px; border-radius: 4px; + } + .sc-close:hover { background: var(--surface); color: var(--text); } + .sc-body { + flex: 1; overflow-y: auto; + padding: 16px 18px; + display: flex; flex-direction: column; gap: 14px; + } + .sc-section { + display: flex; flex-direction: column; gap: 6px; + } + .sc-section h4 { + margin: 0 0 4px; + font-size: 11px; font-weight: 600; + color: var(--text-mute); text-transform: uppercase; + letter-spacing: 0.6px; + } + .sc-grid { + display: grid; + grid-template-columns: 130px 1fr; + gap: 4px 12px; + font-size: 12px; + } + .sc-grid dt { color: var(--text-mute); } + .sc-grid dd { margin: 0; color: var(--text); font-family: monospace; word-break: break-all; } + .sc-row { display: flex; align-items: center; gap: 8px; font-size: 13px; } + .sc-row label { color: var(--text-dim); } + .sc-row input[type="checkbox"] { cursor: pointer; } + .sc-row input[type="text"], .sc-body textarea { + flex: 1; + background: var(--bg); + border: 1px solid var(--border); + border-radius: 5px; + padding: 6px 9px; + font-size: 12px; + color: var(--text); + font-family: monospace; + } + .sc-body textarea { + width: 100%; + min-height: 80px; + resize: vertical; + font-family: inherit; + } + .sc-raw { + border: 1px solid var(--border); + border-radius: 5px; + background: var(--bg); + } + .sc-raw summary { + padding: 6px 10px; + cursor: pointer; + font-size: 11px; + color: var(--text-dim); + user-select: none; + } + .sc-raw pre { + margin: 0; + padding: 8px 12px; + max-height: 240px; + overflow: auto; + font-size: 11px; + color: var(--text); + border-top: 1px solid var(--border); + } + .sc-footer { + display: flex; justify-content: flex-end; gap: 8px; + padding: 12px 18px; + border-top: 1px solid var(--border); + } + .sc-status { + flex: 1; align-self: center; + font-size: 11px; color: var(--text-mute); + } + .sc-status.error { color: #f85149; } + .sc-status.ok { color: #56d364; } + table.db-table tbody tr.clickable { cursor: pointer; } + table.db-table tbody tr.clickable:hover { background: var(--surface2); } + /* ── Section containers ── */ #section-live, #section-db { display: flex; @@ -806,6 +917,14 @@
+ +
@@ -1224,7 +1343,7 @@ let currentEvent = 0; let charts = {}; let geoAdcScale = 6.206; const DBL_REF = 2.9e-9; // 20 µPa in psi — reference pressure for dBL -const CHANNEL_COLORS = { Tran:'#58a6ff', Vert:'#3fb950', Long:'#d29922', Mic:'#bc8cff' }; +const CHANNEL_COLORS = { Tran:'#58a6ff', Vert:'#3fb950', Long:'#d29922', MicL:'#bc8cff' }; // ── Helpers ──────────────────────────────────────────────────────────────────── function api() { return document.getElementById('api-base').value.replace(/\/$/, ''); } @@ -1355,9 +1474,11 @@ async function connectUnit() { document.getElementById('device-bar').style.display = 'flex'; document.getElementById('monitor-panel').style.display = 'flex'; - document.getElementById('load-btn').disabled = eventList.length === 0; - document.getElementById('prev-btn').disabled = true; - document.getElementById('next-btn').disabled = eventList.length <= 1; + document.getElementById('load-btn').disabled = eventList.length === 0; + document.getElementById('save-btn').disabled = eventList.length === 0; + document.getElementById('download-btn').disabled = eventList.length === 0; + document.getElementById('prev-btn').disabled = true; + document.getElementById('next-btn').disabled = eventList.length <= 1; document.getElementById('cfg-read-btn').disabled = false; document.getElementById('cfg-write-btn').disabled = false; document.getElementById('ch-read-btn').disabled = false; @@ -1857,11 +1978,104 @@ async function loadWaveform() { document.getElementById('load-btn').disabled = false; } +// ── Persist current event to the SFM database + waveform store ────────────── +// +// Calls /device/event/{idx}/blastware_file, which on the server side: +// 1. Downloads the full waveform from the device (5A bulk stream) +// 2. Writes the Blastware-format event file into /waveforms// +// 3. Writes the .a5.pkl sidecar next to it (so the file can be regenerated) +// 4. Upserts a row into seismo_relay.db `events` table (dedup'd on serial+timestamp) +// +// We discard the response body — the side effects are what we want. The +// filename comes back in the Content-Disposition header for confirmation. +async function saveEventToDb() { + if (!devHost()) { setStatus('Enter device host first.', 'error'); return; } + const idx = currentEvent; + const btn = document.getElementById('save-btn'); + btn.disabled = true; + const orig = btn.textContent; + btn.textContent = '⏳ Saving…'; + setStatus(`Downloading event #${idx} and saving to DB…`, 'loading'); + + try { + const r = await fetch(`${api()}/device/event/${idx}/blastware_file?${deviceParams()}`); + if (!r.ok) { + const e = await r.json().catch(() => ({})); + throw new Error(e.detail || r.statusText); + } + // Pull the body to completion so the connection releases promptly, + // then drop it on the floor — we just want the server-side persist. + await r.blob(); + const filename = parseFilenameFromContentDisposition(r.headers.get('Content-Disposition')) + || `event ${idx}`; + setStatus(`Saved ${filename} to database + waveform store`, 'ok'); + } catch (e) { + setStatus(`Save error: ${e.message}`, 'error'); + } finally { + btn.disabled = false; + btn.textContent = orig; + } +} + +// ── Download the event file to the user's computer ────────────────────────── +// +// Uses a transient anchor + click trick so the browser surfaces its native +// "Save As" / Downloads behaviour. Same backend endpoint as Save to DB — +// the file is also persisted to the server store as a side effect. +function downloadEventFile() { + if (!devHost()) { setStatus('Enter device host first.', 'error'); return; } + const idx = currentEvent; + const url = `${api()}/device/event/${idx}/blastware_file?${deviceParams()}`; + setStatus(`Downloading event #${idx}…`, 'loading'); + // Hidden iframe avoids navigating away from the SPA. FastAPI's FileResponse + // sets Content-Disposition: attachment so the browser saves rather than displays. + const a = document.createElement('a'); + a.href = url; + a.style.display = 'none'; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + // We can't reliably detect when the browser finishes downloading; show a + // soft confirmation immediately. Errors will surface as a download failure + // dialog from the browser itself. + setTimeout(() => setStatus(`Download started for event #${idx} (also saved server-side)`, 'ok'), 250); +} + +function parseFilenameFromContentDisposition(header) { + if (!header) return null; + // RFC 6266: `attachment; filename="M529LKIQ.7M0W"` (or filename*=UTF-8''…) + const m = /filename\*?=(?:UTF-8'')?["']?([^"';]+)["']?/i.exec(header); + return m ? decodeURIComponent(m[1]) : null; +} + +// renderWaveform consumes the `sfm.plot.v1` JSON shape: +// { +// schema: "sfm.plot.v1", +// time_axis: { sample_rate, pretrig_samples, t0_ms, dt_ms, n_samples, ... }, +// channels: { Tran|Vert|Long|MicL: { unit, values, peak, peak_t_ms } }, +// geo_range, geo_full_scale_ips, trigger_ms, peak_values, ... +// } +// +// All sample arrays are already in PHYSICAL UNITS (in/s for geo, psi for +// mic) — the server applied the right scaling for the unit's geo_range. +// The viewer used to multiply ADC ints by `geoAdcScale / 32767` here, +// which silently scaled every plot ~38% too low because `geoAdcScale` is +// the in/s-per-V hardware constant, not the ADC-counts-to-velocity +// factor. No scaling happens client-side now. function renderWaveform(data) { - const sr = data.sample_rate || 1024; - const pretrig = data.pretrig_samples || 0; - const decoded = data.samples_decoded || 0; - const total = data.total_samples || decoded; + // Backward-compat shim: if we ever get the legacy shape from a stale + // cache, normalise it on the client so the viewer still works. + if (!data.schema && data.channels && Array.isArray(data.channels.Tran)) { + data = _legacyWaveformToPlotV1(data); + } + + const t = data.time_axis || {}; + const sr = t.sample_rate || 1024; + const pretrig = t.pretrig_samples || 0; + const total = t.total_samples || t.n_samples || 0; + const decoded = t.n_samples || 0; + const t0 = t.t0_ms ?? -(pretrig / sr * 1000); + const dt = t.dt_ms ?? (1000 / sr); const channels = data.channels || {}; // Status bar @@ -1869,70 +2083,83 @@ function renderWaveform(data) { bar.innerHTML = ''; bar.className = 'ok'; const ts = data.timestamp; - bar.textContent = ts ? `Event #${data.index} — ${ts.display} ` : `Event #${data.index} `; + // Title prefers `index` (live device, 0-based slot on the unit) and + // falls back to event_id (DB lookup) when index is absent. + const eventLabel = (data.index != null) ? `#${data.index}` : (data.event_id || ''); + bar.textContent = ts ? `Event ${eventLabel} — ${ts} ` : `Event ${eventLabel} `; addPill(`${data.record_type || '?'}`); addPill(`${sr} sps`); addPill(`${decoded.toLocaleString()} / ${total.toLocaleString()} samples`); addPill(`pretrig ${pretrig}`); - addPill(`${data.rectime_seconds ?? '?'} s`); + addPill(`${t.rectime_seconds ?? '?'} s`); + if (data.geo_range) addPill(`geo: ${data.geo_range} (${data.geo_full_scale_ips} in/s FS)`); + // Any record_type starting with "Waveform" is a viewable triggered + // event (the timestamp-header byte layout varies across firmware but + // doesn't change the sample stream). Only block when there's actually + // no waveform payload to plot. + const isWaveformLike = !!(data.record_type || '').match(/^Waveform/i); if (decoded === 0) { document.getElementById('empty-state').style.display = 'flex'; document.getElementById('empty-state').querySelector('p').textContent = - data.record_type === 'Waveform' + isWaveformLike ? 'No samples decoded — check server logs' - : `Record type "${data.record_type}" — waveform not supported yet`; + : `Record type "${data.record_type}" — not a waveform event`; document.getElementById('charts').style.display = 'none'; Object.values(charts).forEach(c => c.destroy()); charts = {}; return; } - const times = Array.from({length: decoded}, (_, i) => ((i - pretrig) / sr * 1000).toFixed(2)); + // Time axis: explicit ms values from t0_ms + i*dt_ms. More precise + // than the old (i - pretrig) / sr * 1000 since dt_ms came from the + // server with full float precision. + const times = Array.from({length: decoded}, (_, i) => (t0 + i * dt).toFixed(2)); document.getElementById('empty-state').style.display = 'none'; const chartsDiv = document.getElementById('charts'); chartsDiv.style.display = 'flex'; chartsDiv.innerHTML = ''; Object.values(charts).forEach(c => c.destroy()); charts = {}; - const micPeakPsi = data.peak_values?.micl_psi ?? null; - for (const [ch, color] of Object.entries(CHANNEL_COLORS)) { - const samples = channels[ch]; - if (!samples || samples.length === 0) continue; + const chData = channels[ch]; + if (!chData || !chData.values || chData.values.length === 0) continue; - const isGeo = ch !== 'Mic'; - let plotData, peakLabel, yUnit, ttFmt, tickFmt; + const plotData = chData.values; + const unit = chData.unit || (ch === 'MicL' ? 'psi' : 'in/s'); + const peak = chData.peak; + const peakTms = chData.peak_t_ms; - if (isGeo) { - const scale = geoAdcScale / 32767; - plotData = samples.map(s => s * scale); - // Use the device-recorded peak from the 0C waveform record — authoritative - // and matches Blastware. Computing from raw samples can catch rogue - // near-full-scale values from decoding artifacts. - const peakKey = { Tran:'tran_in_s', Vert:'vert_in_s', Long:'long_in_s' }[ch]; - const devicePeak = data.peak_values?.[peakKey] ?? null; - peakLabel = devicePeak != null ? `${devicePeak.toFixed(5)} in/s` : `${Math.max(...plotData.map(Math.abs)).toFixed(5)} in/s`; - yUnit = 'in/s'; - ttFmt = v => `${ch}: ${v.toFixed(5)} in/s`; - tickFmt = v => v.toFixed(4); + let peakLabel, ttFmt, tickFmt; + if (unit === 'psi') { + const peakDbl = (peak != null && peak > 0) + ? 20 * Math.log10(peak / DBL_REF) : -Infinity; + peakLabel = `${peakDbl.toFixed(1)} dBL (${peak != null ? peak.toExponential(2) : '—'} psi)`; + ttFmt = v => `${v.toExponential(3)} psi`; + tickFmt = v => v.toExponential(1); } else { - const peakCounts = Math.max(...samples.map(Math.abs)); - const micScale = (micPeakPsi !== null && peakCounts > 0) ? Math.abs(micPeakPsi) / peakCounts : 1.0; - plotData = samples.map(s => s * micScale); - const peakPsi = Math.max(...plotData.map(Math.abs)); - const peakDbl = peakPsi > 0 ? 20 * Math.log10(peakPsi / DBL_REF) : -Infinity; - peakLabel = `${peakDbl.toFixed(1)} dBL`; - yUnit = 'psi'; - ttFmt = v => `${v.toExponential(3)} psi`; - tickFmt = v => v.toExponential(1); + peakLabel = peak != null ? `${peak.toFixed(5)} in/s` : '—'; + ttFmt = v => `${ch}: ${v.toFixed(5)} in/s`; + tickFmt = v => v.toFixed(4); } + // Downsample for display when the chart would otherwise have to + // rasterise tens of thousands of points. Uses every-Nth — fine for + // monthly-summary glance work; analysis tools should use the .h5 file. const MAX_PTS = 4000; - let rTimes = times, rData = plotData; + let rTimes = times, rData = plotData, peakPlotIdx = -1; if (plotData.length > MAX_PTS) { const step = Math.ceil(plotData.length / MAX_PTS); rTimes = times.filter((_, i) => i % step === 0); rData = plotData.filter((_, i) => i % step === 0); + // Try to keep the peak sample from being downsampled away. + if (peakTms != null) { + const exactIdx = Math.round((peakTms - t0) / dt); + if (exactIdx >= 0 && exactIdx < plotData.length) { + peakPlotIdx = Math.floor(exactIdx / step); + } + } + } else if (peakTms != null) { + peakPlotIdx = Math.round((peakTms - t0) / dt); } const wrap = document.createElement('div'); @@ -1960,27 +2187,94 @@ function renderWaveform(data) { }, scales: { x: { type: 'category', ticks: { color:'#484f58', maxTicksLimit:10, maxRotation:0, callback:(v,i) => rTimes[i]+' ms' }, grid: { color:'#21262d' } }, - y: { ticks: { color:'#484f58', maxTicksLimit:5, callback: v => tickFmt(v) }, grid: { color:'#21262d' }, title: { display:true, text:yUnit, color:'#484f58', font:{size:10} } }, + y: { ticks: { color:'#484f58', maxTicksLimit:5, callback: v => tickFmt(v) }, grid: { color:'#21262d' }, title: { display:true, text:unit, color:'#484f58', font:{size:10} } }, }, }, plugins: [{ - id: 'triggerLine', + id: 'triggerAndPeakMarkers', afterDraw(chart) { - const zeroIdx = rTimes.findIndex(t => parseFloat(t) >= 0); - if (zeroIdx < 0) return; const { ctx, scales: {x, y} } = chart; - const px = x.getPixelForValue(zeroIdx); - ctx.save(); - ctx.beginPath(); - ctx.moveTo(px, y.top); ctx.lineTo(px, y.bottom); - ctx.strokeStyle = 'rgba(248,81,73,0.7)'; ctx.lineWidth = 1.5; - ctx.setLineDash([4, 3]); ctx.stroke(); ctx.restore(); + // Trigger line at t = trigger_ms (typically 0). + const triggerMs = data.trigger_ms ?? 0; + const zeroIdx = rTimes.findIndex(s => parseFloat(s) >= triggerMs); + if (zeroIdx >= 0) { + const px = x.getPixelForValue(zeroIdx); + ctx.save(); + ctx.beginPath(); + ctx.moveTo(px, y.top); ctx.lineTo(px, y.bottom); + ctx.strokeStyle = 'rgba(248,81,73,0.7)'; ctx.lineWidth = 1.5; + ctx.setLineDash([4, 3]); ctx.stroke(); ctx.restore(); + } + // Peak marker (dot at the channel's peak sample). + if (peakPlotIdx >= 0 && peakPlotIdx < rData.length) { + const px = x.getPixelForValue(peakPlotIdx); + const py = y.getPixelForValue(rData[peakPlotIdx]); + ctx.save(); + ctx.beginPath(); + ctx.arc(px, py, 3.2, 0, Math.PI * 2); + ctx.fillStyle = color; + ctx.strokeStyle = '#0d1117'; + ctx.lineWidth = 1.5; + ctx.fill(); ctx.stroke(); + ctx.restore(); + } }, }], }); } } +// One-time normaliser for the legacy /device/event/{idx}/waveform shape +// (samples as int16 ADC counts in `channels.{ch}: [...]`). Bridges the +// gap if a stale cache or non-upgraded server returns the old format. +function _legacyWaveformToPlotV1(data) { + const sr = data.sample_rate || 1024; + const pretrig = data.pretrig_samples || 0; + const decoded = data.samples_decoded || 0; + const total = data.total_samples || decoded; + const dt = 1000 / sr; + const t0 = -pretrig * dt; + + // Apply the CORRECT scale: 10 in/s full-scale for Normal range. + const geoFs = 10.0; + const geoScale = geoFs / 32768; + const ch = data.channels || {}; + const micPeak = data.peak_values?.micl_psi ?? null; + const micPeakCounts = (ch.MicL || ch.Mic || []).reduce((m, v) => Math.max(m, Math.abs(v)), 0); + const micScale = (micPeak != null && micPeakCounts > 0) ? micPeak / micPeakCounts : 1.0; + + const mkGeo = (counts) => { + if (!counts || !counts.length) return []; + return counts.map(c => c * geoScale); + }; + const mkMic = (counts) => { + if (!counts || !counts.length) return []; + return counts.map(c => c * micScale); + }; + + return { + schema: 'sfm.plot.v1', + event_id: data.event_id || null, + serial: data.serial || '', + timestamp: data.timestamp?.display || data.timestamp || '', + record_type: data.record_type, + waveform_key: null, + time_axis: { + sample_rate: sr, pretrig_samples: pretrig, total_samples: total, + n_samples: decoded, t0_ms: t0, dt_ms: dt, + rectime_seconds: data.rectime_seconds || 0, + }, + geo_range: 'normal', geo_full_scale_ips: geoFs, trigger_ms: 0, + channels: { + Tran: { unit:'in/s', values: mkGeo(ch.Tran), peak: data.peak_values?.tran_in_s ?? null, peak_t_ms: null }, + Vert: { unit:'in/s', values: mkGeo(ch.Vert), peak: data.peak_values?.vert_in_s ?? null, peak_t_ms: null }, + Long: { unit:'in/s', values: mkGeo(ch.Long), peak: data.peak_values?.long_in_s ?? null, peak_t_ms: null }, + MicL: { unit:'psi', values: mkMic(ch.MicL || ch.Mic), peak: micPeak, peak_t_ms: null }, + }, + peak_values: data.peak_values || {}, + }; +} + // ── DB tabs ──────────────────────────────────────────────────────────────────── let histLoaded = false; let unitsLoaded = false; @@ -2082,7 +2376,9 @@ async function loadHistory() { for (const ev of events) { const tr = document.createElement('tr'); const pvs = ev.peak_vector_sum; - const maxPPV = Math.max(ev.tran_ppv ?? 0, ev.vert_ppv ?? 0, ev.long_ppv ?? 0); + tr.classList.add('clickable'); + tr.title = 'Click to review (open sidecar editor)'; + tr.dataset.eventId = ev.id; tr.innerHTML = ` ${_fmtTs(ev.timestamp)} ${ev.serial ?? '—'} @@ -2095,24 +2391,157 @@ async function loadHistory() { ${ev.client ?? '—'} ${ev.record_type ?? '—'} ${ev.waveform_key ?? '—'} - ${ev.false_trigger ? 'FALSE' : ``} + ${ev.false_trigger ? 'FALSE' : ''} `; + tr.addEventListener('click', () => openSidecarModal(ev.id)); tbody.appendChild(tr); } } -async function toggleFalseTrigger(id, btn) { - btn.disabled = true; +// ── Sidecar review modal ─────────────────────────────────────────────────────── +// +// Opens on row click in the History table. Loads the .sfm.json sidecar +// for the event via GET /db/events/{id}/sidecar, lets the user toggle +// false_trigger / edit notes / set reviewer, and saves via PATCH on the +// same URL. This mirrors the workflow used by the monthly vibration +// summary process — most of the rich review UX lives in Terra-View; +// this is the SFM-standalone equivalent for testing / direct edits. + +let _scCurrentEventId = null; +let _scCurrentSidecar = null; + +async function openSidecarModal(eventId) { + _scCurrentEventId = eventId; + _scCurrentSidecar = null; + document.getElementById('sc-status').textContent = 'Loading sidecar…'; + document.getElementById('sc-status').className = 'sc-status'; + document.getElementById('sc-overlay').classList.add('visible'); + // Reset edit fields + document.getElementById('sc-edit-ft').checked = false; + document.getElementById('sc-edit-reviewer').value = ''; + document.getElementById('sc-edit-notes').value = ''; + try { - const r = await fetch(`${api()}/db/events/${id}/false_trigger?value=true`, { method: 'PATCH' }); - if (!r.ok) throw new Error(r.statusText); - btn.outerHTML = 'FALSE'; + const r = await fetch(`${api()}/db/events/${eventId}/sidecar`); + if (!r.ok) { + const e = await r.json().catch(() => ({})); + throw new Error(e.detail || r.statusText); + } + const data = await r.json(); + _scCurrentSidecar = data; + _renderSidecar(data); + document.getElementById('sc-status').textContent = ''; } catch (e) { - btn.disabled = false; - alert(`Failed to flag: ${e.message}`); + document.getElementById('sc-status').className = 'sc-status error'; + document.getElementById('sc-status').textContent = `Load failed: ${e.message}`; } } +function _renderSidecar(data) { + const ev = data.event || {}; + const pv = data.peak_values || {}; + const pi = data.project_info || {}; + const bw = data.blastware || {}; + const src = data.source || {}; + const rev = data.review || {}; + + document.getElementById('sc-title').textContent = `Event — ${bw.filename || ev.waveform_key || 'unknown'}`; + + const fmtPpv = v => (v == null ? '—' : Number(v).toFixed(5) + ' in/s'); + const fmtMic = v => { + if (v == null || v <= 0) return '—'; + const dbl = 20 * Math.log10(v / DBL_REF); + return `${dbl.toFixed(1)} dBL (${v.toExponential(2)} psi)`; + }; + + document.getElementById('sc-f-serial').textContent = ev.serial || '—'; + document.getElementById('sc-f-ts').textContent = ev.timestamp || '—'; + document.getElementById('sc-f-rt').textContent = ev.record_type || '—'; + document.getElementById('sc-f-sr').textContent = (ev.sample_rate ?? '—') + (ev.sample_rate ? ' sps' : ''); + document.getElementById('sc-f-key').textContent = ev.waveform_key || '—'; + + document.getElementById('sc-f-tran').textContent = fmtPpv(pv.transverse); + document.getElementById('sc-f-vert').textContent = fmtPpv(pv.vertical); + document.getElementById('sc-f-long').textContent = fmtPpv(pv.longitudinal); + document.getElementById('sc-f-pvs').textContent = fmtPpv(pv.vector_sum); + document.getElementById('sc-f-mic').textContent = fmtMic(pv.mic_psi); + + document.getElementById('sc-f-project').textContent = pi.project || '—'; + document.getElementById('sc-f-client').textContent = pi.client || '—'; + document.getElementById('sc-f-operator').textContent = pi.operator || '—'; + document.getElementById('sc-f-loc').textContent = pi.sensor_location || '—'; + + document.getElementById('sc-f-bw').textContent = bw.filename || '—'; + document.getElementById('sc-f-bwsize').textContent = bw.filesize != null ? `${bw.filesize} bytes` : '—'; + document.getElementById('sc-f-sha').textContent = bw.sha256 || '—'; + document.getElementById('sc-f-src').textContent = src.kind || '—'; + document.getElementById('sc-f-cap').textContent = src.captured_at || '—'; + + document.getElementById('sc-edit-ft').checked = !!rev.false_trigger; + document.getElementById('sc-edit-reviewer').value = rev.reviewer || ''; + document.getElementById('sc-edit-notes').value = rev.notes || ''; + + document.getElementById('sc-raw-json').textContent = JSON.stringify(data, null, 2); +} + +function closeSidecarModal() { + document.getElementById('sc-overlay').classList.remove('visible'); + _scCurrentEventId = null; + _scCurrentSidecar = null; +} + +function onSidecarOverlayClick(e) { + // Click on the dimmed backdrop (but NOT on the modal itself) closes. + if (e.target.id === 'sc-overlay') closeSidecarModal(); +} + +async function saveSidecarReview() { + if (!_scCurrentEventId) return; + const btn = document.getElementById('sc-save-btn'); + const status = document.getElementById('sc-status'); + btn.disabled = true; + status.className = 'sc-status'; + status.textContent = 'Saving…'; + + const review = { + false_trigger: document.getElementById('sc-edit-ft').checked, + reviewer: document.getElementById('sc-edit-reviewer').value.trim() || null, + notes: document.getElementById('sc-edit-notes').value, + }; + + try { + const r = await fetch(`${api()}/db/events/${_scCurrentEventId}/sidecar`, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ review }), + }); + if (!r.ok) { + const e = await r.json().catch(() => ({})); + throw new Error(e.detail || r.statusText); + } + const updated = await r.json(); + _scCurrentSidecar = updated; + _renderSidecar(updated); + status.className = 'sc-status ok'; + status.textContent = 'Saved.'; + // Refresh the History table so the false_trigger badge reflects the change. + if (typeof loadHistory === 'function') loadHistory(); + setTimeout(closeSidecarModal, 600); + } catch (e) { + status.className = 'sc-status error'; + status.textContent = `Save failed: ${e.message}`; + } finally { + btn.disabled = false; + } +} + +// Esc closes the modal. +document.addEventListener('keydown', (e) => { + if (e.key === 'Escape' && document.getElementById('sc-overlay').classList.contains('visible')) { + closeSidecarModal(); + } +}); + // ── Units tab ────────────────────────────────────────────────────────────────── async function loadUnits() { unitsLoaded = true; @@ -2274,5 +2703,81 @@ document.getElementById('api-base').value = window.location.origin; document.getElementById(id)?.addEventListener('keydown', e => { if (e.key === 'Enter') connectUnit(); }); }); + + +
+
+
+

Event

+ +
+
+
+

Event

+
+
Serial
+
Timestamp
+
Record type
+
Sample rate
+
Waveform key
+
+
+
+

Peaks

+
+
Tran
+
Vert
+
Long
+
PVS
+
Mic
+
+
+
+

Project

+
+
Project
+
Client
+
Operator
+
Location
+
+
+
+

Source / files

+
+
BW filename
+
BW filesize
+
BW sha256
+
Source kind
+
Captured at
+
+
+
+

Review (editable)

+
+ + +
+
+ + +
+ + +
+
+ Raw sidecar JSON (read-only peek) +

+      
+
+ +
+
+ diff --git a/sfm/waveform_store.py b/sfm/waveform_store.py index 83216f8..8d39032 100644 --- a/sfm/waveform_store.py +++ b/sfm/waveform_store.py @@ -1,34 +1,46 @@ """ sfm/waveform_store.py — On-disk store for Blastware-format event files. -Layout (flat per-serial): +Layout (flat per-serial, four files per event): - // ← event file (Blastware-readable binary) + // ← event file (BW-readable binary) //.a5.pkl ← pickled list of A5 S3Frame dicts + //.h5 ← clean waveform arrays (HDF5) + //.sfm.json ← modern sidecar (peaks, project, + review state, extensions) `` is whatever `minimateplus.blastware_file.blastware_filename` -produces for the event. The extension is NOT a fixed type tag — it encodes -the event timestamp (`AB0T` format: 2-char base-36 of `total_seconds % -1296`, literal `0`, then `W`=Full Waveform / `H`=Full Histogram for ACH -downloads, or 3-char `AB0` for direct/manual downloads). Every event's -filename therefore contains its own timestamp + record-type fingerprint and -collisions across the same physical event don't occur. +produces for the event. The extension is NOT a fixed type tag — it +encodes the event timestamp (`AB0T` format). -The `.a5.pkl` sidecar lets the event file be regenerated later if the -encoder changes — captures the raw 5A frame stream as serializable dicts so -the schema isn't tied to the `S3Frame` dataclass layout. +Roles: + - BW binary: what Blastware reads. Untouched. The user-facing review + waveform viewer. + - .a5.pkl: regenerative source. Lets the BW binary be rebuilt + byte-for-byte if the encoder changes. Never delete. + - .h5: clean per-channel waveform arrays in physical units (in/s for + geo, psi for mic) plus event metadata. Canonical format for + downstream analysis tools and the `/device/event/{idx}/waveform` + endpoint's plot-JSON output. + - .sfm.json: small, queryable metadata + review state. SQL + `events.false_trigger` is a derived index kept in sync via + `patch_sidecar()`. """ from __future__ import annotations +import datetime import logging import pickle +import shutil from pathlib import Path from typing import Optional +from minimateplus import event_file_io from minimateplus.blastware_file import blastware_filename, write_blastware_file from minimateplus.framing import S3Frame from minimateplus.models import Event +from sfm import event_hdf5 log = logging.getLogger("sfm.waveform_store") @@ -80,10 +92,22 @@ class WaveformStore: return d def paths_for(self, serial: str, filename: str) -> tuple[Path, Path]: - """Return (blastware_path, a5_pickle_path) for a given serial+filename.""" + """Return (blastware_path, a5_pickle_path) for a given serial+filename. + + For the sidecar path use `sidecar_path_for()` — kept separate so + existing callers don't need to unpack a 3-tuple. + """ d = self._serial_dir(serial) return d / filename, d / f"{filename}.a5.pkl" + def sidecar_path_for(self, serial: str, filename: str) -> Path: + """Return absolute path to the .sfm.json sidecar for a given event.""" + return self._serial_dir(serial) / f"{filename}.sfm.json" + + def hdf5_path_for(self, serial: str, filename: str) -> Path: + """Return absolute path to the .h5 clean-waveform file for a given event.""" + return self._serial_dir(serial) / f"{filename}.h5" + def open_blastware(self, serial: str, filename: str) -> Optional[Path]: """Return absolute path to an existing event file or None.""" bw_path, _ = self.paths_for(serial, filename) @@ -96,23 +120,43 @@ class WaveformStore: ev: Event, serial: str, a5_frames: list[S3Frame], + *, + source_kind: str = "sfm-live", + geo_range = "normal", ) -> dict: """ - Write the event file and its .a5.pkl sidecar for one event. + Write all four event-file artifacts for one event: + - BW binary + - .a5.pkl raw A5 frame pickle + - .h5 clean waveform (HDF5) + - .sfm.json modern sidecar (metadata + review) Returns a record dict suitable for persisting alongside the DB row: { "filename": "M529LKIQ.7M0W", "filesize": 8708, + "sha256": "a1b2c3...", "a5_pickle_filename": "M529LKIQ.7M0W.a5.pkl", + "hdf5_filename": "M529LKIQ.7M0W.h5", + "sidecar_filename": "M529LKIQ.7M0W.sfm.json", } - The exact extension is timestamp-encoded per event (see - `minimateplus.blastware_file.blastware_filename`). + `source_kind` flows into `sidecar.source.kind` — callers should + pass "sfm-live" (default) for the live endpoint and "sfm-ach" for + the ACH ingestion path. BW-imported events use save_imported_bw() + instead. - Idempotent: if the event file already exists, it is overwritten with - the freshly-encoded version (same bytes for the same a5_frames). + `geo_range` controls the ADC-counts → in/s scaling in the HDF5 + file ("normal" = 10 in/s FS, "sensitive" = 1.25 in/s FS). + Defaults to "normal" — callers with compliance-config access + should pass the actual unit setting so the saved samples are in + the right units. + + Idempotent: if the event file already exists, it is overwritten + with the freshly-encoded version (same bytes for the same + a5_frames) and the sidecar's review block is preserved across + re-saves. """ if not a5_frames: raise ValueError("WaveformStore.save: a5_frames is empty") @@ -121,17 +165,18 @@ class WaveformStore: filename = blastware_filename(ev, serial) bw_path, a5_path = self.paths_for(serial, filename) + sidecar_path = self.sidecar_path_for(serial, filename) + hdf5_path = self.hdf5_path_for(serial, filename) - # 1. encode the event file - # Delete any stale file at this path so partial writes never leak - # trailing bytes from a previous larger file (matches the live - # endpoint's defensive unlink). + # 1. encode the event file (defensive unlink prevents trailing-byte + # leaks from a previous larger file on synced/odd filesystems). try: bw_path.unlink() except FileNotFoundError: pass write_blastware_file(ev, a5_frames, bw_path) filesize = bw_path.stat().st_size + sha256 = event_file_io.file_sha256(bw_path) # 2. write the .a5.pkl sidecar try: @@ -145,14 +190,176 @@ class WaveformStore: with a5_path.open("wb") as fp: pickle.dump(payload, fp, protocol=pickle.HIGHEST_PROTOCOL) + # 3. write the .h5 clean-waveform file (samples in physical units). + # Best-effort: a write failure shouldn't sink the rest of the save + # (the HDF5 can be regenerated later from the .a5.pkl). + hdf5_filename: Optional[str] = None + try: + event_hdf5.write_event_hdf5( + hdf5_path, ev, + serial=serial, + geo_range=geo_range, + source_kind=source_kind, + ) + hdf5_filename = hdf5_path.name + except Exception as exc: + log.warning( + "save: HDF5 write failed for %s: %s — continuing without .h5", + hdf5_path, exc, + ) + + # 4. write the .sfm.json sidecar. Preserve any existing review + # block + extensions across re-saves so user edits aren't lost + # when the same event is re-downloaded (e.g. via Force refresh). + existing_review = None + existing_extensions = None + if sidecar_path.exists(): + try: + old = event_file_io.read_sidecar(sidecar_path) + existing_review = old.get("review") + existing_extensions = old.get("extensions") + except Exception as exc: + log.warning( + "save: existing sidecar at %s unreadable (%s); overwriting", + sidecar_path, exc, + ) + + sidecar = event_file_io.event_to_sidecar_dict( + ev, + serial=serial, + blastware_filename=filename, + blastware_filesize=filesize, + blastware_sha256=sha256, + source_kind=source_kind, + a5_pickle_filename=a5_path.name, + review=existing_review, + extensions=existing_extensions, + ) + event_file_io.write_sidecar(sidecar_path, sidecar) + log.info( - "WaveformStore.save serial=%s filename=%s filesize=%d frames=%d", + "WaveformStore.save serial=%s filename=%s filesize=%d frames=%d " + "h5=%s sidecar=%s", serial, filename, filesize, len(a5_frames), + hdf5_filename or "(skipped)", sidecar_path.name, ) return { "filename": filename, "filesize": filesize, + "sha256": sha256, "a5_pickle_filename": a5_path.name, + "hdf5_filename": hdf5_filename, + "sidecar_filename": sidecar_path.name, + } + + def save_imported_bw( + self, + bw_bytes: bytes, + source_path: Path, + *, + serial_hint: Optional[str] = None, + ) -> tuple[Event, dict]: + """ + Ingest a Blastware event file produced by an external tool + (Blastware's own ACH, manual download, etc.) where the source A5 + frames aren't available. + + Workflow: + 1. Parse the bytes via event_file_io.read_blastware_file (writes + a temp file to do that, since the parser takes a path). + 2. Resolve serial from BW filename (`

...`) or use + serial_hint. Falls back to "UNKNOWN". + 3. Copy the BW bytes verbatim into //. + 4. Write the .sfm.json sidecar with source.kind = "bw-import" + and a5_pickle_filename = None. Does NOT write a .a5.pkl + (no A5 source available; byte-for-byte regeneration not + possible — the on-disk BW file IS the byte-for-byte source). + + Returns (event, record_dict) so callers can both insert into + SeismoDb and surface the parsed Event. + """ + # Stash the bytes to a temp path so read_blastware_file (path-based) + # can parse without us duplicating its logic. + import tempfile + with tempfile.NamedTemporaryFile(suffix=".bw", delete=False) as tmp: + tmp.write(bw_bytes) + tmp_path = Path(tmp.name) + try: + ev = event_file_io.read_blastware_file(tmp_path) + finally: + try: + tmp_path.unlink() + except FileNotFoundError: + pass + + # Resolve serial. blastware_filename derives a 4-char prefix from + # the numeric serial (e.g. BE11529 → M529); we go the other way + # via the source filename if a hint wasn't given. + serial = serial_hint or _serial_from_bw_filename(source_path.name) or "UNKNOWN" + + # Use the source filename verbatim — it already encodes timestamp + # + record type per BW's AB0T scheme, and we want to preserve it + # so the file BW knows about can be opened back in BW. + filename = source_path.name + bw_path = self._serial_dir(serial) / filename + + # 1. copy bytes + bw_path.write_bytes(bw_bytes) + filesize = bw_path.stat().st_size + sha256 = event_file_io.file_sha256(bw_path) + + # 2. write the .h5 clean-waveform file from the parsed Event. + # Note: peaks here are computed from raw samples (the BW file + # doesn't carry the device-authoritative 0C peaks). Best-effort. + hdf5_path = self.hdf5_path_for(serial, filename) + hdf5_filename: Optional[str] = None + try: + event_hdf5.write_event_hdf5( + hdf5_path, ev, + serial=serial, + geo_range="normal", # BW file doesn't carry the range; assume Normal + source_kind="bw-import", + ) + hdf5_filename = hdf5_path.name + except Exception as exc: + log.warning( + "save_imported_bw: HDF5 write failed for %s: %s — continuing", + hdf5_path, exc, + ) + + # 3. write sidecar with source.kind = bw-import + sidecar_path = self.sidecar_path_for(serial, filename) + existing_review = None + if sidecar_path.exists(): + try: + existing_review = event_file_io.read_sidecar(sidecar_path).get("review") + except Exception: + pass + + sidecar = event_file_io.event_to_sidecar_dict( + ev, + serial=serial, + blastware_filename=filename, + blastware_filesize=filesize, + blastware_sha256=sha256, + source_kind="bw-import", + a5_pickle_filename=None, + review=existing_review, + ) + event_file_io.write_sidecar(sidecar_path, sidecar) + + log.info( + "WaveformStore.save_imported_bw serial=%s filename=%s filesize=%d " + "h5=%s (no .a5.pkl — A5 source unavailable for BW-imported files)", + serial, filename, filesize, hdf5_filename or "(skipped)", + ) + return ev, { + "filename": filename, + "filesize": filesize, + "sha256": sha256, + "a5_pickle_filename": None, + "hdf5_filename": hdf5_filename, + "sidecar_filename": sidecar_path.name, } def load_a5(self, serial: str, filename: str) -> Optional[list[S3Frame]]: @@ -169,3 +376,71 @@ class WaveformStore: log.warning("WaveformStore.load_a5: malformed sidecar at %s", a5_path) return None return [_dict_to_frame(d) for d in payload["frames"]] + + # ── modern .sfm.json sidecar accessors ────────────────────────────────────── + + def load_sidecar(self, serial: str, filename: str) -> Optional[dict]: + """Return the parsed .sfm.json sidecar dict, or None if missing.""" + path = self.sidecar_path_for(serial, filename) + if not path.exists(): + return None + try: + return event_file_io.read_sidecar(path) + except Exception as exc: + log.warning("load_sidecar: failed to read %s: %s", path, exc) + return None + + def patch_sidecar( + self, + serial: str, + filename: str, + *, + review: Optional[dict] = None, + extensions: Optional[dict] = None, + reviewer_now: bool = True, + ) -> Optional[dict]: + """ + JSON-merge-patch the .sfm.json sidecar's review/extensions blocks. + Returns the new full dict, or None if the sidecar doesn't exist. + """ + path = self.sidecar_path_for(serial, filename) + if not path.exists(): + return None + return event_file_io.patch_sidecar( + path, + review=review, + extensions=extensions, + reviewer_now=reviewer_now, + ) + + +# ── helpers ───────────────────────────────────────────────────────────────────── + +def _serial_from_bw_filename(name: str) -> Optional[str]: + """ + Reverse of `blastware_filename`'s serial-prefix encoding. + + BW filename format (V10.72): `

.` + where P = chr(ord('B') + floor(serial // 1000)) + and serial3 = f"{serial % 1000:03d}". + + Examples (from CLAUDE.md verification archive): + P036... → BE14036 H907... → BE6907 + M529... → BE11529 T003... → BE18003 + + Returns the inferred BE-prefix serial (e.g. "BE11529") or None when + the filename doesn't match the expected pattern. + """ + if not name: + return None + # First letter encodes the thousands group; next 3 chars encode the + # last 3 digits of the serial. + base = name.split(".", 1)[0] + if len(base) < 4 or not base[0].isalpha() or not base[1:4].isdigit(): + return None + prefix_letter = base[0].upper() + if prefix_letter < "B": + return None + thousands = ord(prefix_letter) - ord("B") + serial_num = thousands * 1000 + int(base[1:4]) + return f"BE{serial_num}" diff --git a/tests/test_event_file_io.py b/tests/test_event_file_io.py new file mode 100644 index 0000000..806d64f --- /dev/null +++ b/tests/test_event_file_io.py @@ -0,0 +1,348 @@ +""" +test_event_file_io.py — sidecar write/read/patch round-trips, +WaveformStore sidecar integration, and the BW-import path. + +Run: + python tests/test_event_file_io.py +""" + +from __future__ import annotations + +import json +import os +import sys +import tempfile +from pathlib import Path + +try: + import pytest +except ImportError: + pytest = None # type: ignore + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from minimateplus import event_file_io +from minimateplus.framing import S3Frame +from minimateplus.models import Event, Timestamp + + +# ── Fixtures shared with test_waveform_store.py ─────────────────────────────── + + +def _make_synthetic_event() -> tuple[Event, list[S3Frame]]: + """Same shape as tests/test_waveform_store.py — minimum viable Event + + A5 stream that makes write_blastware_file emit a parseable file. + + STRT is exactly 21 bytes; rectime_seconds lands at byte 18 to match + `_decode_a5_waveform`'s expected layout (which is also what + `read_blastware_file()` reads back).""" + key4 = bytes.fromhex("01110000") + rectime = 3 + strt = bytearray(21) + strt[0:4] = b"STRT" + strt[4:6] = b"\xff\xfe" + strt[6:10] = key4 # end_key (per data[23:27] in CLAUDE.md) + strt[10:14] = key4 # start_key (per data[27:31]) + strt[18] = rectime + strt = bytes(strt) + + probe_data = bytes(7) + strt + bytes(32) + probe = S3Frame(sub=0xA5, page_hi=0x10, page_lo=0x00, data=probe_data, + checksum_valid=True, chk_byte=0x00) + + sample = S3Frame(sub=0xA5, page_hi=0x00, page_lo=0x10, + data=bytes(7) + bytes(0x0200), checksum_valid=True, + chk_byte=0x00) + + # Build a valid 26-byte footer (0e 08 + ts1 + ts2 + 6 const + 2 crc) + # and embed it at the END of the terminator's contribution so + # write_blastware_file finds the real `0e 08` marker rather than + # falling back to slicing the last 26 bytes of zero garbage. + # ts byte order: [day][month][year_HI][year_LO][0x00][hour][min][sec] + footer = ( + b"\x0e\x08" + + bytes([6, 5, 0x07, 0xea, 0, 12, 34, 56]) # ts1 = 2026-05-06 12:34:56 + + bytes([6, 5, 0x07, 0xea, 0, 12, 35, 6]) # ts2 = ts1 + ~10s + + b"\x00\x01\x00\x02\x00\x00" + + b"\x00\x00" + ) + assert len(footer) == 26 + term_data = bytes(11) + bytes(38) + footer # 11 prefix + 38 pad + 26 footer = 75 + term = S3Frame(sub=0xA5, page_hi=0x00, page_lo=0x00, + data=term_data, checksum_valid=True, chk_byte=0x00) + + ev = Event(index=0) + ev._waveform_key = key4 + ev.timestamp = Timestamp( + raw=b"", flag=0x10, year=2026, unknown_byte=0, + month=5, day=6, hour=12, minute=34, second=56, + ) + ev.rectime_seconds = rectime + ev.record_type = "Waveform" + ev._a5_frames = [probe, sample, term] + return ev, [probe, sample, term] + + +# ── Sidecar write/read round-trip ───────────────────────────────────────────── + + +def test_event_to_sidecar_dict_shape(): + ev, _ = _make_synthetic_event() + d = event_file_io.event_to_sidecar_dict( + ev, + serial="BE11529", + blastware_filename="M529LKIQ.7M0W", + blastware_filesize=1024, + blastware_sha256="abcd" * 16, + source_kind="sfm-live", + a5_pickle_filename="M529LKIQ.7M0W.a5.pkl", + ) + + assert d["schema_version"] == event_file_io.SCHEMA_VERSION + assert d["kind"] == event_file_io.SIDECAR_KIND + assert d["event"]["serial"] == "BE11529" + assert d["event"]["timestamp"] == "2026-05-06T12:34:56" + assert d["event"]["waveform_key"] == "01110000" + assert d["blastware"]["sha256"] == "abcd" * 16 + assert d["source"]["kind"] == "sfm-live" + assert d["review"] == { + "false_trigger": False, "reviewer": None, + "reviewed_at": None, "notes": "", + } + assert d["extensions"] == {} + + +def test_sidecar_write_and_read_round_trip(tmp_path: Path): + ev, _ = _make_synthetic_event() + path = tmp_path / "M529LKIQ.7M0W.sfm.json" + src = event_file_io.event_to_sidecar_dict( + ev, serial="BE11529", + blastware_filename="M529LKIQ.7M0W", blastware_filesize=1024, + blastware_sha256="x" * 64, source_kind="sfm-ach", + ) + event_file_io.write_sidecar(path, src) + loaded = event_file_io.read_sidecar(path) + assert loaded["event"] == src["event"] + assert loaded["blastware"] == src["blastware"] + assert loaded["source"]["kind"] == "sfm-ach" + + +def test_sidecar_rejects_unsupported_schema_version(tmp_path: Path): + path = tmp_path / "future.sfm.json" + path.write_text(json.dumps({ + "schema_version": event_file_io.SCHEMA_VERSION + 1, + "kind": event_file_io.SIDECAR_KIND, + })) + try: + event_file_io.read_sidecar(path) + except ValueError as exc: + assert "schema_version" in str(exc) + return + raise AssertionError("read_sidecar should have rejected unsupported version") + + +def test_sidecar_extensions_survive_round_trip(tmp_path: Path): + """Forward-compat: unknown keys inside `extensions` survive a r/w cycle.""" + ev, _ = _make_synthetic_event() + path = tmp_path / "x.sfm.json" + d = event_file_io.event_to_sidecar_dict( + ev, serial="BE11529", + blastware_filename="X", blastware_filesize=0, blastware_sha256="", + source_kind="sfm-live", + extensions={"vendor.acme.gps": {"lat": 40.7, "lon": -74.0}}, + ) + event_file_io.write_sidecar(path, d) + back = event_file_io.read_sidecar(path) + assert back["extensions"]["vendor.acme.gps"]["lat"] == 40.7 + + +def test_sidecar_patch_review_stamps_reviewed_at(tmp_path: Path): + ev, _ = _make_synthetic_event() + path = tmp_path / "patch.sfm.json" + event_file_io.write_sidecar( + path, + event_file_io.event_to_sidecar_dict( + ev, serial="BE11529", + blastware_filename="X", blastware_filesize=0, blastware_sha256="", + source_kind="sfm-live", + ), + ) + new = event_file_io.patch_sidecar( + path, + review={"false_trigger": True, "notes": "truck thump", "reviewer": "brian"}, + ) + assert new["review"]["false_trigger"] is True + assert new["review"]["notes"] == "truck thump" + assert new["review"]["reviewer"] == "brian" + assert new["review"]["reviewed_at"], "reviewed_at must be auto-stamped" + + on_disk = event_file_io.read_sidecar(path) + assert on_disk["review"]["false_trigger"] is True + + +# ── WaveformStore integration ───────────────────────────────────────────────── + + +def test_waveform_store_save_writes_sidecar(tmp_path: Path): + from sfm.waveform_store import WaveformStore + + store = WaveformStore(tmp_path / "waveforms") + ev, frames = _make_synthetic_event() + rec = store.save(ev, serial="BE11529", a5_frames=frames, source_kind="sfm-live") + + assert rec["sidecar_filename"].endswith(".sfm.json") + assert rec["sha256"] and len(rec["sha256"]) == 64 + + sc = store.load_sidecar("BE11529", rec["filename"]) + assert sc is not None + assert sc["blastware"]["filename"] == rec["filename"] + assert sc["blastware"]["sha256"] == rec["sha256"] + assert sc["source"]["kind"] == "sfm-live" + # The .a5.pkl reference should match the actual filename on disk. + assert sc["source"]["a5_pickle_filename"] == rec["a5_pickle_filename"] + + +def test_waveform_store_save_preserves_review_across_resave(tmp_path: Path): + """Re-saving the same event must preserve a user's prior review edits.""" + from sfm.waveform_store import WaveformStore + + store = WaveformStore(tmp_path / "waveforms") + ev, frames = _make_synthetic_event() + rec = store.save(ev, serial="BE11529", a5_frames=frames) + + # User flips false_trigger and adds a note. + store.patch_sidecar( + "BE11529", rec["filename"], + review={"false_trigger": True, "notes": "hello"}, + ) + + # A second save (e.g. Force refresh re-download) must keep those edits. + store.save(ev, serial="BE11529", a5_frames=frames) + sc = store.load_sidecar("BE11529", rec["filename"]) + assert sc["review"]["false_trigger"] is True + assert sc["review"]["notes"] == "hello" + + +def test_waveform_store_patch_sidecar_returns_none_when_missing(tmp_path: Path): + from sfm.waveform_store import WaveformStore + + store = WaveformStore(tmp_path / "waveforms") + out = store.patch_sidecar("BE99999", "no.such.W", review={"notes": "x"}) + assert out is None + + +# ── DB integration: sidecar_filename column + update_event_review ───────────── + + +def test_seismodb_persists_sidecar_filename_and_review_sync(tmp_path: Path): + from sfm.database import SeismoDb + + db = SeismoDb(tmp_path / "seismo_relay.db") + ev, _ = _make_synthetic_event() + + rec = { + "filename": "M529LKIQ.7M0W", + "filesize": 8708, + "a5_pickle_filename": "M529LKIQ.7M0W.a5.pkl", + "sidecar_filename": "M529LKIQ.7M0W.sfm.json", + } + inserted, _ = db.insert_events( + [ev], serial="BE11529", + waveform_records={ev._waveform_key.hex(): rec}, + ) + assert inserted == 1 + + rows = db.query_events(serial="BE11529") + row = rows[0] + assert row["sidecar_filename"] == rec["sidecar_filename"] + + # update_event_review keeps false_trigger column in sync with sidecar. + assert db.update_event_review(row["id"], {"false_trigger": True}) is True + again = db.get_event(row["id"]) + assert again["false_trigger"] == 1 + + # Empty review block (no false_trigger key) → no-op but row exists. + assert db.update_event_review(row["id"], {"notes": "x"}) is True + + +# ── BW-file reader (read_blastware_file) ───────────────────────────────────── + + +def test_read_blastware_file_round_trip(tmp_path: Path): + """write → read → key/timestamp/rectime survive.""" + from minimateplus.blastware_file import write_blastware_file, blastware_filename + + ev, frames = _make_synthetic_event() + bw_path = tmp_path / blastware_filename(ev, "BE11529") + write_blastware_file(ev, frames, bw_path) + + parsed = event_file_io.read_blastware_file(bw_path) + assert parsed._waveform_key == ev._waveform_key + assert parsed.rectime_seconds == ev.rectime_seconds + # Timestamp lands via the footer; year/month/day/hour/min/sec all survive. + assert parsed.timestamp is not None + assert parsed.timestamp.year == ev.timestamp.year + assert parsed.timestamp.month == ev.timestamp.month + assert parsed.timestamp.day == ev.timestamp.day + assert parsed.timestamp.hour == ev.timestamp.hour + assert parsed.timestamp.minute == ev.timestamp.minute + assert parsed.timestamp.second == ev.timestamp.second + # No A5 source recoverable. + assert parsed._a5_frames is None + # Peaks computed from samples (synthetic = zero samples → zero peaks). + assert parsed.peak_values is not None + assert parsed.peak_values.peak_vector_sum == 0.0 + + +def test_save_imported_bw_round_trip(tmp_path: Path): + """save_imported_bw stores a copy + sidecar with source.kind = bw-import.""" + from minimateplus.blastware_file import write_blastware_file, blastware_filename + from sfm.waveform_store import WaveformStore + + # Produce a BW file outside the store. + ev, frames = _make_synthetic_event() + fname = blastware_filename(ev, "BE11529") + src = tmp_path / fname + write_blastware_file(ev, frames, src) + + store = WaveformStore(tmp_path / "waveforms") + parsed_ev, rec = store.save_imported_bw(src.read_bytes(), source_path=src) + + assert rec["filename"] == fname + assert rec["a5_pickle_filename"] is None # no A5 source for BW imports + sc = store.load_sidecar("BE11529", fname) + assert sc is not None + assert sc["source"]["kind"] == "bw-import" + assert sc["source"]["a5_pickle_filename"] is None + # The stored binary should match the source byte-for-byte (we just copied). + stored_path = store.open_blastware("BE11529", fname) + assert stored_path is not None + assert stored_path.read_bytes() == src.read_bytes() + + +if __name__ == "__main__": + if pytest is not None: + pytest.main([__file__, "-v"]) + else: + import inspect + import traceback as _tb + + passed = failed = 0 + for _name, _fn in sorted(globals().items()): + if not _name.startswith("test_") or not callable(_fn): + continue + try: + _sig = inspect.signature(_fn) + if "tmp_path" in _sig.parameters: + with tempfile.TemporaryDirectory() as _td: + _fn(Path(_td)) + else: + _fn() + print(f"PASS {_name}") + passed += 1 + except Exception: + print(f"FAIL {_name}") + _tb.print_exc() + failed += 1 + print(f"\n{passed} passed, {failed} failed") + sys.exit(0 if failed == 0 else 1) diff --git a/tests/test_event_hdf5.py b/tests/test_event_hdf5.py new file mode 100644 index 0000000..86c3336 --- /dev/null +++ b/tests/test_event_hdf5.py @@ -0,0 +1,296 @@ +""" +test_event_hdf5.py — HDF5 codec round-trip + plot.v1 JSON shape sanity. + +Run: + python tests/test_event_hdf5.py +""" + +from __future__ import annotations + +import os +import sys +import tempfile +from pathlib import Path + +try: + import pytest +except ImportError: + pytest = None # type: ignore + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from minimateplus.framing import S3Frame +from minimateplus.models import Event, PeakValues, ProjectInfo, Timestamp +from sfm import event_hdf5 + + +# ── Fixtures ────────────────────────────────────────────────────────────────── + + +def _make_event_with_samples(n: int = 256) -> Event: + """An Event with synthetic int16 ADC samples on all four channels. + + Channel content: + - Tran: ramp from -16384 to +16383 (peak ≈ 5 in/s for Normal range) + - Vert: full-scale dirac at index n//2 (peak = 10 in/s) + - Long: zeros + - MicL: small ramp + Peak values are set on the event the way the device's 0C record + would supply them — used by the HDF5 writer for the mic per-count + factor. + """ + tran = [int((i / max(n - 1, 1)) * 32767 - 16384) for i in range(n)] + vert = [0] * n + if n: + vert[n // 2] = 32767 + long_ = [0] * n + mic = [int((i / max(n - 1, 1)) * 5000) for i in range(n)] + + ev = Event(index=0) + ev._waveform_key = bytes.fromhex("01110000") + ev.timestamp = Timestamp( + raw=b"", flag=0x10, + year=2026, unknown_byte=0, month=5, day=7, + hour=10, minute=0, second=0, + ) + ev.record_type = "Waveform" + ev.sample_rate = 1024 + ev.pretrig_samples = n // 4 + ev.total_samples = n + ev.rectime_seconds = n / 1024.0 + ev.raw_samples = {"Tran": tran, "Vert": vert, "Long": long_, "MicL": mic} + ev.peak_values = PeakValues( + tran=5.0, vert=10.0, long=0.0, + peak_vector_sum=10.0, micl=0.001, + ) + ev.project_info = ProjectInfo( + project="TestProj", client="TestClient", + operator="brian", sensor_location="loc-A", + ) + return ev + + +# ── HDF5 round-trip ─────────────────────────────────────────────────────────── + + +def test_hdf5_round_trip_preserves_metadata(tmp_path: Path): + ev = _make_event_with_samples() + h5 = tmp_path / "test.h5" + event_hdf5.write_event_hdf5( + h5, ev, serial="BE11529", geo_range="normal", + ) + + data = event_hdf5.read_event_hdf5(h5) + a = data["attrs"] + assert a["schema_version"] == event_hdf5.SCHEMA_VERSION + assert a["kind"] == event_hdf5.HDF5_KIND + assert a["serial"] == "BE11529" + assert a["waveform_key"] == "01110000" + assert a["sample_rate"] == 1024 + assert a["pretrig_samples"] == 64 + assert a["geo_range"] == "normal" + assert a["geo_full_scale_ips"] == 10.0 + assert a["project"] == "TestProj" + assert a["client"] == "TestClient" + assert a["operator"] == "brian" + # Float attrs may round-trip with tiny precision noise. + assert abs(a["peak_tran_ips"] - 5.0) < 1e-6 + assert abs(a["peak_vert_ips"] - 10.0) < 1e-6 + + +def test_hdf5_samples_in_physical_units_normal_range(tmp_path: Path): + """Vert hits ADC full-scale (32767) → with Normal range FS=10 in/s, + the HDF5 sample value should be ≈ 10 * 32767/32768 in/s.""" + ev = _make_event_with_samples() + h5 = tmp_path / "n.h5" + event_hdf5.write_event_hdf5(h5, ev, serial="BE11529", geo_range="normal") + data = event_hdf5.read_event_hdf5(h5) + + vert = data["samples"]["Vert"] + assert vert.dtype.name == "float32" + assert max(abs(v) for v in vert) > 9.99 # full-scale ≈ 10.0 + # The dirac was at n//2 → 32767 ADC counts. + expected_peak = 10.0 * 32767 / 32768 + assert abs(max(vert) - expected_peak) < 1e-3 + + +def test_hdf5_samples_in_physical_units_sensitive_range(tmp_path: Path): + """Same fixture but Sensitive range → full-scale 1.250 in/s.""" + ev = _make_event_with_samples() + h5 = tmp_path / "s.h5" + event_hdf5.write_event_hdf5(h5, ev, serial="BE11529", geo_range="sensitive") + data = event_hdf5.read_event_hdf5(h5) + + vert = data["samples"]["Vert"] + expected_peak = 1.250 * 32767 / 32768 + assert abs(max(vert) - expected_peak) < 1e-4 + + +def test_hdf5_includes_int16_samples(tmp_path: Path): + ev = _make_event_with_samples() + h5 = tmp_path / "i.h5" + event_hdf5.write_event_hdf5(h5, ev, serial="BE11529") + data = event_hdf5.read_event_hdf5(h5) + assert data["samples_int16"] is not None + assert "Tran" in data["samples_int16"] + assert data["samples_int16"]["Vert"].dtype.name == "int16" + + +def test_hdf5_rejects_unsupported_schema(tmp_path: Path): + """Round-tripping with a tampered schema_version raises ValueError.""" + import h5py + h5 = tmp_path / "future.h5" + with h5py.File(h5, "w") as f: + f.attrs["schema_version"] = 99 + f.attrs["kind"] = event_hdf5.HDF5_KIND + try: + event_hdf5.read_event_hdf5(h5) + except ValueError as exc: + assert "schema_version" in str(exc) + return + raise AssertionError("read_event_hdf5 should reject unsupported schema_version") + + +# ── plot.v1 JSON shape ──────────────────────────────────────────────────────── + + +def test_event_to_plot_json_shape(): + ev = _make_event_with_samples() + j = event_hdf5.event_to_plot_json(ev, serial="BE11529", geo_range="normal") + assert j["schema"] == "sfm.plot.v1" + assert j["serial"] == "BE11529" + assert j["geo_range"] == "normal" + assert j["geo_full_scale_ips"] == 10.0 + assert j["trigger_ms"] == 0.0 + + t = j["time_axis"] + assert t["sample_rate"] == 1024 + assert t["pretrig_samples"] == 64 + assert t["n_samples"] == 256 + # t0_ms = -pretrig * dt_ms = -64 * (1000/1024) ≈ -62.5 + assert abs(t["t0_ms"] - (-64 * 1000 / 1024)) < 1e-3 + assert abs(t["dt_ms"] - (1000 / 1024)) < 1e-6 + + chans = j["channels"] + for name in ("Tran", "Vert", "Long", "MicL"): + assert name in chans, f"missing channel: {name}" + assert chans[name]["unit"] in ("in/s", "psi") + assert "values" in chans[name] + assert "peak" in chans[name] + assert "peak_t_ms" in chans[name] + + # Values are in physical units: Vert peak ≈ 10 in/s. + assert max(chans["Vert"]["values"]) > 9.99 + + +def test_event_to_plot_json_peak_t_ms_locates_dirac(): + """The Vert channel's full-scale dirac at sample n//2 should produce + peak_t_ms = (n//2 - pretrig) * dt_ms.""" + ev = _make_event_with_samples(n=256) + j = event_hdf5.event_to_plot_json(ev, serial="BE11529") + expected = (128 - 64) * (1000 / 1024) # = 62.5 ms + assert abs(j["channels"]["Vert"]["peak_t_ms"] - expected) < 1e-2 + + +def test_plot_json_from_hdf5_round_trip(tmp_path: Path): + """plot_json_from_hdf5 produces the same shape as event_to_plot_json.""" + ev = _make_event_with_samples() + h5 = tmp_path / "rt.h5" + event_hdf5.write_event_hdf5(h5, ev, serial="BE11529", geo_range="normal") + + j_disk = event_hdf5.plot_json_from_hdf5(h5, event_id="abc-123") + j_mem = event_hdf5.event_to_plot_json(ev, serial="BE11529", geo_range="normal", event_id="abc-123") + + # Top-level shape parity + for k in ("schema", "serial", "geo_range", "geo_full_scale_ips", + "trigger_ms", "record_type", "waveform_key", "event_id"): + assert j_disk.get(k) == j_mem.get(k), f"mismatch on {k}" + assert j_disk["time_axis"]["sample_rate"] == j_mem["time_axis"]["sample_rate"] + assert j_disk["time_axis"]["n_samples"] == j_mem["time_axis"]["n_samples"] + + # Sample values must match within float32 precision. + for ch in ("Tran", "Vert", "Long", "MicL"): + a = j_disk["channels"][ch]["values"] + b = j_mem["channels"][ch]["values"] + assert len(a) == len(b) + if a: + mx = max(abs(x - y) for x, y in zip(a, b)) + assert mx < 1e-3, f"{ch}: max diff {mx}" + + +# ── WaveformStore integration with HDF5 ─────────────────────────────────────── + + +def _make_synthetic_event_for_save() -> tuple[Event, list[S3Frame]]: + """Same flavour as test_event_file_io.py but ensures _make_event_with_samples + is also wired into the BW write path so we can exercise WaveformStore.save.""" + ev = _make_event_with_samples(n=128) + # Build a minimum 3-frame A5 stream (probe + sample + term) — same + # shape used in the other test files. The encoder only really needs + # the STRT in the probe + a non-zero body and a footer in the term. + key4 = ev._waveform_key + rectime = int(ev.rectime_seconds or 0) or 1 + strt = bytearray(21) + strt[0:4] = b"STRT" + strt[4:6] = b"\xff\xfe" + strt[6:10] = key4 + strt[10:14] = key4 + strt[18] = rectime + probe = S3Frame(sub=0xA5, page_hi=0x10, page_lo=0x00, + data=bytes(7) + bytes(strt) + bytes(32), + checksum_valid=True, chk_byte=0x00) + sample = S3Frame(sub=0xA5, page_hi=0x00, page_lo=0x10, + data=bytes(7) + bytes(0x0200), checksum_valid=True, chk_byte=0x00) + footer = ( + b"\x0e\x08" + + bytes([7, 5, 0x07, 0xea, 0, 10, 0, 0]) + + bytes([7, 5, 0x07, 0xea, 0, 10, 0, 1]) + + b"\x00\x01\x00\x02\x00\x00\x00\x00" + ) + term = S3Frame(sub=0xA5, page_hi=0x00, page_lo=0x00, + data=bytes(11) + bytes(38) + footer, checksum_valid=True, chk_byte=0x00) + ev._a5_frames = [probe, sample, term] + return ev, [probe, sample, term] + + +def test_waveform_store_save_emits_hdf5(tmp_path: Path): + from sfm.waveform_store import WaveformStore + store = WaveformStore(tmp_path / "waveforms") + ev, frames = _make_synthetic_event_for_save() + rec = store.save(ev, serial="BE11529", a5_frames=frames, geo_range="normal") + + assert rec["hdf5_filename"], "hdf5_filename should be present in save() record" + h5 = store.hdf5_path_for("BE11529", rec["filename"]) + assert h5.exists(), "WaveformStore.save should produce a .h5 file" + # The HDF5 round-trip should match the event's metadata. + data = event_hdf5.read_event_hdf5(h5) + assert data["attrs"]["serial"] == "BE11529" + assert data["attrs"]["geo_range"] == "normal" + + +if __name__ == "__main__": + if pytest is not None: + pytest.main([__file__, "-v"]) + else: + import inspect + import traceback as _tb + + passed = failed = 0 + for _name, _fn in sorted(globals().items()): + if not _name.startswith("test_") or not callable(_fn): + continue + try: + _sig = inspect.signature(_fn) + if "tmp_path" in _sig.parameters: + with tempfile.TemporaryDirectory() as _td: + _fn(Path(_td)) + else: + _fn() + print(f"PASS {_name}") + passed += 1 + except Exception: + print(f"FAIL {_name}") + _tb.print_exc() + failed += 1 + print(f"\n{passed} passed, {failed} failed") + sys.exit(0 if failed == 0 else 1)