feat: v0.15.0

### Added

- **Layered event storage architecture.** Each event now lands as four files in the per-serial waveform store, each with a clear role:
  - `<filename>` — the Blastware-readable binary (BW file). Untouched.
  - `<filename>.a5.pkl` — the raw 5A frames (regenerative source).
  - `<filename>.h5` — clean per-channel waveform arrays in physical units (in/s for geo, psi for mic) plus event metadata (HDF5 with gzip compression). This is the canonical format for downstream analysis tools.
  - `<filename>.sfm.json` — the modern review/metadata sidecar (peaks, project, source provenance, review state, extensions).

  SQLite (`seismo_relay.db`) is the searchable index over all four.
- **Plot-ready waveform JSON (`sfm.plot.v1`).** The `/device/event/{idx}/waveform` and `/db/events/{id}/waveform.json` endpoints now return samples in physical units with explicit time-axis metadata, peak markers, and per-channel unit hints — no more guessing the ADC-to-velocity scale client-side. The webapp waveform viewer was rewritten to consume this shape.
- **In-app waveform viewer accuracy fix.** The standalone SFM webapp viewer was scaling geophone amplitudes by `geoAdcScale / 32767` (≈ 6.206 / 32767), where `geoAdcScale = 6.206053` is the device's *in/s per V* hardware constant — not the ADC-counts-to-velocity factor. This silently scaled every plot ~38% too low for Normal-range geophones (the correct full-scale is 10.0 in/s, or 1.25 in/s for Sensitive). Conversion is now done server-side using the geo_range from compliance config; the client just plots.
- New `sfm/event_hdf5.py` module: `write_event_hdf5()`, `read_event_hdf5()`, plus a plot-JSON helper.
- Backfill script extended to also emit `.h5` for existing events.

### Dependencies

- Added `h5py>=3.10` and `numpy>=1.24` for the HDF5 storage layer.
- Added `python-multipart>=0.0.7` (required by FastAPI for the `/db/import/blastware_file` endpoint introduced in this release).
2026-05-08 04:39:51 +00:00
parent 9afa3484f4
commit c641d5fc10
14 changed files with 3511 additions and 177 deletions
@@ -1362,29 +1362,36 @@ def _decode_waveform_record_into(data: bytes, event: Event) -> None:

    Modifies event in-place.
    """
-    # ── Record type ───────────────────────────────────────────────────────────
-    # Decoded from byte[1] (sub_code) first so we can gate timestamp parsing.
+    # ── Record type + format detection ────────────────────────────────────────
+    # `record_type` is the user-facing label ("Waveform" for any triggered
+    # event regardless of timestamp-header layout).  `fmt` is the internal
+    # format code used to pick the right Timestamp parser; it stays
+    # internal and doesn't leak to the API / sidecar / UI.
    try:
        event.record_type = _extract_record_type(data)
    except Exception as exc:
        log.warning("waveform record type decode failed: %s", exc)
+    fmt = _detect_record_format(data)

    # ── Timestamp ─────────────────────────────────────────────────────────────
-    # 9-byte format for sub_code=0x10 Waveform records:
-    #   [day][sub_code][month][year:2 BE][unknown][hour][min][sec]
-    # sub_code=0x10 and sub_code=0x03 have different timestamp byte layouts.
-    # Both confirmed against Blastware event reports (BE11529, 2026-04-01 and 2026-04-03).
-    if event.record_type == "Waveform":
+    # Three timestamp-header layouts have been observed across BE11529
+    # firmware S338.17 — each picks a different Timestamp parser:
+    #   "single_shot": 9-byte  [day][0x10][month][year:2][unk][h][m][s]
+    #   "continuous":  10-byte [0x10][day][0x10][month][year:2][unk][h][m][s]
+    #   "short":       8-byte  [day][month][year:2][unk][h][m][s]
+    # All decoded into the same Timestamp dataclass — only the byte
+    # offsets differ.
+    if fmt == "single_shot":
        try:
            event.timestamp = Timestamp.from_waveform_record(data)
        except Exception as exc:
-            log.warning("waveform record timestamp decode failed: %s", exc)
-    elif event.record_type == "Waveform (Continuous)":
+            log.warning("single_shot record timestamp decode failed: %s", exc)
+    elif fmt == "continuous":
        try:
            event.timestamp = Timestamp.from_continuous_record(data)
        except Exception as exc:
            log.warning("continuous record timestamp decode failed: %s", exc)
-    elif event.record_type == "Waveform (Short)":
+    elif fmt == "short":
        try:
            event.timestamp = Timestamp.from_short_record(data)
        except Exception as exc:
@@ -1562,17 +1569,33 @@ def _decode_a5_waveform(
        log.warning("_decode_a5_waveform: STRT record truncated (%dB)", len(strt))
        return

+    # STRT byte layout (21 bytes; verified against M529LIY6 reference files
+    # and re-confirmed against live BE11529 captures, 2026-05-08):
+    #   [0:4]   b'STRT'
+    #   [4:6]   0xff 0xfe  fixed
+    #   [6:10]  end_key      (4-byte device flash address where event ends)
+    #   [10:14] start_key    (4-byte device flash address where event starts)
+    #   [14:18] device-specific (4 bytes; semantics not pinned)
+    #   [18]    0x46         record-type marker (= 70 in decimal — NOT rectime!)
+    #   [19]    device-specific
+    #   [20]    rectime      (uint8 seconds, user-set Record Time)
+    #
+    # The earlier reading of `rectime_seconds = strt[18]` always returned
+    # 70 for a real waveform event because it was reading the 0x46 marker.
+    # Caller should prefer compliance_config.record_time when available
+    # (that's the authoritative user-set value) and fall back to this.
    total_samples   = struct.unpack_from(">H", strt, 8)[0]
    pretrig_samples = struct.unpack_from(">H", strt, 16)[0]
-    rectime_seconds = strt[18]
+    rectime_seconds = strt[20]

    event.total_samples   = total_samples
    event.pretrig_samples = pretrig_samples
    event.rectime_seconds = rectime_seconds

    log.debug(
-        "_decode_a5_waveform: STRT total_samples=%d pretrig=%d rectime=%ds",
-        total_samples, pretrig_samples, rectime_seconds,
+        "_decode_a5_waveform: STRT total_samples=%d pretrig=%d rectime=%ds "
+        "(strt[18]=0x%02X record-type marker, strt[20]=0x%02X rectime)",
+        total_samples, pretrig_samples, rectime_seconds, strt[18], strt[20],
    )

    # ── Collect per-frame waveform bytes with global offset tracking ─────────
@@ -1724,22 +1747,30 @@ def _detect_record_format(data: bytes) -> Optional[str]:

 def _extract_record_type(data: bytes) -> Optional[str]:
    """
-    Return a human-readable name for the waveform record format detected
-    in the first bytes of a 210-byte 0C record.
+    Return a user-facing name for a waveform record.  All three internal
+    timestamp-header layouts represent the *same* user concept — a
+    triggered seismic event — so they all surface as just "Waveform".

-    Maps to the format codes returned by _detect_record_format():
-      "single_shot" → "Waveform"
-      "continuous"  → "Waveform (Continuous)"
-      "short"       → "Waveform (Short)"
-      None          → "Unknown(XX.YY.ZZ)"
+    The internal format code is preserved for parsing logic (timestamp
+    decoder selection) but doesn't leak into the API / UI / sidecar.
+    Callers that need the raw layout can call `_detect_record_format`
+    directly.
+
+    Background: across BE11529 firmware S338.17 we've observed three
+    different byte layouts for the timestamp header at the start of the
+    0C record (8 / 9 / 10 bytes, distinguished by the position of the
+    BE-encoded year and the presence of `0x10` marker bytes).  An older
+    revision of this code labelled them "Waveform" / "Waveform
+    (Continuous)" / "Waveform (Short)", which created the false
+    impression that there were three distinct event "types" the user
+    could configure.  In reality the user only ever picks Single Shot
+    vs Continuous vs Histogram in the compliance config — the byte
+    layout is a firmware-internal detail that doesn't always correlate
+    with that choice.
    """
    fmt = _detect_record_format(data)
-    if fmt == "single_shot":
+    if fmt in ("single_shot", "continuous", "short"):
        return "Waveform"
-    if fmt == "continuous":
-        return "Waveform (Continuous)"
-    if fmt == "short":
-        return "Waveform (Short)"
    if len(data) >= 3:
        log.warning(
            "_extract_record_type: unrecognized header: data[0:3]=%02X %02X %02X",