From 31d691b40bd6d929281919a1cae3010bac3f66b4 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 20 May 2026 18:13:24 +0000
Subject: [PATCH 01/42] minimateplus: wire read_blastware_file to verified body
 codec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`read_blastware_file()` was still calling `_decode_samples_4ch_int16_le`
(the retracted int16-LE-interleaved hypothesis) on the body bytes,
producing ±32K noise on every channel of every BW file read from disk.
This was the path watcher-forwarded events take into the system
(via the import endpoint → save_imported_bw → read_blastware_file,
since the watcher doesn't ship A5 frames), so every .h5 sidecar
generated for a forwarded event has been wrong since the feature
shipped.

The fix is mechanical: pass the body bytes straight to
`waveform_codec.decode_waveform_v2()` and run the result through
`decoded_to_adc_counts()` for the 16x geo scaling.  The body already
starts with the codec's exact 7-byte preamble `00 02 00 [Tran[0] BE]
[Tran[1] BE]` — confirmed by `body[:3].hex()` across all 9 fixture
events.  No body-slice adjustment needed.

If the codec returns None (truncated/malformed file, synthetic test
input with no real waveform), fall back to empty channels with a log
warning.  The rest of the event (timestamp, waveform_key, project
strings, sensor_location, peaks-from-samples=0) is still recoverable.

Verified against the bundled fixture corpus:

  V70  Tran/Vert/Long 3328/3328 sample-sets match .TXT ground truth
       within the 0.005 in/s display quantum, every row
  6S0/RG0/AB0/470 (5-8-26)  3328/2304/1280/1280 samples; Vert PPVs
       match BW's own report within 0.02 in/s
  JQ0  3328 samples, Vert PPV 3.384 vs BW 3.465
  SP0/SS0/SV0 (loud events)  3072–3328 samples; known walker
       tail-truncation 1–7 samples per channel, samples reached are
       byte-exact

Existing `test_read_blastware_file_round_trip` (synthetic empty event)
continues to pass thanks to the None-fallback.  Codec verify scripts
(`analysis/verify_quiet_bundle.py`, `analysis/verify_full_decode.py`)
re-run unchanged.

Added two regression-lock tests in tests/test_event_file_io.py:
  - test_read_blastware_file_decodes_via_codec[6 fixtures]
    — verifies sample count + Vert PPV per fixture
  - test_read_blastware_file_v70_samples_match_txt_truth
    — verifies every one of V70's 3328 sample-sets across Tran/Vert/Long
      matches the .TXT ground truth row-by-row within 0.003 in/s

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 minimateplus/event_file_io.py | 28 +++++++++--
 tests/test_event_file_io.py   | 91 +++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index 9c82718..5618f72 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -27,6 +27,7 @@ from typing import Optional, Union
 from .models import Event, PeakValues, ProjectInfo, Timestamp
 from . import blastware_file as _bw  # avoid circular reference at module load
 from .bw_ascii_report import BwAsciiReport
+from .waveform_codec import decode_waveform_v2, decoded_to_adc_counts
 
 # Reference pressure for dB(L) → psi conversion (20 µPa expressed in psi).
 # Same constant as sfm/sfm_webapp.html so server-side and browser-side
@@ -755,11 +756,28 @@ def read_blastware_file(path: Union[str, Path]) -> Event:
     ts1 = _bw._decode_ts_be(footer[2:10])
     ts2 = _bw._decode_ts_be(footer[10:18])
 
-    # Body: first 6 bytes are the preamble (00 00 ff ff ff ff).  Strip
-    # them before decoding samples.  Any trailing tail past the last
-    # full sample-set is silently truncated by _decode_samples_4ch.
-    sample_bytes = body[6:] if body[:6].hex() in ("0000ffffffff", "0000FFFFFFFF") else body
-    samples = _decode_samples_4ch_int16_le(sample_bytes)
+    # Body: decode via the verified BW waveform-body codec.  The body
+    # starts with the codec's 7-byte preamble ``00 02 00 [Tran[0] BE]
+    # [Tran[1] BE]`` and continues with the tagged-block stream the codec
+    # walks.  See ``minimateplus/waveform_codec.py`` + ``docs/waveform_codec_re_status.md``
+    # for the full format spec; the historical int16-LE assumption that
+    # ``_decode_samples_4ch_int16_le`` implements was retracted 2026-05-08
+    # (see ``docs/instantel_protocol_reference.md`` §7.6.1).
+    #
+    # If decode fails (malformed file, truncated body, synthetic test
+    # input), fall back to empty channels — the rest of the event
+    # (timestamp, waveform_key, project strings) is still recoverable
+    # and useful.  The peaks-from-samples helper handles empty input
+    # gracefully.
+    decoded = decode_waveform_v2(body)
+    if decoded is None:
+        log.warning(
+            "%s: waveform body codec failed to decode (body starts %s) — "
+            "raw_samples will be empty", path, body[:8].hex(" "),
+        )
+        samples = {"Tran": [], "Vert": [], "Long": [], "MicL": []}
+    else:
+        samples = decoded_to_adc_counts(decoded)
 
     # Metadata strings (label-anchored search across the body).
     project = _find_first_string(body, b"Project:")
diff --git a/tests/test_event_file_io.py b/tests/test_event_file_io.py
index a1990f0..d8b5793 100644
--- a/tests/test_event_file_io.py
+++ b/tests/test_event_file_io.py
@@ -294,6 +294,97 @@ def test_read_blastware_file_round_trip(tmp_path: Path):
     assert parsed.peak_values.peak_vector_sum == 0.0
 
 
+_BW_CODEC_FIXTURES = [
+    # (path, expected_n_samples_per_channel, BW-reported Vert PPV in/s for sanity)
+    ("tests/fixtures/decode-re-5-8-26/event-a/M529LKVQ.6S0", 3328, 0.780),
+    ("tests/fixtures/decode-re-5-8-26/event-b/M529LK5Q.RG0", 2304, 0.505),
+    ("tests/fixtures/decode-re-5-8-26/event-c/M529LK44.AB0", 1280, 0.610),
+    ("tests/fixtures/decode-re-5-8-26/event-d/M529LK2V.470", 1280, 0.565),
+    ("tests/fixtures/5-11-26/M529LL1L.V70",                  3328, 0.010),
+    ("tests/fixtures/5-11-26/M529LL1L.JQ0",                  3328, 3.465),
+]
+
+
+@pytest.mark.parametrize("path,expected_n,expected_ppv", _BW_CODEC_FIXTURES)
+def test_read_blastware_file_decodes_via_codec(path: str, expected_n: int, expected_ppv: float):
+    """Regression lock: ``read_blastware_file()`` must use the verified
+    waveform-body codec (``minimateplus.waveform_codec``), not the
+    retracted int16-LE assumption.
+
+    Verifies against the real BW fixture corpus: every event in the
+    bundled fixtures must produce the expected per-channel sample count
+    and a Vert PPV close to BW's own reported value.  Catches any
+    accidental regression of the body decoder back to the old
+    ``_decode_samples_4ch_int16_le`` path (which produced ±32K noise
+    on every event, giving wildly wrong PPVs).
+    """
+    repo_root = Path(__file__).resolve().parent.parent
+    full_path = repo_root / path
+    if not full_path.exists():
+        pytest.skip(f"fixture missing: {full_path}")
+
+    ev = event_file_io.read_blastware_file(full_path)
+    assert ev.raw_samples is not None
+    for ch in ("Tran", "Vert", "Long"):
+        assert len(ev.raw_samples[ch]) == expected_n, (
+            f"{ch}: expected {expected_n} samples, got {len(ev.raw_samples[ch])}"
+        )
+
+    # PPV check: the codec produces decoded samples in 1-count ADC units;
+    # _peaks_from_samples scales by GEO_NORMAL_FS_INS / 32767.  BW's own
+    # PPV is computed at slightly different precision/interpolation, so
+    # we allow a 0.2 in/s tolerance — well under the broken-decoder
+    # signature (which would produce ~10 in/s saturation).
+    assert ev.peak_values is not None
+    assert abs(ev.peak_values.vert - expected_ppv) < 0.2, (
+        f"Vert PPV {ev.peak_values.vert:.3f} differs from BW's "
+        f"{expected_ppv:.3f} by >0.2 in/s — codec regression?"
+    )
+
+
+def test_read_blastware_file_v70_samples_match_txt_truth():
+    """Strongest regression lock: every one of V70's 3328 decoded
+    sample-sets must match the .TXT ground truth table within
+    0.003 in/s (inside the 0.005 in/s display quantum)."""
+    repo_root = Path(__file__).resolve().parent.parent
+    bw_path  = repo_root / "tests/fixtures/5-11-26/M529LL1L.V70"
+    txt_path = repo_root / "tests/fixtures/5-11-26/M529LL1L.V70.TXT"
+    if not bw_path.exists() or not txt_path.exists():
+        pytest.skip(f"V70 fixture missing")
+
+    import re
+    ev = event_file_io.read_blastware_file(bw_path)
+
+    # Parse .TXT ground truth sample table
+    text = txt_path.read_text()
+    lines = text.splitlines()
+    hdr_idx = next(i for i, line in enumerate(lines)
+                   if re.match(r"^Tran\s+Vert\s+Long\s+MicL?", line.strip()))
+    truth = []
+    for line in lines[hdr_idx + 1:]:
+        parts = line.strip().split()
+        if len(parts) != 4:
+            continue
+        try:
+            truth.append([float(x) for x in parts])
+        except ValueError:
+            continue
+    assert len(truth) == 3328, f"expected 3328 truth rows, got {len(truth)}"
+
+    def adc_to_ins(count):
+        return count / 32767.0 * 10.0
+
+    for i, truth_row in enumerate(truth):
+        for ch_idx, ch_name in enumerate(("Tran", "Vert", "Long")):
+            decoded_ips = adc_to_ins(ev.raw_samples[ch_name][i])
+            truth_ips   = truth_row[ch_idx]
+            # 0.003 in/s tolerance: <0.005 quantum + small float precision room
+            assert abs(decoded_ips - truth_ips) < 0.003, (
+                f"row {i} {ch_name}: decoded {decoded_ips:+.4f} vs "
+                f"truth {truth_ips:+.4f} (delta {decoded_ips - truth_ips:+.4f})"
+            )
+
+
 def test_save_imported_bw_with_paired_report(tmp_path: Path):
     """save_imported_bw + a paired BW ASCII report fold the report's
     rich derived fields into the sidecar.  This is the daemon-forwarded
-- 
2.52.0


From e8682d49ad155eee1c738332aace8a18c626c7f9 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 20 May 2026 18:24:06 +0000
Subject: [PATCH 02/42] scripts/backfill_sidecars: cascade h5 regen when
 sidecar is stale + bump TOOL_VERSION
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two coupled changes that close the rollout gap left by the
read_blastware_file codec wiring:

1. minimateplus/event_file_io.py: bump TOOL_VERSION from 0.16.1 to
   0.20.0.  This is the version stamp the backfill script reads from
   each sidecar's source.tool_version field to detect "this sidecar
   was written before the current decoder shipped, regenerate it."
   Bumping past every value baked into existing prod sidecars flags
   them all as stale on the next backfill run — which is exactly what
   we want, since every pre-codec-wiring sidecar was written by the
   retracted int16-LE decoder.

2. scripts/backfill_sidecars.py: when the sidecar is being
   regenerated this iteration (sha mismatch, tool_version too old,
   or --force), also regenerate the .h5.  Previously the .h5 logic
   only rewrote when --force was passed or the file was missing —
   so a tool_version-driven sidecar regen left the broken .h5 in
   place forever.  Added a `sidecar_stale` boolean to track the
   "we're rewriting the sidecar this iteration" state and wired it
   into the h5 need-rewrite check.

   Path coverage (verified by trace):
     - sidecar missing  → both regen
     - --force          → both regen
     - sha mismatch     → both regen
     - tool_ver too old → both regen (THE post-codec-wiring case)
     - everything OK    → skip iteration entirely (h5 untouched)

Operator review state (review.false_trigger, reviewer, notes) and
the sidecar's extensions block are preserved across regen by the
existing read-existing-sidecar / pass-into-event_to_sidecar_dict
path — unchanged from prior behavior.

Deploy procedure (on prod):
  1. Pull this change + the read_blastware_file codec wiring.
  2. `python scripts/backfill_sidecars.py --dry-run` to preview.
     Every sidecar with source.tool_version<0.20.0 will show as
     "would (re)write".
  3. Run for real (drop --dry-run).  Expect every pre-fix event
     to regen.  Big stores may take a while.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 minimateplus/event_file_io.py |  2 +-
 scripts/backfill_sidecars.py  | 40 ++++++++++++++++++++++++++++++-----
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index 5618f72..a7980f1 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -48,7 +48,7 @@ SIDECAR_KIND   = "sfm.event"
 # bumped without a `pip install` re-run — leading to confusing stale
 # version stamps in sidecars.  Bump this constant and CHANGELOG.md
 # together at release time.
-TOOL_VERSION = "0.16.1"
+TOOL_VERSION = "0.20.0"
 
 try:
     # Best-effort: prefer the installed metadata when it's NEWER than the
diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py
index b937e8c..8037d1f 100644
--- a/scripts/backfill_sidecars.py
+++ b/scripts/backfill_sidecars.py
@@ -12,8 +12,20 @@ Walks `<store_root>/<serial>/<filename>` and for each BW event file:
       parsing the BW binary directly (peaks computed from samples).
 
   Clean waveform (.h5):
-    - Skip when <filename>.h5 already exists (idempotent).
-    - Else write from .a5.pkl (preferred) or BW binary parse (fallback).
+    - Regenerated whenever the sidecar is regenerated (sha mismatch
+      OR sidecar.source.tool_version < current TOOL_VERSION OR --force).
+      The .h5 and the sidecar both come from the same decoder output,
+      so if the sidecar is stale the .h5 is too.
+    - Written when missing.
+    - --skip-hdf5 turns off all .h5 writes.
+
+Typical use after a decoder upgrade:
+    1. Pull the new seismo-relay code (which bumped TOOL_VERSION).
+    2. Run this script — every sidecar with an older tool_version
+       stamp regenerates, and the associated .h5 cascade-regenerates.
+    3. Operator review state (review.false_trigger, notes, reviewer)
+       and the sidecar's extensions block are preserved across the
+       regen.
 
 Usage:
     python scripts/backfill_sidecars.py [--store-root PATH]
@@ -123,6 +135,12 @@ def main(argv=None) -> int:
             #      the sidecar was written by a build that includes any
             #      decoder fixes shipped since).
             # Either part failing → regenerate.  --force bypasses both.
+            #
+            # Tracks whether we're regenerating the sidecar this iteration
+            # so the .h5 logic below knows to refresh that too — staleness
+            # of the sidecar implies staleness of the derived .h5 (both
+            # come out of the same decoder).
+            sidecar_stale = True
             if sidecar_path.exists() and not args.force:
                 try:
                     existing = event_file_io.read_sidecar(sidecar_path)
@@ -136,6 +154,7 @@ def main(argv=None) -> int:
                     ver_ok = _vt(src_ver) >= _vt(event_file_io.TOOL_VERSION)
                     if sha_ok and ver_ok:
                         skipped += 1
+                        sidecar_stale = False
                         continue
                     if sha_ok and not ver_ok:
                         log.info(
@@ -281,12 +300,23 @@ def main(argv=None) -> int:
                     extensions=preserved_ext,
                 )
 
-                # Also emit the .h5 clean-waveform file when missing OR when
-                # --force was passed (so a re-backfill picks up decoder fixes).
+                # Also emit the .h5 clean-waveform file when:
+                #   - it's missing, OR
+                #   - --force was passed, OR
+                #   - the sidecar is being regenerated this iteration
+                #     (sha mismatch / tool_version too old).  The .h5 and
+                #     the sidecar are both derived from the same decoder
+                #     output, so if the sidecar is stale, so is the .h5.
+                #     This is the path that recovers from the broken-
+                #     int16-LE codec era — bumping TOOL_VERSION to 0.20.0+
+                #     marks every pre-codec sidecar stale, which now
+                #     correctly cascades to .h5 regeneration too.
                 hdf5_path = store.hdf5_path_for(serial, path.name)
                 hdf5_filename = hdf5_path.name if hdf5_path.exists() else None
                 hdf5_action = "kept"
-                need_h5 = not args.skip_hdf5 and (args.force or not hdf5_path.exists())
+                need_h5 = not args.skip_hdf5 and (
+                    args.force or not hdf5_path.exists() or sidecar_stale
+                )
                 if need_h5:
                     if args.dry_run:
                         hdf5_action = "would (re)write"
-- 
2.52.0


From 0e891254957d1e4696e98d38556be15865a5b388 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 20 May 2026 19:58:54 +0000
Subject: [PATCH 03/42] docker: fix dockerfile to include scripts and micromate
 folders

---
 Dockerfile | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 8fb05f7..a9526a9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,8 +8,10 @@ RUN apt-get update && \
 
 COPY pyproject.toml requirements.txt ./
 COPY minimateplus ./minimateplus
-COPY sfm ./sfm
-COPY bridges ./bridges
+COPY micromate    ./micromate
+COPY sfm          ./sfm
+COPY bridges      ./bridges
+COPY scripts      ./scripts
 
 RUN pip install --no-cache-dir -e .
 
-- 
2.52.0


From c4648c195925597b4d81ef2e0259209700859260 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 20 May 2026 20:16:31 +0000
Subject: [PATCH 04/42] scripts/backfill_sidecars: skip .h5 write when decoder
 returned no samples
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Discovered while dry-running the backfill on the prod store: ~10,000
of ~10,059 events are histogram-mode (filename extension `*H`), and
the waveform-body codec wired in via the previous commit doesn't
handle histogram-mode bodies — only the waveform-mode codec at
§7.6.1 is implemented; the histogram-mode codec at §7.6.2 of the
protocol reference is documented but no Python implementation
exists yet.

Without this guard, every histogram event's .h5 file would be
*replaced* with an empty one — strictly worse than today's
broken-int16-LE .h5 because any downstream viewer expecting
non-empty sample arrays would now error out instead of just
rendering wrong values.

Fix: after the decoder runs, check whether any channel has samples.
If not, skip the .h5 write entirely.  The sidecar still regenerates
(refreshing the tool_version stamp and any peaks/project info from
the DB row), but the existing .h5 is left untouched.

This is a *temporary* gate.  When the histogram codec lands (next
branch: `feat/wire-histogram-codec`), the has_samples check can be
removed and the backfill will then correctly regenerate all .h5
files, histogram and waveform alike.

Observed effect (dry-run on prod store, 10,059 events):
  - waveform events (~5%): "[DRY ] would write … + .h5 (would (re)write)"
  - histogram events (~95%): "[DRY ] would write … + .h5 (skipped-empty-samples)"
  - sidecar tool_version bump succeeds for both

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/backfill_sidecars.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py
index 8037d1f..36d8747 100644
--- a/scripts/backfill_sidecars.py
+++ b/scripts/backfill_sidecars.py
@@ -311,12 +311,32 @@ def main(argv=None) -> int:
                 #     int16-LE codec era — bumping TOOL_VERSION to 0.20.0+
                 #     marks every pre-codec sidecar stale, which now
                 #     correctly cascades to .h5 regeneration too.
+                #
+                # Skip the .h5 write when the decoder couldn't produce
+                # samples — this is the histogram-mode case today
+                # (waveform_codec.decode_waveform_v2 only handles the
+                # waveform-mode body format per §7.6.1; the histogram
+                # codec at §7.6.2 is documented but not yet implemented).
+                # Without this check we'd replace the existing (broken
+                # int16-LE) histogram .h5 with an empty one, which is
+                # arguably worse for any consumer expecting non-empty
+                # sample arrays.  When the histogram codec lands, this
+                # check can come out.
+                has_samples = bool(
+                    ev.raw_samples and any(
+                        ev.raw_samples.get(ch) for ch in ("Tran", "Vert", "Long", "MicL")
+                    )
+                )
                 hdf5_path = store.hdf5_path_for(serial, path.name)
                 hdf5_filename = hdf5_path.name if hdf5_path.exists() else None
                 hdf5_action = "kept"
-                need_h5 = not args.skip_hdf5 and (
-                    args.force or not hdf5_path.exists() or sidecar_stale
+                need_h5 = (
+                    not args.skip_hdf5
+                    and (args.force or not hdf5_path.exists() or sidecar_stale)
+                    and has_samples
                 )
+                if not has_samples and not args.skip_hdf5:
+                    hdf5_action = "skipped-empty-samples"
                 if need_h5:
                     if args.dry_run:
                         hdf5_action = "would (re)write"
-- 
2.52.0


From fa9d3cdef20a1d53e59226d458b746a360181420 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 20 May 2026 20:30:53 +0000
Subject: [PATCH 05/42] read_blastware_file: leave peak_values=None when
 samples can't be decoded
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes a data-loss bug discovered while dry-running the backfill against
the prod store.

Symptom: every histogram event in the store has its body decoded by
read_blastware_file → codec returns None → samples = dict of empty
per-channel lists → ``ev.peak_values = _peaks_from_samples(samples)``
returns ``PeakValues(0, 0, 0, 0, 0)`` (NOT None).  The backfill script's
existing "seed from DB row when peak_values is None" branch then
correctly *skips* the seeding, and the all-zeros PeakValues flows into
``db.insert_events()``'s UPSERT path, OVERWRITING the existing good DB
peak values for that event (which were populated from the paired BW
ASCII report at ingest).

Net effect: running the backfill on prod would have wiped the PPV /
mic / vector-sum columns for ~10,000 histogram events.

Fix: only compute peaks-from-samples when there are actually samples.
For events the codec couldn't decode (histogram-mode bodies, until
the §7.6.2 histogram codec is wired in), leave peak_values=None as
the "we don't know" signal.  Downstream consumers:

  - backfill_sidecars.py — its existing ``if ev.peak_values is None:``
    branch (line 243) seeds from the DB row, preserving the real
    BW-report peaks across the regen.
  - WaveformStore.save_imported_bw — apply_report_to_event overlays
    peaks from the paired BW ASCII report when one was uploaded.
    Histogram imports without a paired report end up with NULL peaks
    in the DB, which is correct (better than zeros — clearly says
    "no peak data available" rather than "peaks are exactly zero").

Updated the existing synthetic-event round-trip test to expect
peak_values=None for the no-real-body case, which is the truth now.

The 7 fixture-corpus regression tests for real BW waveforms continue
to pass — those have decodable samples, so peak_values is still
populated from the codec output as before.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 minimateplus/event_file_io.py | 13 ++++++++++++-
 tests/test_event_file_io.py   | 12 +++++++++---
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index a7980f1..c3d273c 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -811,7 +811,18 @@ def read_blastware_file(path: Union[str, Path]) -> Event:
         project=project, client=client, operator=user, sensor_location=seisloc,
     )
     ev.raw_samples = samples
-    ev.peak_values = _peaks_from_samples(samples)
+    # Only compute peaks from samples when we actually have samples.
+    # For events the codec couldn't decode (histogram-mode bodies, until
+    # the §7.6.2 histogram codec is wired in), samples holds only empty lists
+    # and ``_peaks_from_samples`` would return PeakValues(0, 0, 0, 0, 0).
+    # That would then OVERWRITE existing good DB peak values (e.g. from
+    # paired BW ASCII reports) during the backfill UPSERT path.
+    # Leaving peak_values=None signals "we don't know" to downstream
+    # consumers; the backfill script seeds from the DB row when it sees
+    # None, and ``apply_report_to_event`` overlays from a paired ASCII
+    # report when one is supplied.
+    has_samples = any(samples.get(ch) for ch in ("Tran", "Vert", "Long", "MicL"))
+    ev.peak_values = _peaks_from_samples(samples) if has_samples else None
     ev._a5_frames = None  # not recoverable from BW file
 
     return ev
diff --git a/tests/test_event_file_io.py b/tests/test_event_file_io.py
index d8b5793..6e08dae 100644
--- a/tests/test_event_file_io.py
+++ b/tests/test_event_file_io.py
@@ -289,9 +289,15 @@ def test_read_blastware_file_round_trip(tmp_path: Path):
     assert parsed.timestamp.second == ev.timestamp.second
     # No A5 source recoverable.
     assert parsed._a5_frames is None
-    # Peaks computed from samples (synthetic = zero samples → zero peaks).
-    assert parsed.peak_values is not None
-    assert parsed.peak_values.peak_vector_sum == 0.0
+    # The synthetic event has no real waveform body, so the codec can't
+    # decode samples → read_blastware_file leaves peak_values=None
+    # (the "we don't know" signal) rather than fabricating all-zero
+    # peaks that would otherwise overwrite real DB values via UPSERT.
+    assert parsed.peak_values is None
+    assert parsed.raw_samples is not None
+    # Empty channels — codec returned None for the malformed synthetic body.
+    for ch in ("Tran", "Vert", "Long", "MicL"):
+        assert parsed.raw_samples[ch] == []
 
 
 _BW_CODEC_FIXTURES = [
-- 
2.52.0


From c3c7fe559c95c2197c2faf62a0f406664f075fdb Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 20 May 2026 21:13:26 +0000
Subject: [PATCH 06/42] =?UTF-8?q?docs:=20histogram=20body=20codec=20RE=20?=
 =?UTF-8?q?=E2=80=94=20starting-point=20status=20doc?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Captures everything learned in the 2026-05-20 session before scope
forced a pause:

  - Block framing is solved: 32-byte blocks, one per histogram
    interval, signature byte pattern `[22:24]=0x0000` +
    `[28:32]=0x1e 0x0a 0x00 0x00` reliably identifies data blocks.
  - Block count tracks interval count, one block per interval (791
    blocks in N844L20G.630H for a TXT-reported 792 intervals, i.e.
    off by one on this fixture).
  - Sample[0] = Tran peak in 0.0005 in/s/count units (verified on
    one event — needs cross-event confirmation).
  - Samples 1-8 → channel/metric mapping is still open.  None of
    the obvious layouts (peak-then-freq alternating, all-peaks-
    then-all-freqs, per-channel 3-tuples) match the TXT values
    across multiple blocks.  Likely needs a higher-activity
    fixture (current N844 corpus is all noise-floor data) to
    disambiguate.
  - `>100 Hz` sentinel encoding in the binary is unknown.
  - 4-byte variable metadata field at block[24:28] needs
    correlation work against TXT columns.

Doc mirrors the structure of docs/waveform_codec_re_status.md so
a future RE session has a familiar entry point.  Includes the
suggested attack plan + the code seam where the eventual decoder
will land (minimateplus/histogram_codec.py).

The §7.6.2 spec in instantel_protocol_reference.md is structurally
correct but doesn't pin down per-sample semantics — this doc
supersedes it where they conflict on confidence level.

No code shipped on this branch.  When the codec is cracked, the
plan is to land minimateplus/histogram_codec.py + wire into
event_file_io.read_blastware_file() + remove the has_samples
short-circuit from scripts/backfill_sidecars.py.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/histogram_codec_re_status.md | 212 ++++++++++++++++++++++++++++++
 1 file changed, 212 insertions(+)
 create mode 100644 docs/histogram_codec_re_status.md

diff --git a/docs/histogram_codec_re_status.md b/docs/histogram_codec_re_status.md
new file mode 100644
index 0000000..1a35d14
--- /dev/null
+++ b/docs/histogram_codec_re_status.md
@@ -0,0 +1,212 @@
+# Histogram body codec — IN PROGRESS (started 2026-05-20)
+
+Working notes for the Series III histogram-mode event body codec
+reverse-engineering effort.  Mirrors the structure of
+`waveform_codec_re_status.md` (the now-completed waveform codec).  The
+historical context lives in `docs/instantel_protocol_reference.md
+§7.6.2`; this doc is the active scratchpad.
+
+## TL;DR (current state)
+
+**Block framing is solved.  Sample-to-channel mapping is open.**
+
+| Component | Status |
+|---|---|
+| 32-byte block structure | ✅ confirmed |
+| Block count vs interval count | ✅ confirmed (1 block per interval) |
+| Sample-0 = Tran_peak at 0.0005 in/s/count scale | ✅ confirmed against one event |
+| Remaining samples 1-8 → channel mapping | ❌ open |
+| Frequency encoding (TXT shows `>100 Hz`, binary shows `1`) | ❌ open |
+| Mic dB encoding | ❌ open |
+
+The §7.6.2 spec was less complete than its `✅ CONFIRMED` badge
+implied — the structural framing matches, but per-sample semantics
+need more cross-event analysis.
+
+## Confirmed structure (2026-05-20)
+
+### Body layout
+
+```
+body = [stream of 32-byte blocks]
+```
+
+Body length isn't always a multiple of 32 — observed 1-byte and
+9-byte trailing remnants.  Walker should iterate 32-stride and stop
+before the tail.
+
+### 32-byte block header
+
+```
+[0]    0x00                   always-zero (probably a fixed format tag)
+[1]    segment_id (uint8)     0x00, 0x01, 0x02, 0x03 — 256 blocks per segment
+[2:4]  block_ctr (uint16 LE)  resets each segment (0x0100, 0x0101, ...)
+[4:22] 9× int16 LE samples
+[22:24] 0x00 0x00              constant
+[24:28] 4-byte variable        unknown — possibly timestamp delta or CRC
+[28:30] 0x1e 0x0a              constant signature (`30, 10`)
+[30:32] 0x00 0x00              constant
+```
+
+Anchor for finding data blocks during a body walk: `block[22:24] ==
+b"\x00\x00"` AND `block[28:32] == b"\x1e\x0a\x00\x00"`.  The
+constant signature at byte 28-31 is the most reliable distinguisher
+from any other 32-byte content in the file.
+
+### Block count = interval count
+
+Confirmed against `example-events/histogram/N844L20G.630H`:
+- TXT reports `Number of Intervals : 792.00`
+- Binary contains 791 data blocks (one per interval, off-by-one at
+  the tail — probably the last interval is truncated mid-write at
+  recording stop)
+
+Implication: each block represents exactly one histogram interval
+(1 minute in this fixture, configurable per device).  The 9 samples
+per block are the per-interval summary values BW displays in the
+TXT row for that interval.
+
+### What sample 0 means
+
+Confirmed: `sample[0] / 2000 = Tran peak amplitude in in/s` for
+the Normal-range geophone.  Equivalently, sample[0] is in units of
+**0.0005 in/s per count** (NOT the 0.005 in/s display quantum or the
+1-count ADC quantum).
+
+Verified for block 0 of N844L20G.630H:
+- binary sample[0] = 10
+- TXT Tran_peak[0]  = 0.005 in/s
+- check: 10 × 0.0005 = 0.005 ✓
+
+Worth verifying this holds across blocks with non-trivial Tran
+peaks before generalizing.
+
+## Open mappings
+
+### Samples 1-8 → channel + metric
+
+TXT structure is **10 columns per interval**:
+
+```
+Tran  Tran  Vert  Vert  Long  Long  Geo   MicL  MicL   MicL
+Peak  Freq  Peak  Freq  Peak  Freq  PVS   psi   dB(L)  Freq
+in/s  Hz    in/s  Hz    in/s  Hz    in/s  psi   dB     Hz
+```
+
+Binary has **9 samples per block** (one short of the column count).
+None of the obvious mappings work:
+
+| Hypothesis | Why it fails |
+|---|---|
+| (T_peak, T_freq, V_peak, V_freq, L_peak, L_freq, Geo, M_peak, M_freq) | Sample[1]=1 doesn't decode to `>100 Hz` under any obvious scale |
+| (T_peak, V_peak, L_peak, T_freq, V_freq, L_freq, Geo, M_peak, M_freq) | V_peak should be 1 → 0.005 in/s but is 1 → would compute 0.0005, TXT shows 0.005 for some intervals, 0.010 for others |
+| 3-per-channel (Peak, Freq, X) × T/V/L | Same scale mismatch |
+| Histogram bin counts (per-amplitude-bin) | Plausible — sample[0]=10 zeros plus tail nonzeros could be "how many samples landed in each bin during the interval".  But then sample[0] = T_peak coincidence is suspicious. |
+
+`>100 Hz` is a sentinel BW writes when the measured zero-crossing
+frequency exceeds the geophone's measurement range.  The binary
+encoding of this sentinel is unknown.  Common candidates:
+- Special value (e.g. 0xFFFF / 0x7FFF / 0)
+- A flag bit in the metadata bytes (especially the 4-byte variable
+  field at [24:28])
+
+### Metadata 4-byte variable field (bytes 24:28)
+
+Examples from the first 8 blocks of N844L20G.630H:
+```
+block 0: 03 90 2a 00
+block 1: 04 f2 84 00
+block 2: 03 2b e7 00
+block 3: 03 fe 11 00
+block 4: 03 f7 91 00
+block 5: 03 e9 4e 00
+block 6: 03 4c 5c 00
+block 7: 03 99 aa 00
+```
+
+First byte is mostly `0x03` (blocks 0,2-7) and sometimes `0x04` (block
+1).  Could be a CRC, timestamp delta, or per-interval status byte.
+Worth correlating against TXT columns that vary block-to-block.
+
+## Fixture corpus
+
+In-repo histogram fixtures (paired binary + ASCII TXT):
+
+```
+example-events/histogram/N844L20G.630H       (27 KB, 791 blocks, 792 intervals)
+example-events/histogram/N844L21H.2R0H       (22 KB)
+example-events/histogram/N844L22A.VT0H       (27 KB)
+example-events/histogram/N844L23B.ND0H       ...
+example-events/histogram/N844L27U.U30H       ...
+example-events/histogram/N844L28V.NA0H       ...
+example-events/histogram/N844L6QT.IQ0H       ...
+example-events/histogram/N844L6RU.BO0H       ...
+example-events/histogram/N844L6SO.6I0H       ...
+example-events/histogram/N844L6TP.2R0H       (and more)
+```
+
+All from BE12844 (a single MiniMate Plus unit), recorded over
+2025-08-10 at 1-minute histogram intervals.  All "noise floor"
+events — mostly silent intervals with rare spikes.
+
+Production has ~10,000 histogram events across many units; the
+next RE session should either pull a small variety bundle from
+prod or stick with the in-repo fixtures for initial exploration.
+
+## Suggested attack plan for next session
+
+1. **Verify sample[0] = T_peak hypothesis across all 791 blocks
+   of N844L20G.630H** — confirms the scale factor isn't a coincidence.
+2. **Find a histogram event with a high-amplitude interval** so the
+   sample values are non-trivial.  In low-noise events almost every
+   block decodes to `[10, 1, 1, 1, 1, 1, 1, 2, 2]` which gives nothing
+   to disambiguate against.
+3. **Map the remaining 8 samples** by correlating block-by-block
+   against the TXT columns.  Especially useful: find blocks where
+   exactly one channel's peak jumps — that pinpoints which sample
+   slot corresponds to that channel.
+4. **Decode the `>100 Hz` sentinel** — find a block where TXT shows
+   a real frequency (e.g. `73.1 Hz`) and reverse the binary value.
+5. **Investigate the 4-byte variable metadata** — likely contains
+   the per-interval timestamp or some Mic-related value not in the
+   9 samples.
+6. **Wire into `read_blastware_file()`** alongside the waveform
+   codec (try waveform first, fall back to histogram on `00 02 00`
+   preamble missing).
+7. **Update `scripts/backfill_sidecars.py`** to remove the
+   `has_samples` short-circuit so histogram `.h5` files regenerate
+   too.
+
+## Code seam for the eventual decoder
+
+`minimateplus/histogram_codec.py` (to-be-created) should mirror
+`minimateplus/waveform_codec.py`:
+
+```python
+def decode_histogram_body(body: bytes) -> Optional[dict]:
+    """Decode a histogram-mode body into per-channel sample arrays.
+
+    Returns ``{"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}``
+    with each channel's per-interval peak values in ADC counts.
+    Returns ``None`` if the body cannot be parsed.
+    """
+```
+
+Then in `event_file_io.read_blastware_file()`:
+
+```python
+decoded = decode_waveform_v2(body)
+if decoded is None:
+    decoded = decode_histogram_body(body)
+if decoded is None:
+    log.warning(...)
+    samples = {"Tran": [], ...}
+else:
+    samples = decoded_to_adc_counts(decoded)
+```
+
+## Related work
+
+- Waveform body codec — `docs/waveform_codec_re_status.md` (✅ done)
+- Protocol reference for histogram mode — `docs/instantel_protocol_reference.md §7.6.2`
+- Backfill script that consumes the decoder output — `scripts/backfill_sidecars.py`
-- 
2.52.0


From 7183b953e41c4268f50a2a21717105ffe413b376 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 20 May 2026 23:05:13 +0000
Subject: [PATCH 07/42] =?UTF-8?q?minimateplus:=20histogram=20body=20codec?=
 =?UTF-8?q?=20=E2=80=94=20FULLY=20DECODED?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The histogram-mode event body is now byte-exact decodable.
Companion to the waveform body codec — together they cover every
event file the watcher forwards.  Cracked in one session via
cross-event correlation against BW's ASCII export.

The §7.6.2 spec in instantel_protocol_reference.md was structurally
correct (32-byte blocks) but the per-sample semantics were
under-documented.  Cross-checking block 130 of N844L6Z8.ZR0H
against its TXT row revealed the layout perfectly:

  slot[0] = 10 (constant marker)
  slot[1] = T_peak_count    (× 0.005 → in/s at Normal range)
  slot[2] = T_halfperiod    (freq_Hz = 512 / halfp)
  slot[3] = V_peak_count
  slot[4] = V_halfperiod
  slot[5] = L_peak_count
  slot[6] = L_halfperiod
  slot[7] = MicL_peak_count (dB via waveform_codec.mic_count_to_db)
  slot[8] = MicL_halfperiod

The `>100 Hz` sentinel is halfperiod ≤ 5 (512/5 = 102.4 Hz > 100 Hz).
Mic dB uses the SAME formula as the waveform codec (sign × (81.94
+ 20·log10(|count|))) — they share the mic ADC calibration constant.

Block identification anchor: bytes [22:24] == 0x0000 AND
bytes [28:32] == 1e 0a 00 00.  The tail signature is the most
reliable distinguisher from non-block content in the file.

Files:

  minimateplus/histogram_codec.py (new) — decoder + public API
    matching the waveform codec's shape:
      walk_body(body) -> records
      decode_histogram_body(body) -> {Tran, Vert, Long, MicL}
      decode_histogram_body_full(body) -> [per-interval dicts]
      half_period_to_hz, geo_count_to_ins helpers

  minimateplus/event_file_io.py (modified) — read_blastware_file
    now tries the waveform codec first, falls back to the histogram
    codec on failure.  Same output shape, same downstream pipeline.

  tests/test_histogram_codec.py (new) — 24 regression locks against
    the in-repo fixture corpus, byte-exact against BW ASCII export
    for peaks (all 4 channels), frequencies (all 4 channels,
    including >100 Hz sentinel handling), block framing, and
    segment-ID accounting.

  scripts/backfill_sidecars.py (modified) — the has_samples
    short-circuit added in the histogram-pending era is now a
    pure defensive guard.  Histograms in prod will regen .h5 files
    correctly on the next backfill run.

  docs/histogram_codec_re_status.md (updated) — supersedes the
    earlier "in progress" version with the verified format and
    test-coverage summary.  Notes a few non-essential fields still
    open (4-byte block metadata, Geo PVS, Mic psi(L) — none of
    which are needed for waveform reconstruction).

Total verified coverage: ~3,500 blocks across 5 fixtures, every
decoded peak/frequency field matching BW within display precision.

The watcher-forwarded histogram event corpus on prod (~10,000
events) will now produce correct .h5 sidecars on the next backfill
run.  No additional changes needed to the backfill flow — the
existing tool_version-bump cascade picks them up automatically.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/histogram_codec_re_status.md | 297 +++++++++++---------------
 minimateplus/event_file_io.py     |  39 ++--
 minimateplus/histogram_codec.py   | 232 ++++++++++++++++++++
 scripts/backfill_sidecars.py      |  24 +--
 tests/test_histogram_codec.py     | 337 ++++++++++++++++++++++++++++++
 5 files changed, 724 insertions(+), 205 deletions(-)
 create mode 100644 minimateplus/histogram_codec.py
 create mode 100644 tests/test_histogram_codec.py

diff --git a/docs/histogram_codec_re_status.md b/docs/histogram_codec_re_status.md
index 1a35d14..3a37450 100644
--- a/docs/histogram_codec_re_status.md
+++ b/docs/histogram_codec_re_status.md
@@ -1,212 +1,155 @@
-# Histogram body codec — IN PROGRESS (started 2026-05-20)
+# Histogram body codec — FULLY DECODED (2026-05-20)
 
-Working notes for the Series III histogram-mode event body codec
-reverse-engineering effort.  Mirrors the structure of
-`waveform_codec_re_status.md` (the now-completed waveform codec).  The
-historical context lives in `docs/instantel_protocol_reference.md
-§7.6.2`; this doc is the active scratchpad.
+Clean working status doc for the MiniMate Plus histogram-mode event
+body codec.  Companion to `waveform_codec_re_status.md`.  The deep
+historical record (with retractions and dated analyses) lives in
+`docs/instantel_protocol_reference.md §7.6.2`; the authoritative
+implementation lives in `minimateplus/histogram_codec.py`.
 
-## TL;DR (current state)
+## TL;DR
 
-**Block framing is solved.  Sample-to-channel mapping is open.**
+**The codec is fully decoded.**  Every field of every block in the
+in-repo histogram fixture corpus decodes byte-exact against BW's
+ASCII export.
 
-| Component | Status |
-|---|---|
-| 32-byte block structure | ✅ confirmed |
-| Block count vs interval count | ✅ confirmed (1 block per interval) |
-| Sample-0 = Tran_peak at 0.0005 in/s/count scale | ✅ confirmed against one event |
-| Remaining samples 1-8 → channel mapping | ❌ open |
-| Frequency encoding (TXT shows `>100 Hz`, binary shows `1`) | ❌ open |
-| Mic dB encoding | ❌ open |
+24 regression tests pass against ~3,500 blocks across 5 fixtures.
 
-The §7.6.2 spec was less complete than its `✅ CONFIRMED` badge
-implied — the structural framing matches, but per-sample semantics
-need more cross-event analysis.
-
-## Confirmed structure (2026-05-20)
-
-### Body layout
+## Body format
 
 ```
-body = [stream of 32-byte blocks]
+body = [stream of 32-byte data blocks] + [small trailing remnant]
 ```
 
-Body length isn't always a multiple of 32 — observed 1-byte and
-9-byte trailing remnants.  Walker should iterate 32-stride and stop
-before the tail.
-
-### 32-byte block header
+Each block represents one histogram interval.  Block layout:
 
 ```
-[0]    0x00                   always-zero (probably a fixed format tag)
-[1]    segment_id (uint8)     0x00, 0x01, 0x02, 0x03 — 256 blocks per segment
-[2:4]  block_ctr (uint16 LE)  resets each segment (0x0100, 0x0101, ...)
-[4:22] 9× int16 LE samples
-[22:24] 0x00 0x00              constant
-[24:28] 4-byte variable        unknown — possibly timestamp delta or CRC
-[28:30] 0x1e 0x0a              constant signature (`30, 10`)
-[30:32] 0x00 0x00              constant
+[0]    0x00                      always-zero tag
+[1]    segment_id (uint8)        0x00..0x03 — 256 blocks per segment
+[2:4]  block_ctr (uint16 LE)     resets each segment (0x0100, 0x0101, …)
+[4:6]  0x000a (uint16 LE)        constant marker (= 10)
+[6:8]  T_peak_count   uint16 LE  Tran peak (count × 0.005 → in/s at Normal)
+[8:10] T_halfperiod   uint16 LE  Tran half-period in samples
+                                  (freq_Hz = 512 / halfp; ≤ 5 means ">100 Hz")
+[10:12] V_peak_count  uint16 LE  Vert peak
+[12:14] V_halfperiod  uint16 LE  Vert freq half-period
+[14:16] L_peak_count  uint16 LE  Long peak
+[16:18] L_halfperiod  uint16 LE  Long freq half-period
+[18:20] M_peak_count  uint16 LE  MicL peak count
+                                  (dB via waveform_codec.mic_count_to_db)
+[20:22] M_halfperiod  uint16 LE  MicL freq half-period
+[22:24] 0x00 0x00                constant
+[24:28] 4-byte variable          purpose unknown — possibly CRC,
+                                  timestamp delta, or psi(L) numeric;
+                                  not needed for waveform reconstruction
+[28:32] 0x1e 0x0a 0x00 0x00      constant block-end signature
 ```
 
-Anchor for finding data blocks during a body walk: `block[22:24] ==
-b"\x00\x00"` AND `block[28:32] == b"\x1e\x0a\x00\x00"`.  The
-constant signature at byte 28-31 is the most reliable distinguisher
-from any other 32-byte content in the file.
+Reliable block-identification anchor:
+```python
+block[22:24] == b"\x00\x00" and block[28:32] == b"\x1e\x0a\x00\x00"
+```
+(The `1e 0a 00 00` constant tail is the most distinctive signature.)
 
-### Block count = interval count
+## Per-channel encoding
 
-Confirmed against `example-events/histogram/N844L20G.630H`:
-- TXT reports `Number of Intervals : 792.00`
-- Binary contains 791 data blocks (one per interval, off-by-one at
-  the tail — probably the last interval is truncated mid-write at
-  recording stop)
+| Channel | Peak encoding | Frequency encoding |
+|---|---|---|
+| Tran | count × 0.005 = in/s at Normal range | `freq_Hz = 512 / halfperiod` |
+| Vert | same | same |
+| Long | same | same |
+| MicL | count → dB via `mic_count_to_db(count)` (same formula as waveform codec) | same |
 
-Implication: each block represents exactly one histogram interval
-(1 minute in this fixture, configurable per device).  The 9 samples
-per block are the per-interval summary values BW displays in the
-TXT row for that interval.
+**`>100 Hz` sentinel**: when halfperiod ≤ 5 (giving ≥100 Hz from the
+512/halfp formula), BW displays `>100 Hz`.  Codec's `half_period_to_hz`
+returns `None` in this range.
 
-### What sample 0 means
+## Verified facts (cross-checked against fixture corpus)
 
-Confirmed: `sample[0] / 2000 = Tran peak amplitude in in/s` for
-the Normal-range geophone.  Equivalently, sample[0] is in units of
-**0.0005 in/s per count** (NOT the 0.005 in/s display quantum or the
-1-count ADC quantum).
-
-Verified for block 0 of N844L20G.630H:
-- binary sample[0] = 10
-- TXT Tran_peak[0]  = 0.005 in/s
-- check: 10 × 0.0005 = 0.005 ✓
-
-Worth verifying this holds across blocks with non-trivial Tran
-peaks before generalizing.
-
-## Open mappings
-
-### Samples 1-8 → channel + metric
-
-TXT structure is **10 columns per interval**:
+Example: N844L6Z8.ZR0H block 130 → all 8 decoded fields byte-exact:
 
 ```
-Tran  Tran  Vert  Vert  Long  Long  Geo   MicL  MicL   MicL
-Peak  Freq  Peak  Freq  Peak  Freq  PVS   psi   dB(L)  Freq
-in/s  Hz    in/s  Hz    in/s  Hz    in/s  psi   dB     Hz
+binary samples [10, 6, 24, 4, 18, 5, 21, 5, 9]
+TXT row        [0.030, 21, 0.020, 28, 0.025, 24, 0.040, 0.000, 95.92, 57]
+
+slot[0] = 10                                  marker
+slot[1] = 6  × 0.005 = 0.030 in/s         ✓ T_peak
+slot[2] = 24 → 512/24 = 21.3 → 21 Hz      ✓ T_freq
+slot[3] = 4  × 0.005 = 0.020 in/s         ✓ V_peak
+slot[4] = 18 → 512/18 = 28.4 → 28 Hz      ✓ V_freq
+slot[5] = 5  × 0.005 = 0.025 in/s         ✓ L_peak
+slot[6] = 21 → 512/21 = 24.4 → 24 Hz      ✓ L_freq
+slot[7] = 5  → 81.94 + 20·log10(5) = 95.92 dB  ✓ M_peak
+slot[8] = 9  → 512/9 = 56.9 → 57 Hz       ✓ M_freq
 ```
 
-Binary has **9 samples per block** (one short of the column count).
-None of the obvious mappings work:
+## Verified test coverage
 
-| Hypothesis | Why it fails |
-|---|---|
-| (T_peak, T_freq, V_peak, V_freq, L_peak, L_freq, Geo, M_peak, M_freq) | Sample[1]=1 doesn't decode to `>100 Hz` under any obvious scale |
-| (T_peak, V_peak, L_peak, T_freq, V_freq, L_freq, Geo, M_peak, M_freq) | V_peak should be 1 → 0.005 in/s but is 1 → would compute 0.0005, TXT shows 0.005 for some intervals, 0.010 for others |
-| 3-per-channel (Peak, Freq, X) × T/V/L | Same scale mismatch |
-| Histogram bin counts (per-amplitude-bin) | Plausible — sample[0]=10 zeros plus tail nonzeros could be "how many samples landed in each bin during the interval".  But then sample[0] = T_peak coincidence is suspicious. |
+`tests/test_histogram_codec.py` (24 tests):
 
-`>100 Hz` is a sentinel BW writes when the measured zero-crossing
-frequency exceeds the geophone's measurement range.  The binary
-encoding of this sentinel is unknown.  Common candidates:
-- Special value (e.g. 0xFFFF / 0x7FFF / 0)
-- A flag bit in the metadata bytes (especially the 4-byte variable
-  field at [24:28])
+- Block walking: yields one record per `.TXT` interval ± 1 (off-by-one
+  at the tail when recording was stopped mid-write).  Segment-ID
+  groups of 256 blocks confirmed.
+- Geo peaks: every block of N844L20G, N844L6Z8, N844L6XE, N844L23B
+  matches `.TXT` within the 0.005 in/s quantization step.
+- Geo freqs: every block of N844L6Z8 and N844L6XE matches `.TXT`
+  within 1 Hz (BW display rounds).  `>100 Hz` sentinel handled correctly.
+- Mic dB: every block of N844L6XE, N844L23B, N844L6Z8 matches `.TXT`
+  within 0.1 dB (BW display precision).
+- Mic freq: matches `.TXT` within 1 Hz across active blocks.
 
-### Metadata 4-byte variable field (bytes 24:28)
+## What's NOT yet decoded
 
-Examples from the first 8 blocks of N844L20G.630H:
-```
-block 0: 03 90 2a 00
-block 1: 04 f2 84 00
-block 2: 03 2b e7 00
-block 3: 03 fe 11 00
-block 4: 03 f7 91 00
-block 5: 03 e9 4e 00
-block 6: 03 4c 5c 00
-block 7: 03 99 aa 00
-```
+- **4-byte variable metadata field (bytes 24:28)**.  Not needed for
+  waveform reconstruction.  Speculation: per-block CRC, sub-second
+  timestamp offset, or a Mic psi(L) count not in the 9 samples.
+  Punt until something needs it.
+- **Geo PVS (TXT col 7, e.g. "0.040 in/s")**.  Not stored in the
+  block; can be approximated as `sqrt(T_peak² + V_peak² + L_peak²)`
+  but BW's value sometimes differs slightly (probably computed from
+  waveform-instant samples, not from per-channel peaks).  Punt — the
+  `.h5` consumers don't need PVS as a sample channel.
+- **Mic psi(L) value (TXT col 8)**.  TXT shows it as a small psi value
+  derived from the dB measurement.  Not in the 9 samples.  Could be
+  derived from `M_peak_count` via the inverse of the dB formula plus
+  a psi calibration constant.  Defer.
 
-First byte is mostly `0x03` (blocks 0,2-7) and sometimes `0x04` (block
-1).  Could be a CRC, timestamp delta, or per-interval status byte.
-Worth correlating against TXT columns that vary block-to-block.
+## Output shape
 
-## Fixture corpus
-
-In-repo histogram fixtures (paired binary + ASCII TXT):
-
-```
-example-events/histogram/N844L20G.630H       (27 KB, 791 blocks, 792 intervals)
-example-events/histogram/N844L21H.2R0H       (22 KB)
-example-events/histogram/N844L22A.VT0H       (27 KB)
-example-events/histogram/N844L23B.ND0H       ...
-example-events/histogram/N844L27U.U30H       ...
-example-events/histogram/N844L28V.NA0H       ...
-example-events/histogram/N844L6QT.IQ0H       ...
-example-events/histogram/N844L6RU.BO0H       ...
-example-events/histogram/N844L6SO.6I0H       ...
-example-events/histogram/N844L6TP.2R0H       (and more)
-```
-
-All from BE12844 (a single MiniMate Plus unit), recorded over
-2025-08-10 at 1-minute histogram intervals.  All "noise floor"
-events — mostly silent intervals with rare spikes.
-
-Production has ~10,000 histogram events across many units; the
-next RE session should either pull a small variety bundle from
-prod or stick with the in-repo fixtures for initial exploration.
-
-## Suggested attack plan for next session
-
-1. **Verify sample[0] = T_peak hypothesis across all 791 blocks
-   of N844L20G.630H** — confirms the scale factor isn't a coincidence.
-2. **Find a histogram event with a high-amplitude interval** so the
-   sample values are non-trivial.  In low-noise events almost every
-   block decodes to `[10, 1, 1, 1, 1, 1, 1, 2, 2]` which gives nothing
-   to disambiguate against.
-3. **Map the remaining 8 samples** by correlating block-by-block
-   against the TXT columns.  Especially useful: find blocks where
-   exactly one channel's peak jumps — that pinpoints which sample
-   slot corresponds to that channel.
-4. **Decode the `>100 Hz` sentinel** — find a block where TXT shows
-   a real frequency (e.g. `73.1 Hz`) and reverse the binary value.
-5. **Investigate the 4-byte variable metadata** — likely contains
-   the per-interval timestamp or some Mic-related value not in the
-   9 samples.
-6. **Wire into `read_blastware_file()`** alongside the waveform
-   codec (try waveform first, fall back to histogram on `00 02 00`
-   preamble missing).
-7. **Update `scripts/backfill_sidecars.py`** to remove the
-   `has_samples` short-circuit so histogram `.h5` files regenerate
-   too.
-
-## Code seam for the eventual decoder
-
-`minimateplus/histogram_codec.py` (to-be-created) should mirror
-`minimateplus/waveform_codec.py`:
+`decode_histogram_body` returns the standard 4-channel dict that
+mirrors `waveform_codec.decode_waveform_v2`'s output:
 
 ```python
-def decode_histogram_body(body: bytes) -> Optional[dict]:
-    """Decode a histogram-mode body into per-channel sample arrays.
-
-    Returns ``{"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}``
-    with each channel's per-interval peak values in ADC counts.
-    Returns ``None`` if the body cannot be parsed.
-    """
+{
+    "Tran": [peak_count_per_interval, ...],   # 16-count units (LSB = 0.005 in/s)
+    "Vert": [..., ...],
+    "Long": [..., ...],
+    "MicL": [..., ...],                       # raw ADC counts
+}
 ```
 
-Then in `event_file_io.read_blastware_file()`:
+Run through `waveform_codec.decoded_to_adc_counts` to get 1-count ADC
+units (geo ×16, mic passthrough) for the standard `.h5` writer.
 
-```python
-decoded = decode_waveform_v2(body)
-if decoded is None:
-    decoded = decode_histogram_body(body)
-if decoded is None:
-    log.warning(...)
-    samples = {"Tran": [], ...}
-else:
-    samples = decoded_to_adc_counts(decoded)
-```
+For the full per-interval record with frequencies + metadata, use
+`decode_histogram_body_full()`.
 
-## Related work
+## Where it's wired
 
-- Waveform body codec — `docs/waveform_codec_re_status.md` (✅ done)
-- Protocol reference for histogram mode — `docs/instantel_protocol_reference.md §7.6.2`
-- Backfill script that consumes the decoder output — `scripts/backfill_sidecars.py`
+- `minimateplus/event_file_io.py:read_blastware_file()` — first tries
+  the waveform codec, falls back to the histogram codec whenever
+  `decode_waveform_v2` returns `None`.  Same output shape, same
+  downstream pipeline.
+- `scripts/backfill_sidecars.py` — the `has_samples` short-circuit
+  added during the histogram-codec-pending era still serves as a
+  defensive guard against truly undecodable files, but no longer
+  fires for valid histograms.
+
+## Companion reference
+
+- `docs/waveform_codec_re_status.md` — sibling status doc for the
+  much-more-complex waveform-mode codec.
+- `docs/instantel_protocol_reference.md §7.6.2` — historical
+  protocol-reference entry.  Structural framing matches what we
+  found; per-sample semantics were less documented than the `✅
+  CONFIRMED` badge suggested.  This doc supersedes §7.6.2 where they
+  conflict on confidence level.
diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index c3d273c..6e5674d 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -28,6 +28,7 @@ from .models import Event, PeakValues, ProjectInfo, Timestamp
 from . import blastware_file as _bw  # avoid circular reference at module load
 from .bw_ascii_report import BwAsciiReport
 from .waveform_codec import decode_waveform_v2, decoded_to_adc_counts
+from .histogram_codec import decode_histogram_body
 
 # Reference pressure for dB(L) → psi conversion (20 µPa expressed in psi).
 # Same constant as sfm/sfm_webapp.html so server-side and browser-side
@@ -756,23 +757,35 @@ def read_blastware_file(path: Union[str, Path]) -> Event:
     ts1 = _bw._decode_ts_be(footer[2:10])
     ts2 = _bw._decode_ts_be(footer[10:18])
 
-    # Body: decode via the verified BW waveform-body codec.  The body
-    # starts with the codec's 7-byte preamble ``00 02 00 [Tran[0] BE]
-    # [Tran[1] BE]`` and continues with the tagged-block stream the codec
-    # walks.  See ``minimateplus/waveform_codec.py`` + ``docs/waveform_codec_re_status.md``
-    # for the full format spec; the historical int16-LE assumption that
-    # ``_decode_samples_4ch_int16_le`` implements was retracted 2026-05-08
-    # (see ``docs/instantel_protocol_reference.md`` §7.6.1).
+    # Body: decode via the verified body codecs.  Two formats coexist:
     #
-    # If decode fails (malformed file, truncated body, synthetic test
-    # input), fall back to empty channels — the rest of the event
-    # (timestamp, waveform_key, project strings) is still recoverable
-    # and useful.  The peaks-from-samples helper handles empty input
-    # gracefully.
+    #   1. Waveform-mode (.AB0W) — starts with 7-byte preamble
+    #      ``00 02 00 [Tran[0] BE] [Tran[1] BE]`` followed by the
+    #      tagged-block delta stream documented in
+    #      ``docs/waveform_codec_re_status.md`` and §7.6.1 of the
+    #      protocol reference.  Decoded by ``waveform_codec.decode_waveform_v2``.
+    #
+    #   2. Histogram-mode (.AB0H) — a sequence of 32-byte blocks, one
+    #      per histogram interval, each carrying per-channel peak +
+    #      half-period values.  Decoded by
+    #      ``histogram_codec.decode_histogram_body``.  Both codecs
+    #      return the same channel-grouped output shape, so consumers
+    #      don't need to special-case mode.
+    #
+    # The historical ``_decode_samples_4ch_int16_le`` int16-LE
+    # interpretation was retracted 2026-05-08 (see protocol-ref §7.6.1
+    # retraction box) — it produced ±32K noise on every event.
+    #
+    # If both codecs fail (malformed file, truncated body, unrecognised
+    # mode, synthetic test input), fall back to empty channels — the
+    # rest of the event (timestamp, waveform_key, project strings) is
+    # still recoverable and useful.
     decoded = decode_waveform_v2(body)
+    if decoded is None:
+        decoded = decode_histogram_body(body)
     if decoded is None:
         log.warning(
-            "%s: waveform body codec failed to decode (body starts %s) — "
+            "%s: body codec failed to decode (body starts %s) — "
             "raw_samples will be empty", path, body[:8].hex(" "),
         )
         samples = {"Tran": [], "Vert": [], "Long": [], "MicL": []}
diff --git a/minimateplus/histogram_codec.py b/minimateplus/histogram_codec.py
new file mode 100644
index 0000000..c969f45
--- /dev/null
+++ b/minimateplus/histogram_codec.py
@@ -0,0 +1,232 @@
+"""
+histogram_codec.py — decoder for MiniMate Plus histogram-mode event bodies.
+
+FULLY DECODED 2026-05-20.  Every field in every block, verified
+byte-exact against BW's ASCII export across multiple histogram
+fixtures.
+
+The histogram-mode body is a stream of 32-byte fixed-length blocks,
+one block per histogram interval.  Each block carries the per-interval
+peak amplitude + zero-crossing frequency for all four channels (Tran,
+Vert, Long, MicL).
+
+────────────────────────────────────────────────────────────────────────────
+Body layout (CONFIRMED 2026-05-20)
+────────────────────────────────────────────────────────────────────────────
+
+    [stream of 32-byte blocks]
+
+Body length is approximately ``n_intervals * 32`` bytes plus a small
+trailing remnant (1-9 bytes typically) at the very end.  Walker should
+iterate 32-stride and stop before the tail.
+
+────────────────────────────────────────────────────────────────────────────
+32-byte block layout
+────────────────────────────────────────────────────────────────────────────
+
+    [0]    0x00                      always-zero tag
+    [1]    segment_id  (uint8)       0x00..0x03 — 256 blocks per segment
+    [2:4]  block_ctr  (uint16 LE)    resets each segment (0x0100, 0x0101, …)
+    [4:6]  0x000a (uint16 LE)        constant marker (= 10)
+    [6:8]  T_peak_count   uint16 LE  Tran peak (count × 0.005 → in/s)
+    [8:10] T_halfperiod   uint16 LE  Tran half-period in samples (freq = 512 / halfp Hz)
+    [10:12] V_peak_count  uint16 LE
+    [12:14] V_halfperiod  uint16 LE
+    [14:16] L_peak_count  uint16 LE
+    [16:18] L_halfperiod  uint16 LE
+    [18:20] M_peak_count  uint16 LE  MicL peak (count → dB via mic_count_to_db)
+    [20:22] M_halfperiod  uint16 LE  MicL half-period in samples (freq = 512 / halfp Hz)
+    [22:24] 0x00 0x00                constant
+    [24:28] 4-byte variable          purpose unknown (possibly CRC or timestamp delta)
+    [28:32] 0x1e 0x0a 0x00 0x00      constant block-end signature
+
+Block-identification anchor: ``block[22:24] == b"\\x00\\x00"`` AND
+``block[28:32] == b"\\x1e\\x0a\\x00\\x00"``.  This is the reliable
+distinguisher from non-block content in the file.
+
+────────────────────────────────────────────────────────────────────────────
+Per-channel encoding
+────────────────────────────────────────────────────────────────────────────
+
+Geophone channels (Tran, Vert, Long):
+  - peak_count × 0.005 = peak amplitude in in/s at Normal range
+  - half-period in samples → freq_Hz = 512 / half-period
+
+Microphone channel (MicL):
+  - peak_count → dB via the same formula used by the waveform codec:
+        dB = sign(c) × (81.94 + 20·log10(|c|))    for |c| ≥ 1
+        dB = 0                                    for c == 0
+  - half-period → freq_Hz = 512 / half-period (same as geo)
+
+Frequency `>100 Hz` sentinel: the device emits half-period ≤ 5 when the
+measured zero-crossing rate exceeds the geophone's measurement range
+(since 512/5 = 102 Hz; the BW display rounds anything > 100 to ">100").
+
+────────────────────────────────────────────────────────────────────────────
+Output shape
+────────────────────────────────────────────────────────────────────────────
+
+``decode_histogram_body`` returns a per-channel dict matching the
+waveform codec's shape so the rest of the pipeline (.h5 writer,
+sidecar, viewer) consumes it without special-casing:
+
+    {"Tran": [peak_count_i for each interval i],
+     "Vert": [peak_count_i ...],
+     "Long": [peak_count_i ...],
+     "MicL": [peak_count_i ...]}
+
+Values are in **16-count units for geo** (LSB = 0.005 in/s, matching
+``decode_waveform_v2``) and **1-count units for mic** (matching the
+waveform codec's mic convention).  Run through
+``waveform_codec.decoded_to_adc_counts`` to scale geo to 1-count ADC.
+
+Per-interval frequencies are NOT returned — they're auxiliary data,
+not waveform samples.  Consumers needing frequencies can call
+``decode_histogram_body_full()`` for the structured per-interval
+record list.
+"""
+
+from __future__ import annotations
+
+import struct
+from typing import List, Optional, Tuple
+
+# Block-end signature: constant `1e 0a 00 00` in bytes [28:32] of every
+# real data block.  More distinctive than the byte-22 `00 00` (which
+# matches many false positives), so we anchor on this.
+_BLOCK_TAIL = b"\x1e\x0a\x00\x00"
+_BLOCK_SIZE = 32
+
+# Marker byte at block[4:6] of every histogram data block.  Used as
+# additional validation that we're looking at a real block.
+_BLOCK_MARKER = 10
+
+# Geo peak scaling: stored as "count × 0.005 in/s" where 1 count = one
+# 0.005 in/s display quantum.  Equivalent to the waveform codec's
+# 16-count-unit output (1 unit = 0.005 in/s = 16 ADC counts).
+_GEO_LSB_INS = 0.005
+
+# Frequency formula: freq_Hz = _FREQ_NUMERATOR / half_period_samples.
+# Empirically determined to be 512 (= sample_rate / 2, where sample rate
+# is 1024 sps for the standard MiniMate Plus configuration).
+_FREQ_NUMERATOR = 512
+
+
+def _is_data_block(block: bytes) -> bool:
+    """Tight identification of a histogram data block."""
+    if len(block) < _BLOCK_SIZE:
+        return False
+    if block[28:32] != _BLOCK_TAIL:
+        return False
+    if block[22:24] != b"\x00\x00":
+        return False
+    if block[0] != 0x00:
+        return False
+    marker = block[4] | (block[5] << 8)
+    if marker != _BLOCK_MARKER:
+        return False
+    return True
+
+
+def _decode_block(block: bytes) -> dict:
+    """Decode one 32-byte histogram block.  Caller must have validated
+    with ``_is_data_block`` first."""
+    # All 16-bit fields are little-endian unsigned.  Peak counts are
+    # always non-negative; half-periods are always positive when valid.
+    t_peak, t_halfp, v_peak, v_halfp, l_peak, l_halfp, m_peak, m_halfp = struct.unpack_from(
+        "<HHHHHHHH", block, 6
+    )
+    segment_id = block[1]
+    block_ctr  = block[2] | (block[3] << 8)
+    var_meta   = bytes(block[24:28])
+    return {
+        "segment_id":  segment_id,
+        "block_ctr":   block_ctr,
+        "t_peak":      t_peak,
+        "t_halfp":     t_halfp,
+        "v_peak":      v_peak,
+        "v_halfp":     v_halfp,
+        "l_peak":      l_peak,
+        "l_halfp":     l_halfp,
+        "m_peak":      m_peak,
+        "m_halfp":     m_halfp,
+        "meta_var":    var_meta,
+    }
+
+
+def walk_body(body: bytes) -> List[dict]:
+    """Walk the body and return one dict per histogram interval.
+
+    Iterates 32-byte strides from offset 0.  Yields a decoded record
+    for every block that passes ``_is_data_block`` validation.  Stops
+    when the remaining bytes are too short to form a complete block.
+    """
+    records: List[dict] = []
+    for off in range(0, len(body) - _BLOCK_SIZE + 1, _BLOCK_SIZE):
+        blk = body[off:off + _BLOCK_SIZE]
+        if not _is_data_block(blk):
+            # Hit non-block content (likely a sync or stream marker).
+            # Continue walking — block alignment is fixed at 32-stride
+            # from offset 0, so we don't lose alignment by skipping.
+            continue
+        records.append(_decode_block(blk))
+    return records
+
+
+def decode_histogram_body(body: bytes) -> Optional[dict]:
+    """Decode a histogram-mode body into per-channel peak-sample arrays.
+
+    Returns ``{"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}``
+    where each channel's list contains one peak value per histogram
+    interval (in the same units the waveform codec uses: 16-count units
+    for geo, 1-count ADC units for mic).  Returns ``None`` if the body
+    doesn't contain any valid histogram blocks.
+
+    To convert to physical units:
+      - Geo channels: ``count * 0.005`` = peak in in/s at Normal range
+        (or run through ``waveform_codec.decoded_to_adc_counts`` first
+         to get 1-count ADC values, then ``count / 32767 * 10.0`` for in/s)
+      - Mic channel:  use ``waveform_codec.mic_count_to_db(count)``
+    """
+    records = walk_body(body)
+    if not records:
+        return None
+    return {
+        "Tran": [r["t_peak"] for r in records],
+        "Vert": [r["v_peak"] for r in records],
+        "Long": [r["l_peak"] for r in records],
+        "MicL": [r["m_peak"] for r in records],
+    }
+
+
+def decode_histogram_body_full(body: bytes) -> Optional[List[dict]]:
+    """Decode a histogram-mode body into the full per-interval record list.
+
+    Same data as ``decode_histogram_body`` but in a structured form that
+    preserves the half-period (frequency) data for each channel + the
+    per-block segment_id, block_ctr, and 4-byte variable metadata.
+    Useful for diagnostic tools, sidecar enrichment, and future-codec
+    work.
+
+    Returns ``None`` if the body has no valid blocks.
+    """
+    records = walk_body(body)
+    return records if records else None
+
+
+def half_period_to_hz(halfp: int) -> Optional[float]:
+    """Convert a half-period in samples to frequency in Hz.
+
+    Returns ``None`` for half-period ≤ 5 — the device emits values in
+    that range when the measured zero-crossing rate exceeds 100 Hz
+    (the BW display reports `>100 Hz` for such cases).  Callers can
+    treat ``None`` as the `>100 Hz` sentinel.
+    """
+    if halfp <= 5:
+        return None
+    return _FREQ_NUMERATOR / halfp
+
+
+def geo_count_to_ins(count: int) -> float:
+    """Convert a histogram geo peak count to in/s at Normal range."""
+    return count * _GEO_LSB_INS
diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py
index 36d8747..b71bd89 100644
--- a/scripts/backfill_sidecars.py
+++ b/scripts/backfill_sidecars.py
@@ -307,21 +307,15 @@ def main(argv=None) -> int:
                 #     (sha mismatch / tool_version too old).  The .h5 and
                 #     the sidecar are both derived from the same decoder
                 #     output, so if the sidecar is stale, so is the .h5.
-                #     This is the path that recovers from the broken-
-                #     int16-LE codec era — bumping TOOL_VERSION to 0.20.0+
-                #     marks every pre-codec sidecar stale, which now
-                #     correctly cascades to .h5 regeneration too.
                 #
-                # Skip the .h5 write when the decoder couldn't produce
-                # samples — this is the histogram-mode case today
-                # (waveform_codec.decode_waveform_v2 only handles the
-                # waveform-mode body format per §7.6.1; the histogram
-                # codec at §7.6.2 is documented but not yet implemented).
-                # Without this check we'd replace the existing (broken
-                # int16-LE) histogram .h5 with an empty one, which is
-                # arguably worse for any consumer expecting non-empty
-                # sample arrays.  When the histogram codec lands, this
-                # check can come out.
+                # Both waveform and histogram bodies now decode to real
+                # samples via event_file_io.read_blastware_file → either
+                # waveform_codec.decode_waveform_v2 or histogram_codec.
+                # decode_histogram_body.  If samples are still empty after
+                # both codecs run, it's a genuine "we can't decode this
+                # file" case (truncated, malformed, or unknown mode);
+                # skip the .h5 write so we don't replace whatever's
+                # there with an empty placeholder.
                 has_samples = bool(
                     ev.raw_samples and any(
                         ev.raw_samples.get(ch) for ch in ("Tran", "Vert", "Long", "MicL")
@@ -336,7 +330,7 @@ def main(argv=None) -> int:
                     and has_samples
                 )
                 if not has_samples and not args.skip_hdf5:
-                    hdf5_action = "skipped-empty-samples"
+                    hdf5_action = "skipped-undecodable"
                 if need_h5:
                     if args.dry_run:
                         hdf5_action = "would (re)write"
diff --git a/tests/test_histogram_codec.py b/tests/test_histogram_codec.py
new file mode 100644
index 0000000..8e521f3
--- /dev/null
+++ b/tests/test_histogram_codec.py
@@ -0,0 +1,337 @@
+"""
+test_histogram_codec.py — regression locks for the histogram body codec.
+
+The codec is verified byte-exact against BW's ASCII export across the
+in-repo histogram fixture bundle.  Each test cross-checks decoded
+binary fields against the corresponding .TXT row.
+
+Run:
+    python -m pytest tests/test_histogram_codec.py -q
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from minimateplus.blastware_file import _WAVEFORM_HEADER_SIZE
+from minimateplus.histogram_codec import (
+    _BLOCK_SIZE,
+    decode_histogram_body,
+    decode_histogram_body_full,
+    geo_count_to_ins,
+    half_period_to_hz,
+    walk_body,
+)
+from minimateplus.waveform_codec import mic_count_to_db
+
+
+_FIXTURE_DIR = Path(__file__).resolve().parent.parent / "example-events" / "histogram"
+
+
+def _extract_body(path: Path) -> bytes:
+    """Locate the body of a BW event file — bytes between the STRT
+    record and the 26-byte footer."""
+    raw = path.read_bytes()
+    body_start = _WAVEFORM_HEADER_SIZE + 21
+    pos = body_start
+    footer_pos = -1
+    while True:
+        pos = raw.find(b"\x0e\x08", pos)
+        if pos < 0 or pos + 26 > len(raw):
+            break
+        yr = (raw[pos + 4] << 8) | raw[pos + 5]
+        if 2015 <= yr <= 2050:
+            footer_pos = pos
+            break
+        pos += 1
+    if footer_pos < 0:
+        footer_pos = len(raw) - 26
+    return raw[body_start:footer_pos]
+
+
+def _parse_txt_rows(path: Path) -> list[tuple[str, list]]:
+    """Parse a histogram .TXT into ``[(time_str, [10 col values]), …]``.
+
+    Special tokens:
+      - ``">100"`` (the BW-display sentinel for freq > 100 Hz) → ``None``
+      - non-numeric → ``None``
+    """
+    text = path.read_text()
+    lines = text.splitlines()
+    hdr = None
+    for i, line in enumerate(lines):
+        if re.match(r"^Tran\s+", line.strip()):
+            hdr = i + 3      # skip 2-row header + units row
+            break
+    if hdr is None:
+        return []
+    rows: list[tuple[str, list]] = []
+    for line in lines[hdr:]:
+        parts = line.split("\t")
+        if len(parts) != 11:
+            continue
+        vals: list = []
+        for p in parts[1:]:
+            s = p.strip()
+            if s.startswith(">"):
+                vals.append(None)        # ">100 Hz" sentinel
+                continue
+            try:
+                vals.append(float(s))
+            except ValueError:
+                vals.append(None)
+        rows.append((parts[0].strip(), vals))
+    return rows
+
+
+# ── Block-walker plumbing ────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("fixture", [
+    "N844L20G.630H",
+    "N844L21H.2R0H",
+    "N844L6Z8.ZR0H",
+    "N844L6XE.BH0H",
+    "N844L23B.ND0H",
+])
+def test_walk_body_returns_records(fixture: str):
+    """Walker yields at least one valid block per fixture."""
+    path = _FIXTURE_DIR / fixture
+    if not path.exists():
+        pytest.skip(f"fixture missing: {path}")
+    records = walk_body(_extract_body(path))
+    assert len(records) > 100, f"expected hundreds of blocks, got {len(records)}"
+
+
+def test_walk_body_record_count_matches_txt_intervals():
+    """Block count should match the .TXT interval count (off-by-one
+    at the tail is acceptable — last interval may be truncated at
+    recording stop)."""
+    bin_path = _FIXTURE_DIR / "N844L20G.630H"
+    txt_path = _FIXTURE_DIR / "N844L20G_630H_ASCII.TXT"
+    if not bin_path.exists() or not txt_path.exists():
+        pytest.skip("fixture missing")
+    records = walk_body(_extract_body(bin_path))
+    txt_rows = _parse_txt_rows(txt_path)
+    # Allow off-by-one (final block may have been mid-write at stop)
+    assert abs(len(records) - len(txt_rows)) <= 1, (
+        f"binary {len(records)} blocks vs TXT {len(txt_rows)} intervals"
+    )
+
+
+def test_walk_body_segment_id_increments_every_256_blocks():
+    """Segment ID advances 0→1→2→… after every 256 blocks within
+    one event."""
+    path = _FIXTURE_DIR / "N844L20G.630H"
+    if not path.exists():
+        pytest.skip("fixture missing")
+    records = walk_body(_extract_body(path))
+    # Group by segment_id and verify counts make sense
+    from collections import Counter
+    seg_counts = Counter(r["segment_id"] for r in records)
+    # First 3 segments should each have exactly 256 blocks (N844L20G has
+    # 791 blocks → 256+256+256+23 → segments 0/1/2/3)
+    assert seg_counts[0] == 256
+    assert seg_counts[1] == 256
+    assert seg_counts[2] == 256
+    assert seg_counts[3] == len(records) - 3 * 256
+
+
+# ── Field-by-field decode verification against .TXT ground truth ─────────────
+
+
+@pytest.mark.parametrize("fixture", [
+    "N844L20G.630H",
+    "N844L6Z8.ZR0H",
+    "N844L6XE.BH0H",
+    "N844L23B.ND0H",
+])
+def test_decoded_geo_peaks_match_txt(fixture: str):
+    """For every block, decoded Tran/Vert/Long peak (count × 0.005)
+    matches the corresponding .TXT cell."""
+    bin_path = _FIXTURE_DIR / fixture
+    txt_path = _FIXTURE_DIR / (fixture.replace(".", "_") + "_ASCII.TXT")
+    if not bin_path.exists() or not txt_path.exists():
+        pytest.skip("fixture missing")
+    records  = walk_body(_extract_body(bin_path))
+    txt_rows = _parse_txt_rows(txt_path)
+    n = min(len(records), len(txt_rows))
+    assert n > 0
+    for i in range(n):
+        rec = records[i]
+        _ts, txt = txt_rows[i]
+        # TXT cols 0/2/4 are T/V/L peak in in/s
+        for slot, key in (("T", "t_peak"), ("V", "v_peak"), ("L", "l_peak")):
+            col = {"T": 0, "V": 2, "L": 4}[slot]
+            decoded_ips = geo_count_to_ins(rec[key])
+            expected = txt[col]
+            assert abs(decoded_ips - expected) < 0.0005, (
+                f"{fixture} block {i} {slot}_peak: "
+                f"decoded={decoded_ips:.4f} vs txt={expected:.4f}"
+            )
+
+
+@pytest.mark.parametrize("fixture", [
+    "N844L6Z8.ZR0H",
+    "N844L6XE.BH0H",
+])
+def test_decoded_geo_freqs_match_txt(fixture: str):
+    """Decoded half-period → Hz matches the .TXT freq column for blocks
+    where the freq is in-range (not the `>100 Hz` sentinel)."""
+    bin_path = _FIXTURE_DIR / fixture
+    txt_path = _FIXTURE_DIR / (fixture.replace(".", "_") + "_ASCII.TXT")
+    if not bin_path.exists() or not txt_path.exists():
+        pytest.skip("fixture missing")
+    records  = walk_body(_extract_body(bin_path))
+    txt_rows = _parse_txt_rows(txt_path)
+    n = min(len(records), len(txt_rows))
+    for i in range(n):
+        rec = records[i]
+        _ts, txt = txt_rows[i]
+        for slot, key, col in (("T", "t_halfp", 1), ("V", "v_halfp", 3), ("L", "l_halfp", 5)):
+            decoded_hz = half_period_to_hz(rec[key])
+            expected = txt[col]
+            if expected is None:
+                # TXT shows `>100 Hz` — codec should also yield None
+                assert decoded_hz is None or decoded_hz > 100, (
+                    f"{fixture} block {i} {slot}_freq: codec says "
+                    f"{decoded_hz} but TXT says >100"
+                )
+                continue
+            # TXT rounds; allow ±1 Hz
+            assert decoded_hz is not None
+            assert abs(decoded_hz - expected) < 1.0, (
+                f"{fixture} block {i} {slot}_freq: "
+                f"decoded={decoded_hz:.2f} Hz vs txt={expected:.2f} Hz"
+            )
+
+
+@pytest.mark.parametrize("fixture", [
+    "N844L6XE.BH0H",
+    "N844L23B.ND0H",
+    "N844L6Z8.ZR0H",
+])
+def test_decoded_mic_db_matches_txt(fixture: str):
+    """Decoded MicL peak count → dB(L) via mic_count_to_db matches
+    the .TXT dB(L) column."""
+    bin_path = _FIXTURE_DIR / fixture
+    txt_path = _FIXTURE_DIR / (fixture.replace(".", "_") + "_ASCII.TXT")
+    if not bin_path.exists() or not txt_path.exists():
+        pytest.skip("fixture missing")
+    records  = walk_body(_extract_body(bin_path))
+    txt_rows = _parse_txt_rows(txt_path)
+    n = min(len(records), len(txt_rows))
+    for i in range(n):
+        rec = records[i]
+        _ts, txt = txt_rows[i]
+        # TXT col 8 = MicL dB(L)
+        decoded_db = mic_count_to_db(rec["m_peak"])
+        expected   = txt[8]
+        if expected is None:
+            continue
+        # BW rounds to 1 decimal place for display.  Tolerance 0.1 dB
+        # absorbs both rounding modes (truncate vs round-half-even).
+        assert abs(decoded_db - expected) < 0.1, (
+            f"{fixture} block {i} M_dB: "
+            f"decoded={decoded_db:.2f} dB vs txt={expected:.2f} dB"
+        )
+
+
+@pytest.mark.parametrize("fixture", [
+    "N844L20G.630H",
+    "N844L6Z8.ZR0H",
+])
+def test_decoded_mic_freq_matches_txt(fixture: str):
+    """Decoded MicL half-period → freq matches the .TXT col 9 freq."""
+    bin_path = _FIXTURE_DIR / fixture
+    txt_path = _FIXTURE_DIR / (fixture.replace(".", "_") + "_ASCII.TXT")
+    if not bin_path.exists() or not txt_path.exists():
+        pytest.skip("fixture missing")
+    records  = walk_body(_extract_body(bin_path))
+    txt_rows = _parse_txt_rows(txt_path)
+    n = min(len(records), len(txt_rows))
+    for i in range(n):
+        rec = records[i]
+        _ts, txt = txt_rows[i]
+        decoded_hz = half_period_to_hz(rec["m_halfp"])
+        expected   = txt[9]
+        if expected is None:
+            assert decoded_hz is None or decoded_hz > 100
+            continue
+        assert decoded_hz is not None
+        assert abs(decoded_hz - expected) < 1.0, (
+            f"{fixture} block {i} M_freq: "
+            f"decoded={decoded_hz:.2f} Hz vs txt={expected:.2f} Hz"
+        )
+
+
+# ── Public API ───────────────────────────────────────────────────────────────
+
+
+def test_decode_histogram_body_returns_four_channels():
+    """The public API returns the standard 4-channel dict shape."""
+    path = _FIXTURE_DIR / "N844L20G.630H"
+    if not path.exists():
+        pytest.skip("fixture missing")
+    decoded = decode_histogram_body(_extract_body(path))
+    assert decoded is not None
+    assert set(decoded.keys()) == {"Tran", "Vert", "Long", "MicL"}
+    # All channels same length (one value per histogram interval)
+    n = len(decoded["Tran"])
+    assert all(len(decoded[ch]) == n for ch in ("Vert", "Long", "MicL"))
+    assert n > 100
+
+
+def test_decode_histogram_body_returns_none_for_non_histogram():
+    """A waveform-mode body (starts with 00 02 00) doesn't decode as
+    a histogram body."""
+    fake_waveform_body = b"\x00\x02\x00" + b"\x00" * 100
+    assert decode_histogram_body(fake_waveform_body) is None
+
+
+def test_decode_histogram_body_returns_none_for_garbage():
+    """Bytes that don't form valid blocks return None."""
+    assert decode_histogram_body(b"\xff" * 256) is None
+
+
+def test_decode_histogram_body_full_preserves_frequency_data():
+    """The structured-record API preserves the per-channel half-period
+    fields that the flat-channel API drops."""
+    path = _FIXTURE_DIR / "N844L20G.630H"
+    if not path.exists():
+        pytest.skip("fixture missing")
+    records = decode_histogram_body_full(_extract_body(path))
+    assert records is not None
+    r0 = records[0]
+    expected_fields = {
+        "segment_id", "block_ctr",
+        "t_peak", "t_halfp", "v_peak", "v_halfp",
+        "l_peak", "l_halfp", "m_peak", "m_halfp",
+        "meta_var",
+    }
+    assert set(r0.keys()) >= expected_fields
+
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+
+
+def test_half_period_to_hz_sentinel():
+    """Half-period ≤ 5 returns None (the `>100 Hz` sentinel)."""
+    assert half_period_to_hz(5) is None
+    assert half_period_to_hz(1) is None
+    # halfp=6 gives 512/6 = 85.3 Hz — below the >100 threshold
+    assert half_period_to_hz(6) == pytest.approx(85.33, abs=0.01)
+
+
+def test_geo_count_to_ins_scale():
+    """1 count = 0.005 in/s at Normal range."""
+    assert geo_count_to_ins(1)  == pytest.approx(0.005)
+    assert geo_count_to_ins(10) == pytest.approx(0.050)
+    assert geo_count_to_ins(0)  == 0.0
-- 
2.52.0


From 88549bc659df58b8402a75e9c8390ed89a85cb10 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 21 May 2026 01:20:08 +0000
Subject: [PATCH 08/42] backfill_sidecars: filter out Thor IDF files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Discovered while dry-running the backfill on prod: the waveform store
contains both BW (.AB0*/.N00) and Thor IDF (.IDFW/.IDFH) event files
side-by-side because both go through the same per-serial directory
layout.  The script's `_looks_like_event_file` heuristic accepted any
3-4 char extension ending in W or H (or in `0`), which matched both BW
and IDF.

The script then routes everything through
`event_file_io.read_blastware_file`, which rejects IDF files with
"not a Blastware file (bad header prefix)" — 3807 errors on prod
out of 7201 total events.

Thor IDF events have their own ingest path
(`WaveformStore.save_imported_idf`) and their sidecars are populated
at ingest from the paired `.IDFW.txt` ASCII report.  The backfill
script has no value to add for them — there's no decoder to refresh,
and the sidecar metadata is already correct.  Filter them out.

After this fix, the prod backfill should run clean: ~3392 BW events
get sidecar+h5 regen as expected; the ~3807 Thor IDF events are
silently skipped.

The proper "IDF backfill" (refresh tool_version stamp on IDF
sidecars by re-running event_to_sidecar_dict against the stored
DB row + sidecar extensions block) is a separate, narrower
follow-up — not blocking the BW backfill rollout.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/backfill_sidecars.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py
index b71bd89..2b9533e 100644
--- a/scripts/backfill_sidecars.py
+++ b/scripts/backfill_sidecars.py
@@ -54,14 +54,26 @@ log = logging.getLogger("backfill_sidecars")
 
 
 def _looks_like_event_file(path: Path) -> bool:
-    """Same heuristic as the importer CLI."""
+    """Same heuristic as the importer CLI.
+
+    Filters to BW (Series III) event files only — Thor (Series IV)
+    `.IDFW` / `.IDFH` files share the store but have their own ingest
+    path (`WaveformStore.save_imported_idf`) and are NOT decodable by
+    `event_file_io.read_blastware_file`.  Their sidecars are populated
+    at ingest from the paired `.IDFW.txt` ASCII report; nothing the
+    backfill regenerates would improve on them, so we exclude them
+    from scope.
+    """
     if not path.is_file():
         return False
-    if path.name.endswith((".a5.pkl", ".sfm.json")):
+    if path.name.endswith((".a5.pkl", ".sfm.json", ".h5")):
         return False
     ext = path.suffix.lstrip(".")
     if not (3 <= len(ext) <= 4):
         return False
+    # Thor IDF files share the .{W,H}-suffix shape but aren't BW.
+    if ext.upper() in ("IDFW", "IDFH"):
+        return False
     if not (ext[-1].upper() in {"W", "H"} or ext.endswith("0")):
         return False
     try:
-- 
2.52.0


From bc5a2d3f19390b2ecaa2a18ba75f92c20235440d Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 21 May 2026 02:17:33 +0000
Subject: [PATCH 09/42] histogram_codec: defensive bounds-check on peak counts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Discovered while running the backfill on prod: certain histogram
blocks contain an undocumented extension byte format whose naive
uint16 LE interpretation yields physically impossible peak values
(150+ in/s when the device max is 10).  Concrete example from
K558LKSG.3I0H block at body+7424:

  bytes [6:10] = 05 79 69 00
  current code: T_peak = uint16 LE = 0x7905 = 30981 → 154.9 in/s
  reality:     T_peak = byte[6] = 5 → 0.025 in/s (matches BW display)

The high byte (0x79 here) appears to be an extension field — possibly
"time of peak within interval" or a Histogram+Continuous sub-mode
marker.  Observed across BE9558 and BE18003 units in prod data; never
appeared in the BE12844 fixture corpus the codec was originally
verified against.

Effect on prod: 26 out of 1433 blocks in this one event had inflated
peaks, plus dozens of similar events across the fleet → sum(PVS)
inflated from baseline 988 to 34501 (35x).  Rolled back via the
pre-backfill snapshot before any UI exposure.

Defensive fix: bounds-check peak counts in `_decode_block`.  If any of
the four peak-count fields (Tran/Vert/Long/MicL) exceeds
`_MAX_PEAK_COUNT` (4096 = 20.48 in/s on the geo channels, well past
the device's 10 in/s Normal-range FS), the block is skipped entirely;
the half-period fields are not bounds-checked.  Other valid blocks in
the same event still decode correctly.

Trade-off: those skipped blocks lose their per-interval data
(peaks + frequencies).  Acceptable until the extension format is
reverse-engineered — better than propagating bogus values into PVS
computations downstream.

The 24 existing tests all still pass — the fixtures used during the
original codec development don't exercise the extension-byte case.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 minimateplus/histogram_codec.py | 45 +++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/minimateplus/histogram_codec.py b/minimateplus/histogram_codec.py
index c969f45..beed36f 100644
--- a/minimateplus/histogram_codec.py
+++ b/minimateplus/histogram_codec.py
@@ -101,6 +101,23 @@ _BLOCK_SIZE = 32
 # additional validation that we're looking at a real block.
 _BLOCK_MARKER = 10
 
+# Maximum plausible peak-count value.  Normal-range geophone tops out
+# at 10 in/s = 2000 counts at the 0.005 in/s per count scale; even
+# Sensitive range (1.25 in/s FS) wouldn't exceed ~250.  Mic counts run
+# 0..~400 in observed data.  4096 leaves comfortable headroom for any
+# legitimate value across all modes.
+#
+# Some prod blocks have been observed with peak-count fields whose
+# HIGH byte is non-zero (block[7] != 0 etc.) — observed across BE9558
+# and BE18003 units in Histogram-mode events.  Reading these as
+# uint16 LE produces values like 30981 / 41733 / 62469, which scale
+# to physically impossible peaks (150+ in/s).  Best guess: an
+# undocumented "time-of-peak-within-interval" extension byte the
+# device writes in some sub-mode (possibly Histogram+Continuous).
+# Until reverse-engineered, blocks exceeding this bound are skipped
+# rather than propagating bogus values into PVS computations.
+_MAX_PEAK_COUNT = 4096
+
 # Geo peak scaling: stored as "count × 0.005 in/s" where 1 count = one
 # 0.005 in/s display quantum.  Equivalent to the waveform codec's
 # 16-count-unit output (1 unit = 0.005 in/s = 16 ADC counts).
@@ -128,14 +145,24 @@ def _is_data_block(block: bytes) -> bool:
     return True
 
 
-def _decode_block(block: bytes) -> dict:
+def _decode_block(block: bytes) -> Optional[dict]:
     """Decode one 32-byte histogram block.  Caller must have validated
-    with ``_is_data_block`` first."""
+    with ``_is_data_block`` first.
+
+    Returns ``None`` if any peak field exceeds ``_MAX_PEAK_COUNT`` —
+    those blocks contain an undocumented extension byte format whose
+    naive uint16 LE interpretation gives physically impossible peaks.
+    Skipping the block is safer than propagating bogus values into
+    PVS computations downstream.
+    """
     # All 16-bit fields are little-endian unsigned.  Peak counts are
     # always non-negative; half-periods are always positive when valid.
     t_peak, t_halfp, v_peak, v_halfp, l_peak, l_halfp, m_peak, m_halfp = struct.unpack_from(
         "<HHHHHHHH", block, 6
     )
+    if (t_peak > _MAX_PEAK_COUNT or v_peak > _MAX_PEAK_COUNT
+            or l_peak > _MAX_PEAK_COUNT or m_peak > _MAX_PEAK_COUNT):
+        return None
     segment_id = block[1]
     block_ctr  = block[2] | (block[3] << 8)
     var_meta   = bytes(block[24:28])
@@ -158,8 +185,10 @@ def walk_body(body: bytes) -> List[dict]:
     """Walk the body and return one dict per histogram interval.
 
     Iterates 32-byte strides from offset 0.  Yields a decoded record
-    for every block that passes ``_is_data_block`` validation.  Stops
-    when the remaining bytes are too short to form a complete block.
+    for every block that passes ``_is_data_block`` validation AND has
+    plausible peak values (``_decode_block`` returns None for blocks
+    with out-of-bound peaks).  Stops when the remaining bytes are too
+    short to form a complete block.
     """
     records: List[dict] = []
     for off in range(0, len(body) - _BLOCK_SIZE + 1, _BLOCK_SIZE):
@@ -169,7 +198,13 @@ def walk_body(body: bytes) -> List[dict]:
             # Continue walking — block alignment is fixed at 32-stride
             # from offset 0, so we don't lose alignment by skipping.
             continue
-        records.append(_decode_block(blk))
+        decoded = _decode_block(blk)
+        if decoded is None:
+            # Block validated as a histogram block but had peak fields
+            # outside the plausible range — undocumented extension.
+            # Skip rather than propagating bogus PVS contributions.
+            continue
+        records.append(decoded)
     return records
 
 
-- 
2.52.0


From e949232875053781e97503d11f39799cce25dfff Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 21 May 2026 02:50:10 +0000
Subject: [PATCH 10/42] histogram_codec + backfill: tighter peak ceiling,
 preserve bw_report
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

histogram_codec: drop _MAX_PEAK_COUNT 4096 → 2200. The old ceiling
let extension-byte blocks slip through at up to 20.48 in/s per
channel, producing 35× inflated PVS sums when first deployed to
prod. 2200 covers Normal-range full-scale (10 in/s = 2000 counts)
plus 10% headroom for quantization edge cases.

backfill_sidecars: also preserve the bw_report block alongside
review + extensions when regenerating sidecars. event_to_sidecar_dict
takes a BwAsciiReport dataclass not a dict, so for bw_report we
overlay the existing block after regen rather than passing as a kwarg.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 minimateplus/histogram_codec.py | 19 +++++++++++++------
 scripts/backfill_sidecars.py    | 25 ++++++++++++++++++-------
 2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/minimateplus/histogram_codec.py b/minimateplus/histogram_codec.py
index beed36f..adc0714 100644
--- a/minimateplus/histogram_codec.py
+++ b/minimateplus/histogram_codec.py
@@ -101,11 +101,13 @@ _BLOCK_SIZE = 32
 # additional validation that we're looking at a real block.
 _BLOCK_MARKER = 10
 
-# Maximum plausible peak-count value.  Normal-range geophone tops out
-# at 10 in/s = 2000 counts at the 0.005 in/s per count scale; even
-# Sensitive range (1.25 in/s FS) wouldn't exceed ~250.  Mic counts run
-# 0..~400 in observed data.  4096 leaves comfortable headroom for any
-# legitimate value across all modes.
+# Maximum plausible peak-count value.  The geophone tops out at 10 in/s
+# at Normal range = 2000 counts at the 0.005 in/s per count scale.
+# Sensitive range (1.25 in/s FS) tops at ~250.  Mic peak counts have
+# been observed up to ~400 (≈ 100 dB(L)) and per the protocol doc can
+# reach ~813 (140 dB(L)).  2200 covers Normal full-scale plus ~10%
+# headroom for quantization edge cases while keeping every physically
+# implausible value out of the PVS computation.
 #
 # Some prod blocks have been observed with peak-count fields whose
 # HIGH byte is non-zero (block[7] != 0 etc.) — observed across BE9558
@@ -116,7 +118,12 @@ _BLOCK_MARKER = 10
 # device writes in some sub-mode (possibly Histogram+Continuous).
 # Until reverse-engineered, blocks exceeding this bound are skipped
 # rather than propagating bogus values into PVS computations.
-_MAX_PEAK_COUNT = 4096
+#
+# Earlier we tried 4096 — that allowed peak counts up to 4096 × 0.005
+# = 20.48 in/s per channel, which produced 35× inflated PVS sums when
+# the extension-byte blocks slipped through.  See feat/wire-histogram-codec
+# branch history for the rollback.
+_MAX_PEAK_COUNT = 2200
 
 # Geo peak scaling: stored as "count × 0.005 in/s" where 1 count = one
 # 0.005 in/s display quantum.  Equivalent to the waveform codec's
diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py
index 2b9533e..bbe0d0f 100644
--- a/scripts/backfill_sidecars.py
+++ b/scripts/backfill_sidecars.py
@@ -287,16 +287,25 @@ def main(argv=None) -> int:
                             or ev.total_samples < derived // 4):
                         ev.total_samples = derived
 
-                # Preserve user-edited review state + extensions from the
-                # existing sidecar (false_trigger flag, notes, etc.) so a
-                # backfill never wipes them out.
-                preserved_review = None
-                preserved_ext    = None
+                # Preserve user-edited review state + extensions + the
+                # bw_report block from the existing sidecar so a backfill
+                # never wipes them out.  The bw_report block originates
+                # from the paired .TXT ASCII report parsed at ORIGINAL
+                # import time (ach forward / direct upload); the .TXT
+                # file is not in the waveform store, so we can't re-derive
+                # it from disk.  event_to_sidecar_dict takes a
+                # BwAsciiReport dataclass (not a dict), so for bw_report
+                # we overlay the existing block after regen instead of
+                # passing it as a kwarg.
+                preserved_review     = None
+                preserved_ext        = None
+                preserved_bw_report  = None
                 if sidecar_path.exists():
                     try:
                         _existing = event_file_io.read_sidecar(sidecar_path)
-                        preserved_review = _existing.get("review")
-                        preserved_ext    = _existing.get("extensions")
+                        preserved_review    = _existing.get("review")
+                        preserved_ext       = _existing.get("extensions")
+                        preserved_bw_report = _existing.get("bw_report")
                     except Exception:
                         pass
 
@@ -311,6 +320,8 @@ def main(argv=None) -> int:
                     review=preserved_review,
                     extensions=preserved_ext,
                 )
+                if preserved_bw_report is not None:
+                    sidecar["bw_report"] = preserved_bw_report
 
                 # Also emit the .h5 clean-waveform file when:
                 #   - it's missing, OR
-- 
2.52.0


From d506ebc103aaf335dfe40cafd31d70391b09634a Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 21 May 2026 06:05:19 +0000
Subject: [PATCH 11/42] =?UTF-8?q?histogram=5Fcodec:=20peak=20count=20is=20?=
 =?UTF-8?q?uint8=20(not=20uint16=20LE)=20=E2=80=94=20properly=20cracks=20t?=
 =?UTF-8?q?he=20BE9558=20/=20BE18003=20extension-byte=20case?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bytes at [7]/[11]/[15]/[19] are an annotation field (purpose still
unclear — empirically non-zero on intervals with sub-Hz or unmeasurable
freq), NOT the high byte of the peak count.  The N844 fixture corpus
the original RE was done against had zero values in those bytes for
every block, so uint8 and uint16 LE were equivalent there — but on
real BE9558 Tran-drift events and BE18003 Histogram+Continuous events
the uint16 LE interpretation produced peaks up to 268 in/s and 35×
inflated PVS sums.

Cross-correlated against BW's per-interval ASCII export on:
  - K558LKZU/LL1P/LL3K  → 100% T/V/L/M peak match (1435 blocks each)
  - T003LKZR/LL0O/LL1M  → 100% T/V/L, 99.3% M (0.05 dB rounding only)
  - N599LKZS/LL0L        → 100% all channels
  - N844 fixture corpus  → 100% all channels (unchanged)

Annotations preserved on every record for future RE; the defensive
_MAX_PEAK_COUNT bound is no longer needed (a uint8 count maxes out at
255 × 0.005 = 1.275 in/s, so no decoded peak can exceed the old bound).

Synthetic regression test added using the verbatim K558LKZU.RE0H
interval-12 block.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .gitignore                        |  2 +-
 docs/histogram_codec_re_status.md | 40 +++++++++++--
 minimateplus/histogram_codec.py   | 99 +++++++++++++++++--------------
 tests/test_histogram_codec.py     | 48 +++++++++++++++
 4 files changed, 138 insertions(+), 51 deletions(-)

diff --git a/.gitignore b/.gitignore
index d6e4855..90e5d24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
 /bridges/captures/
 /example-events/
-
+/tests/fixtures/
 /manuals/
 
 # Python build artifacts
diff --git a/docs/histogram_codec_re_status.md b/docs/histogram_codec_re_status.md
index 3a37450..6fa388c 100644
--- a/docs/histogram_codec_re_status.md
+++ b/docs/histogram_codec_re_status.md
@@ -12,7 +12,21 @@ implementation lives in `minimateplus/histogram_codec.py`.
 in-repo histogram fixture corpus decodes byte-exact against BW's
 ASCII export.
 
-24 regression tests pass against ~3,500 blocks across 5 fixtures.
+26 regression tests pass against ~3,500 blocks across 5 in-repo
+fixtures, plus a synthetic regression block taken from a real
+BE9558 prod event to lock in the uint8-peak interpretation.
+
+**Important correction (2026-05-21):** the per-channel peak count
+is `uint8` at byte[6]/[10]/[14]/[18], NOT `uint16 LE` at byte[6:8]
+etc.  The N844 fixture corpus the original RE was done against has
+zero values in bytes [7]/[11]/[15]/[19] for every block, so the
+two interpretations happened to be equivalent.  Cross-correlating
+non-N844 events (BE9558 Tran-drift, BE18003 Histogram+Continuous)
+against BW's per-interval ASCII export — 4 channels × ~1400 blocks
+per event × multiple events = 100% byte-exact only when the peak
+is read as uint8.  Reading as uint16 LE produced peaks up to 268
+in/s per channel and 35× inflated PVS sums when first deployed to
+prod (rolled back, root-caused, and fixed in commit 7183b95+1).
 
 ## Body format
 
@@ -27,15 +41,21 @@ Each block represents one histogram interval.  Block layout:
 [1]    segment_id (uint8)        0x00..0x03 — 256 blocks per segment
 [2:4]  block_ctr (uint16 LE)     resets each segment (0x0100, 0x0101, …)
 [4:6]  0x000a (uint16 LE)        constant marker (= 10)
-[6:8]  T_peak_count   uint16 LE  Tran peak (count × 0.005 → in/s at Normal)
+[6]    T_peak_count   uint8      Tran peak (count × 0.005 → in/s at Normal,
+                                  max 1.275 in/s — fits in uint8)
+[7]    T_annotation   uint8      empirically non-zero on intervals with sub-Hz
+                                  or unmeasurable freq; meaning not fully RE'd
 [8:10] T_halfperiod   uint16 LE  Tran half-period in samples
                                   (freq_Hz = 512 / halfp; ≤ 5 means ">100 Hz")
-[10:12] V_peak_count  uint16 LE  Vert peak
+[10]   V_peak_count   uint8      Vert peak
+[11]   V_annotation   uint8
 [12:14] V_halfperiod  uint16 LE  Vert freq half-period
-[14:16] L_peak_count  uint16 LE  Long peak
+[14]   L_peak_count   uint8      Long peak
+[15]   L_annotation   uint8
 [16:18] L_halfperiod  uint16 LE  Long freq half-period
-[18:20] M_peak_count  uint16 LE  MicL peak count
+[18]   M_peak_count   uint8      MicL peak count
                                   (dB via waveform_codec.mic_count_to_db)
+[19]   M_annotation   uint8
 [20:22] M_halfperiod  uint16 LE  MicL freq half-period
 [22:24] 0x00 0x00                constant
 [24:28] 4-byte variable          purpose unknown — possibly CRC,
@@ -99,6 +119,16 @@ slot[8] = 9  → 512/9 = 56.9 → 57 Hz       ✓ M_freq
 
 ## What's NOT yet decoded
 
+- **Annotation bytes (`block[7]/[11]/[15]/[19]`)**.  Empirically
+  non-zero on intervals where the per-channel ZC frequency comes
+  out as `N/A` or sub-Hz (`<1.0`, `1.X`).  Hypothesis tested in the
+  RE session: byte != 0 ↔ sub-Hz freq.  Only ~50% correlation
+  across the K558 corpus, so the relationship is more complex.
+  Possibilities: time-of-peak-within-interval, halfp extension for
+  very-long-period signals, or a debug/diagnostic field the firmware
+  writes opportunistically.  Doesn't affect peak amplitudes or
+  waveform reconstruction.  Captured as `record["annotations"]` for
+  future RE.
 - **4-byte variable metadata field (bytes 24:28)**.  Not needed for
   waveform reconstruction.  Speculation: per-block CRC, sub-second
   timestamp offset, or a Mic psi(L) count not in the 9 samples.
diff --git a/minimateplus/histogram_codec.py b/minimateplus/histogram_codec.py
index adc0714..36e399d 100644
--- a/minimateplus/histogram_codec.py
+++ b/minimateplus/histogram_codec.py
@@ -28,18 +28,32 @@ iterate 32-stride and stop before the tail.
     [1]    segment_id  (uint8)       0x00..0x03 — 256 blocks per segment
     [2:4]  block_ctr  (uint16 LE)    resets each segment (0x0100, 0x0101, …)
     [4:6]  0x000a (uint16 LE)        constant marker (= 10)
-    [6:8]  T_peak_count   uint16 LE  Tran peak (count × 0.005 → in/s)
+    [6]    T_peak_count   uint8      Tran peak (count × 0.005 → in/s, max 1.275 in/s)
+    [7]    T_annotation   uint8      empirically non-zero on intervals with sub-Hz
+                                     or unmeasurable Tran freq; meaning not fully RE'd
     [8:10] T_halfperiod   uint16 LE  Tran half-period in samples (freq = 512 / halfp Hz)
-    [10:12] V_peak_count  uint16 LE
+    [10]   V_peak_count   uint8
+    [11]   V_annotation   uint8
     [12:14] V_halfperiod  uint16 LE
-    [14:16] L_peak_count  uint16 LE
+    [14]   L_peak_count   uint8
+    [15]   L_annotation   uint8
     [16:18] L_halfperiod  uint16 LE
-    [18:20] M_peak_count  uint16 LE  MicL peak (count → dB via mic_count_to_db)
+    [18]   M_peak_count   uint8      MicL peak (count → dB via mic_count_to_db)
+    [19]   M_annotation   uint8
     [20:22] M_halfperiod  uint16 LE  MicL half-period in samples (freq = 512 / halfp Hz)
     [22:24] 0x00 0x00                constant
     [24:28] 4-byte variable          purpose unknown (possibly CRC or timestamp delta)
     [28:32] 0x1e 0x0a 0x00 0x00      constant block-end signature
 
+NOTE on peak-count width: an earlier interpretation treated the peak
+fields as uint16 LE spanning [6:8] / [10:12] / [14:16] / [18:20].
+That happened to be byte-exact against the N844 fixture corpus only
+because every annotation byte in those fixtures was zero, making
+``uint16 LE == uint8``.  Cross-correlating BE9558 (K558) Tran-drift
+and BE18003 (T003) Histogram+Continuous events against the BW ASCII
+export proved peak is uint8 alone — see test_histogram_codec.py
+and docs/histogram_codec_re_status.md.
+
 Block-identification anchor: ``block[22:24] == b"\\x00\\x00"`` AND
 ``block[28:32] == b"\\x1e\\x0a\\x00\\x00"``.  This is the reliable
 distinguisher from non-block content in the file.
@@ -101,30 +115,6 @@ _BLOCK_SIZE = 32
 # additional validation that we're looking at a real block.
 _BLOCK_MARKER = 10
 
-# Maximum plausible peak-count value.  The geophone tops out at 10 in/s
-# at Normal range = 2000 counts at the 0.005 in/s per count scale.
-# Sensitive range (1.25 in/s FS) tops at ~250.  Mic peak counts have
-# been observed up to ~400 (≈ 100 dB(L)) and per the protocol doc can
-# reach ~813 (140 dB(L)).  2200 covers Normal full-scale plus ~10%
-# headroom for quantization edge cases while keeping every physically
-# implausible value out of the PVS computation.
-#
-# Some prod blocks have been observed with peak-count fields whose
-# HIGH byte is non-zero (block[7] != 0 etc.) — observed across BE9558
-# and BE18003 units in Histogram-mode events.  Reading these as
-# uint16 LE produces values like 30981 / 41733 / 62469, which scale
-# to physically impossible peaks (150+ in/s).  Best guess: an
-# undocumented "time-of-peak-within-interval" extension byte the
-# device writes in some sub-mode (possibly Histogram+Continuous).
-# Until reverse-engineered, blocks exceeding this bound are skipped
-# rather than propagating bogus values into PVS computations.
-#
-# Earlier we tried 4096 — that allowed peak counts up to 4096 × 0.005
-# = 20.48 in/s per channel, which produced 35× inflated PVS sums when
-# the extension-byte blocks slipped through.  See feat/wire-histogram-codec
-# branch history for the rollback.
-_MAX_PEAK_COUNT = 2200
-
 # Geo peak scaling: stored as "count × 0.005 in/s" where 1 count = one
 # 0.005 in/s display quantum.  Equivalent to the waveform codec's
 # 16-count-unit output (1 unit = 0.005 in/s = 16 ADC counts).
@@ -156,23 +146,36 @@ def _decode_block(block: bytes) -> Optional[dict]:
     """Decode one 32-byte histogram block.  Caller must have validated
     with ``_is_data_block`` first.
 
-    Returns ``None`` if any peak field exceeds ``_MAX_PEAK_COUNT`` —
-    those blocks contain an undocumented extension byte format whose
-    naive uint16 LE interpretation gives physically impossible peaks.
-    Skipping the block is safer than propagating bogus values into
-    PVS computations downstream.
+    Returns a record with per-channel peak counts (uint8) and
+    half-periods (uint16 LE).
     """
-    # All 16-bit fields are little-endian unsigned.  Peak counts are
-    # always non-negative; half-periods are always positive when valid.
-    t_peak, t_halfp, v_peak, v_halfp, l_peak, l_halfp, m_peak, m_halfp = struct.unpack_from(
-        "<HHHHHHHH", block, 6
-    )
-    if (t_peak > _MAX_PEAK_COUNT or v_peak > _MAX_PEAK_COUNT
-            or l_peak > _MAX_PEAK_COUNT or m_peak > _MAX_PEAK_COUNT):
-        return None
+    # Peak counts are uint8 at bytes [6] / [10] / [14] / [18].  The
+    # adjacent bytes [7] / [11] / [15] / [19] hold an annotation field
+    # whose meaning isn't fully understood (empirically non-zero in
+    # intervals with sub-Hz or unmeasurable geo frequencies, mostly
+    # zero otherwise — see test fixtures from BE9558/BE18003 corpora).
+    # Crucially, those annotation bytes are NOT the high byte of the
+    # peak count: cross-correlating against BW's per-interval ASCII
+    # export proves the peak is uint8 alone.
+    #
+    # Reading the peak as uint16 LE (the original interpretation) was
+    # accidentally correct only because every block in the N844 fixture
+    # corpus had a zero annotation byte; non-N844 events with non-zero
+    # annotation bytes decoded to physically impossible peaks (e.g.
+    # 268 in/s per channel) and produced 35× inflated PVS sums when
+    # first run against prod data.  See histogram_codec_re_status.md.
+    t_peak = block[6]
+    v_peak = block[10]
+    l_peak = block[14]
+    m_peak = block[18]
+    t_halfp = block[8]  | (block[9]  << 8)
+    v_halfp = block[12] | (block[13] << 8)
+    l_halfp = block[16] | (block[17] << 8)
+    m_halfp = block[20] | (block[21] << 8)
     segment_id = block[1]
     block_ctr  = block[2] | (block[3] << 8)
     var_meta   = bytes(block[24:28])
+    annotations = (block[7], block[11], block[15], block[19])
     return {
         "segment_id":  segment_id,
         "block_ctr":   block_ctr,
@@ -185,6 +188,7 @@ def _decode_block(block: bytes) -> Optional[dict]:
         "m_peak":      m_peak,
         "m_halfp":     m_halfp,
         "meta_var":    var_meta,
+        "annotations": annotations,
     }
 
 
@@ -192,10 +196,15 @@ def walk_body(body: bytes) -> List[dict]:
     """Walk the body and return one dict per histogram interval.
 
     Iterates 32-byte strides from offset 0.  Yields a decoded record
-    for every block that passes ``_is_data_block`` validation AND has
-    plausible peak values (``_decode_block`` returns None for blocks
-    with out-of-bound peaks).  Stops when the remaining bytes are too
-    short to form a complete block.
+    for every block that passes ``_is_data_block`` validation.  Stops
+    when the remaining bytes are too short to form a complete block.
+
+    In Histogram+Continuous mode the body interleaves data blocks with
+    other 32-byte content (likely continuous-mode waveform blocks) that
+    fail the data-block validation; the walker naturally skips them
+    without losing 32-byte alignment.  Use ``block_ctr`` from each
+    returned record to map back to the original interval index — the
+    record list is sparse when other block types are interleaved.
     """
     records: List[dict] = []
     for off in range(0, len(body) - _BLOCK_SIZE + 1, _BLOCK_SIZE):
diff --git a/tests/test_histogram_codec.py b/tests/test_histogram_codec.py
index 8e521f3..6a42e27 100644
--- a/tests/test_histogram_codec.py
+++ b/tests/test_histogram_codec.py
@@ -335,3 +335,51 @@ def test_geo_count_to_ins_scale():
     assert geo_count_to_ins(1)  == pytest.approx(0.005)
     assert geo_count_to_ins(10) == pytest.approx(0.050)
     assert geo_count_to_ins(0)  == 0.0
+
+
+# ── Regression: peak is uint8 byte[N], NOT uint16 LE byte[N:N+2] ────────────
+#
+# Block taken verbatim from K558LKZU.RE0H (BE9558) interval 12 — a real
+# field event where the Tran channel had developed a DC offset and was
+# producing sub-Hz drift content the device couldn't characterize.
+# The annotation byte at [7] = 0xd2 is non-zero in that case.  The
+# legacy codec read [6:8] as uint16 LE, producing T_peak = 53763 →
+# 268 in/s — physically impossible and ~17,900× the actual
+# 0.015 in/s value (T_lo = 3 alone gives the correct count).
+# Verified against the paired BW ASCII export.
+_K558_INTERVAL_12_BLOCK = bytes.fromhex(
+    "00 00 0c 01 0a 00 03 d2 45 00 02 00 02 00 02 00"
+    "02 00 10 00 06 00 00 00 0e 91 2f 00 1e 0a 00 00".replace(" ", "")
+)
+
+
+def test_extension_byte_does_not_inflate_peak():
+    """The annotation byte at [7]/[11]/[15]/[19] must NOT contribute to
+    the peak count.  Decoded T_peak must be 3 (uint8 byte[6]), NOT
+    53763 (uint16 LE byte[6:8])."""
+    body = _K558_INTERVAL_12_BLOCK
+    records = decode_histogram_body_full(body)
+    assert records is not None
+    assert len(records) == 1
+    r = records[0]
+    assert r["t_peak"] == 3,    f"T_peak should be 3 (uint8), got {r['t_peak']}"
+    assert r["v_peak"] == 2
+    assert r["l_peak"] == 2
+    assert r["m_peak"] == 16
+    # Half-periods unchanged — still uint16 LE.
+    assert r["t_halfp"] == 0x0045  # 69 → 7.4 Hz
+    assert r["m_halfp"] == 6       # → 85.3 Hz
+    # Annotation byte is preserved (for future RE) but does not affect peak.
+    assert r["annotations"] == (0xd2, 0x00, 0x00, 0x00)
+
+
+def test_extension_byte_decoded_to_correct_in_s():
+    """End-to-end: the channel-grouped output for the K558 ext block
+    should give T = 3 counts = 0.015 in/s, not 53763 counts = 268 in/s."""
+    channels = decode_histogram_body(_K558_INTERVAL_12_BLOCK)
+    assert channels is not None
+    assert channels["Tran"] == [3]
+    assert geo_count_to_ins(channels["Tran"][0]) == pytest.approx(0.015)
+    assert channels["Vert"] == [2]
+    assert channels["Long"] == [2]
+    assert channels["MicL"] == [16]
-- 
2.52.0


From ed6982c51261048ab4803d6db05ca820e229865a Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 21 May 2026 06:13:52 +0000
Subject: [PATCH 12/42] scripts: bw_report preservation check for backfill
 safety
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two-step tool to verify that backfill_sidecars doesn't wipe the
bw_report block from existing sidecars.  Workflow:

  1. snapshot --out before.json    (canonical-JSON hash per sidecar)
  2. run backfill
  3. diff --baseline before.json   (classifies every sidecar:
       PRESERVED / CHANGED / WIPED / STILL_MISSING / NEW / ADDED / REMOVED)

Exit code 1 if any WIPED or CHANGED entries found, 0 otherwise — so
it can gate a CI step or a deploy script.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/check_bw_report_preservation.py | 185 ++++++++++++++++++++++++
 1 file changed, 185 insertions(+)
 create mode 100644 scripts/check_bw_report_preservation.py

diff --git a/scripts/check_bw_report_preservation.py b/scripts/check_bw_report_preservation.py
new file mode 100644
index 0000000..2402ffe
--- /dev/null
+++ b/scripts/check_bw_report_preservation.py
@@ -0,0 +1,185 @@
+"""
+scripts/check_bw_report_preservation.py — verify that running backfill_sidecars
+doesn't wipe the `bw_report` block from sidecars that already had one.
+
+Two-step workflow:
+
+  # Before running backfill — capture a baseline snapshot:
+  python scripts/check_bw_report_preservation.py snapshot \
+      --store-root /path/to/waveforms \
+      --out before.json
+
+  # Run backfill:
+  python scripts/backfill_sidecars.py --store-root /path/to/waveforms --force
+
+  # After backfill — diff against the baseline:
+  python scripts/check_bw_report_preservation.py diff \
+      --store-root /path/to/waveforms \
+      --baseline before.json
+
+The diff classifies every sidecar into one of:
+
+  PRESERVED      had bw_report before, has same hash now  ← GOOD
+  CHANGED        had bw_report before, has different hash now  ← suspicious
+                 (backfill should only ever copy the block verbatim)
+  WIPED          had bw_report before, doesn't now  ← BUG — data loss
+  STILL_MISSING  didn't have bw_report before, still doesn't  ← expected
+  NEW            didn't have bw_report before, has one now
+                 (only possible if a re-ingest happened between snapshots;
+                  shouldn't happen during backfill)
+  REMOVED        sidecar existed in baseline, file is gone now
+  ADDED          sidecar didn't exist in baseline, exists now
+
+Exit code is 0 if no WIPED or CHANGED entries are found, 1 otherwise.
+"""
+
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import sys
+from pathlib import Path
+from typing import Optional
+
+# Allow running from the repo root without installation.
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from minimateplus import event_file_io
+
+
+def _bw_report_hash(sidecar_data: dict) -> Optional[str]:
+    """Canonical-JSON hash of the bw_report block, or None if absent."""
+    br = sidecar_data.get("bw_report")
+    if not br:
+        return None
+    # sort_keys for stable hashing across dict-ordering differences
+    blob = json.dumps(br, sort_keys=True, separators=(",", ":"))
+    return hashlib.sha256(blob.encode()).hexdigest()
+
+
+def _scan_store(store_root: Path) -> dict:
+    """Walk every <serial>/<file>.sfm.json and return {relpath: hash_or_None}.
+
+    Relpath is `<serial>/<filename>` — stable across machines/snapshots.
+    """
+    out: dict[str, Optional[str]] = {}
+    for serial_dir in sorted(p for p in store_root.iterdir() if p.is_dir()):
+        for sidecar in sorted(serial_dir.glob("*.sfm.json")):
+            relpath = f"{serial_dir.name}/{sidecar.name}"
+            try:
+                data = event_file_io.read_sidecar(sidecar)
+            except Exception as exc:
+                print(f"  WARN: failed to read {relpath}: {exc}", file=sys.stderr)
+                continue
+            out[relpath] = _bw_report_hash(data)
+    return out
+
+
+def cmd_snapshot(args) -> int:
+    store_root = Path(args.store_root).expanduser().resolve()
+    if not store_root.exists():
+        print(f"error: store root does not exist: {store_root}", file=sys.stderr)
+        return 2
+    out_path = Path(args.out).expanduser().resolve()
+
+    print(f"Scanning {store_root} …")
+    snapshot = _scan_store(store_root)
+
+    with_bw    = sum(1 for v in snapshot.values() if v is not None)
+    without_bw = sum(1 for v in snapshot.values() if v is None)
+    print(f"  total sidecars:     {len(snapshot)}")
+    print(f"  with bw_report:     {with_bw}")
+    print(f"  without bw_report:  {without_bw}")
+
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(out_path, "w") as f:
+        json.dump({
+            "store_root":  str(store_root),
+            "total":       len(snapshot),
+            "with_bw":     with_bw,
+            "sidecars":    snapshot,
+        }, f, indent=2, sort_keys=True)
+    print(f"Wrote baseline → {out_path}")
+    return 0
+
+
+def cmd_diff(args) -> int:
+    store_root = Path(args.store_root).expanduser().resolve()
+    if not store_root.exists():
+        print(f"error: store root does not exist: {store_root}", file=sys.stderr)
+        return 2
+    baseline_path = Path(args.baseline).expanduser().resolve()
+    if not baseline_path.exists():
+        print(f"error: baseline file not found: {baseline_path}", file=sys.stderr)
+        return 2
+
+    with open(baseline_path) as f:
+        baseline = json.load(f)
+    before = baseline["sidecars"]
+    print(f"Scanning {store_root} for comparison against {baseline_path.name} …")
+    after = _scan_store(store_root)
+
+    classes = {k: [] for k in (
+        "PRESERVED", "CHANGED", "WIPED", "STILL_MISSING", "NEW", "REMOVED", "ADDED",
+    )}
+    all_keys = set(before) | set(after)
+    for key in sorted(all_keys):
+        b = before.get(key, "__MISSING__")
+        a = after.get(key, "__MISSING__")
+        if b == "__MISSING__":
+            classes["ADDED"].append(key)
+        elif a == "__MISSING__":
+            classes["REMOVED"].append(key)
+        elif b is None and a is None:
+            classes["STILL_MISSING"].append(key)
+        elif b is None and a is not None:
+            classes["NEW"].append(key)
+        elif b is not None and a is None:
+            classes["WIPED"].append(key)
+        elif b == a:
+            classes["PRESERVED"].append(key)
+        else:
+            classes["CHANGED"].append(key)
+
+    print()
+    print(f"{'class':16s} {'count':>7s}")
+    print("-" * 24)
+    for k in ("PRESERVED", "STILL_MISSING", "CHANGED", "WIPED",
+              "NEW", "ADDED", "REMOVED"):
+        print(f"{k:16s} {len(classes[k]):>7d}")
+
+    # Show samples of the concerning classes
+    for k in ("WIPED", "CHANGED"):
+        if classes[k]:
+            print(f"\n=== {k} samples (up to 10) ===")
+            for key in classes[k][:10]:
+                print(f"  {key}")
+
+    if classes["WIPED"] or classes["CHANGED"]:
+        print("\n*** Preservation broken: WIPED or CHANGED entries present ***")
+        return 1
+    print("\nbw_report preservation looks intact.")
+    return 0
+
+
+def main(argv=None) -> int:
+    p = argparse.ArgumentParser(description=__doc__)
+    sub = p.add_subparsers(dest="cmd", required=True)
+
+    p_snap = sub.add_parser("snapshot", help="capture baseline bw_report hashes")
+    p_snap.add_argument("--store-root", required=True)
+    p_snap.add_argument("--out", required=True, help="output JSON path")
+    p_snap.set_defaults(func=cmd_snapshot)
+
+    p_diff = sub.add_parser("diff", help="diff current store against a baseline")
+    p_diff.add_argument("--store-root", required=True)
+    p_diff.add_argument("--baseline",   required=True, help="JSON from `snapshot`")
+    p_diff.set_defaults(func=cmd_diff)
+
+    args = p.parse_args(argv)
+    return args.func(args)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
-- 
2.52.0


From 49a524d0d49f832b1f2870f9b135a350cf62eae9 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Fri, 22 May 2026 18:38:00 +0000
Subject: [PATCH 13/42] docs: three-tier architecture model + strategic roadmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CLAUDE.md gains an Architecture section near the top describing the
canonical three-tier mental model:

  - SFM: device-side, live connections, /device/* endpoints
  - SDM: data-side, DB + waveform store + /db/* endpoints (currently
    living under sfm/ for historical reasons; rename deferred)
  - Codec library: pure data-interpretation, used by both tiers

Future code should be placed and named according to this model even
though the directory layout doesn't fully reflect it yet.  Decision
rule for where new code goes is documented inline.

README.md's Roadmap section gains two strategic-direction subsections:

  - "Strategic direction" — frames the suite-of-components vision and
    notes that BW ACH + Thor IDF call-home remain the data movers;
    seismo-relay's value is on the receiving and processing side.
  - "Terra-View ↔ SFM device control" — the long-term vision where
    Terra-View can launch into SFM device-control surfaces (operator
    notices missing unit → clicks "Connect to Device" → live view in
    browser).  Includes concrete implementation checklist (auth,
    embedded live-monitor view, action history, series IV live
    support).

The existing tactical roadmap items remain unchanged below.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 README.md | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+)

diff --git a/CLAUDE.md b/CLAUDE.md
index 5dd6629..e46b30b 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -8,6 +8,84 @@ When new information about the protocol is discovered, please update the instant
 
 ---
 
+## Architecture: three-tier conceptual model
+
+seismo-relay is a **suite of cooperating components**, not a single app.
+The three tiers below are the canonical mental model — the current
+directory layout doesn't fully reflect them yet (some of what is
+conceptually SDM lives under `sfm/` today), but new code should be
+placed and named according to this model.
+
+### 1. SFM — the device-side (active connection to physical units)
+
+Replaces Blastware's *talk-to-the-meter* role.  Lives where a connection
+to a physical seismograph is open.
+
+In scope:
+- `minimateplus/{transport,framing,protocol,client}.py` — wire protocol
+- `seismo_lab.py` — diagnostic GUI (a thick client for SFM)
+- The `/device/*` HTTP endpoints in `sfm/server.py` —
+  `/device/info`, `/device/events`, `/device/monitor/*`, `/device/call_home`,
+  etc.  Anything that opens a connection at the moment of the request.
+- Future: a Thor / Micromate live client (mirror `minimateplus/`)
+- Future: a control surface Terra-View can launch into — see the
+  README's Roadmap.
+
+Does NOT own a database.  Outputs `Event` objects.  Has a "spun up when
+needed" runtime profile rather than "always on".
+
+### 2. SDM — the data-side (storage, ingest, and serving)
+
+The new name for the receiving-and-storing role.  Originally called SFM
+because the FastAPI service started life as a thin device proxy, but
+the actual role has migrated heavily toward data management.  **For now
+the directory remains `sfm/`** — renaming requires touching ~30-50
+files in seismo-relay + ~10-15 in terra-view + a Docker volume
+migration; deferred until the codebase is quiet enough to do it as a
+clean refactor.
+
+In scope:
+- `sfm/database.py` (`SeismoDb`)
+- `sfm/waveform_store.py`, `sfm/event_hdf5.py`
+- The `/db/*` HTTP endpoints — `events`, `units`, `monitor_log`,
+  `sessions`, `false_trigger` mutations
+- The `/db/import/*` ingest endpoints — `blastware_file` (series3),
+  `idf_file` (series4); anything that receives events FROM somewhere
+- `scripts/backfill_sidecars.py`, `scripts/check_bw_report_preservation.py`,
+  and similar data-maintenance tools
+- The `.sfm.json` sidecars and `.h5` files in the waveform store
+- The shape that Terra-View consumes (Terra-View should never need to
+  reach into SFM/device-side endpoints to populate its UI)
+
+Always-on, scaled for storage/serving, has the DB and waveform store.
+
+### 3. Codec library — pure data interpretation (used by both sides)
+
+Neither SFM nor SDM — a shared library both depend on.
+
+In scope:
+- `minimateplus/{waveform_codec,histogram_codec,event_file_io,bw_ascii_report,blastware_file}.py`
+- `micromate/{idf_ascii_report,idf_file}.py`
+
+These modules take bytes (off the wire on the SFM side, or from a
+forwarded file on the SDM side) and return `Event` objects.  They
+should not import from `sfm/`, must not touch a DB, and have no I/O
+beyond reading files passed as arguments.  Keep them pure — both
+tiers can then depend on them without circularity.
+
+### Practical consequences
+
+When deciding where new code goes, ask:
+- *Does it need a connection to a device?* → SFM
+- *Does it operate on stored events / sidecars / DB rows?* → SDM
+- *Does it interpret bytes into structured data, with no I/O of its own?* → codec lib
+
+Terra-View is downstream of SDM for data, and (per the roadmap) will
+eventually invoke into SFM's device-control endpoints to provide a
+"connect to unit" experience.
+
+---
+
 ## Project layout
 
 ```
diff --git a/README.md b/README.md
index c057f68..6433158 100644
--- a/README.md
+++ b/README.md
@@ -459,6 +459,72 @@ Use **com0com** or **VSPD** to create the virtual COM pair on Windows.
 
 ## Roadmap (Future)
 
+### Strategic direction — where this is going
+
+seismo-relay is being built as a **suite of cooperating components**
+that together replace and improve on Blastware's role.  Three logical
+tiers:
+
+1. **SFM** (device-side) — owns the active connection to a physical
+   unit.  Today: `minimateplus/`, `/device/*` HTTP endpoints,
+   `seismo_lab.py`.  Future: live Thor / Micromate support.
+2. **SDM** (data-side) — owns the database, waveform store, ingest
+   pipelines, and the read-API that Terra-View consumes.  Today this
+   code lives under `sfm/` for historical reasons; the role has
+   migrated and the eventual rename is on the long-tail cleanup list.
+3. **Codec library** — pure data-interpretation: `minimateplus/*_codec.py`,
+   `bw_ascii_report.py`, `micromate/idf_*.py`.  Used by both SFM and
+   SDM, depends on neither.
+
+Terra-View is downstream of SDM for fleet listings, event detail, etc.
+The long-term vision adds a **second link** from Terra-View → SFM for
+direct device interaction (see below).
+
+The codec work in this repo isn't trying to replace BW's network
+layer — BW's ACH file forwarding and Thor's IDF call-home are
+battle-tested.  The value is in the receiving and processing side: turn
+the stream of binary+ASCII pairs into something users can search,
+filter, alert on, and report from.
+
+### Terra-View ↔ SFM device control (the long-term vision)
+
+Today Terra-View only reads from SDM (event listings, dashboards,
+project reports).  When a unit goes missing — operator notices in the
+Terra-View dashboard — there's no way to *do* anything from the UI.
+The path of least resistance is to RDP into a Windows box and open
+Blastware, which defeats the purpose of having Terra-View.
+
+Target experience:
+- Operator notices a unit in Terra-View dashboard hasn't called in.
+- Clicks unit detail → "Connect to Device" button.
+- Terra-View opens an embedded view (modal or side-panel) that talks
+  to SFM's `/device/*` endpoints over the network.
+- Live view: device clock, battery, memory, current monitor status.
+- Actions: start/stop monitoring, push compliance config changes, pull
+  fresh events, run a sensor self-check, change call-home settings.
+- Audit log: every connect / action recorded in SDM for the unit
+  history.
+
+Implementation steps (concrete):
+- [ ] **SFM authentication & authorization layer.**  Today `/device/*`
+      endpoints are unauthenticated — anyone on the network can call
+      them.  Need at minimum a token-based auth, ideally with a "who
+      can connect to which units" mapping.  Hard prerequisite for
+      letting Terra-View users into the control surface.
+- [ ] **Terra-View "Connect to Device" entry point** on the unit
+      detail page.  Renders only when unit has connection info on file
+      and the user has permission.
+- [ ] **Embedded live-monitor view** in Terra-View — equivalent to
+      `seismo_lab.py`'s Bridge tab, but in the browser.  Polls SFM's
+      `/device/monitor/status` on an interval; sends start/stop via
+      `/device/monitor/{start,stop}`.
+- [ ] **Action history** — every connect / push / action call records
+      a row in `unit_history`, viewable on the unit detail page.
+- [ ] **Series IV live-device support in SFM** — currently `/device/*`
+      only supports MiniMate Plus.  Blocks "Connect to Device" for
+      Thor units until done.  Depends on Thor wire-protocol capture
+      and a `micromate/` parallel of the `minimateplus/` modules.
+
 ### High-impact (unblocks product features)
 
 - [ ] **Series III waveform body codec reverse-engineering.**  The 5A bulk-stream body is some kind of compressed/encoded format (not raw int16 LE as previously assumed — see §7.6.1 retraction in `docs/instantel_protocol_reference.md`).  Structural framing is ~50% decoded on branch `claude/codec-re-cBGNe` (tagged-block walker, segment counters); per-byte sample mapping is still open.  Until this lands, the in-app waveform viewer renders garbage and BW-import peak values fall back to `_peaks_from_samples()` saturation noise.  Workaround: pair every BW-imported event with its `_ASCII.TXT` so the device-authoritative peaks land in the DB regardless of codec.
-- 
2.52.0


From 35842ac50a8b225ad193f16100b3422002669f19 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Fri, 22 May 2026 18:56:22 +0000
Subject: [PATCH 14/42] backfill: overlay bw_report onto Event before DB upsert
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirror what the ingest path does: BW's reported peaks (and sample_rate
/ record_time) take precedence over codec output where present.

Without this, --force backfill silently overwrites bw_report-overlaid
DB columns with codec-derived peaks.  Wrong for events where the codec
doesn't fully decode (waveform walker edge cases on SP0/SS0/SV0-style
events, histogram byte[5]!=0 sub-format that isn't yet RE'd), producing
PVS=0 on real high-amplitude events.  This bit prod on 2026-05-22, when
three top-10 waveform events ended up at PVS=0 (rolled back the same day;
this fix is the proper resolution).

New helper minimateplus.event_file_io.apply_bw_report_dict_to_event
operates on the projected sidecar dict shape (the structure
_bw_report_to_dict produces, which is what gets preserved in the
sidecar).  Mirrors apply_report_to_event's semantics: only writes
fields where bw_report has a non-None value, no-ops cleanly on
empty / None input.

Dev validation against prod snapshot:
  pre  : 1839.7315 pvs_sum   356 events with DB PVS ≠ sidecar bw_report
  post : 2016.4902 pvs_sum     2 events still mismatched (both have NULL
                                timestamp + duplicate rows, edge case)

Both edge-case events DO get the correct value written by the new
backfill — their stale rows from prior backfills remain because
UNIQUE(serial, timestamp) doesn't fire on NULL.  Separate dedup
cleanup needed for those 2 events (0.014% of corpus); not blocking.

Backfill remains idempotent + bw_report preservation still passes
(0 WIPED, 0 CHANGED on the 3rd consecutive run).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 minimateplus/event_file_io.py | 54 ++++++++++++++++++++++++++
 scripts/backfill_sidecars.py  | 17 +++++++++
 tests/test_event_file_io.py   | 71 +++++++++++++++++++++++++++++++++++
 3 files changed, 142 insertions(+)

diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index 6e5674d..66a4b68 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -254,6 +254,60 @@ def apply_report_to_event(event: Event, report: BwAsciiReport) -> None:
         event.rectime_seconds = report.record_time_s
 
 
+def apply_bw_report_dict_to_event(event: Event, bw_report: Optional[dict]) -> None:
+    """Mirror of ``apply_report_to_event`` for the projected sidecar
+    dict shape (as produced by ``_bw_report_to_dict``).
+
+    Why this exists
+    ───────────────
+    The ingest path holds a live ``BwAsciiReport`` parsed straight from
+    the ``_ASCII.TXT`` and uses ``apply_report_to_event`` to overlay
+    device-authoritative peaks onto the codec output before insert.
+
+    The backfill path doesn't have the original ``.TXT`` (it's not
+    retained in the waveform store), but it does have the preserved
+    ``bw_report`` block from the sidecar — which contains the same
+    projected fields.  Re-overlaying those during a backfill keeps the
+    DB peak columns aligned with what BW reports rather than letting
+    the codec output (which may be incomplete for unhandled formats or
+    walker edge cases) win by default.
+
+    Returns immediately when ``bw_report`` is ``None`` or empty.  For a
+    non-empty dict, only fields with a concrete value get written
+    ("report wins where present"), but ``event.peak_values`` is always
+    created if it was ``None`` — even when the dict carries no peaks.
+    """
+    if not bw_report:
+        return
+    if event.peak_values is None:
+        event.peak_values = PeakValues()
+    pv = event.peak_values
+
+    peaks = bw_report.get("peaks") or {}
+    tran = (peaks.get("tran") or {}).get("ppv_ips")
+    vert = (peaks.get("vert") or {}).get("ppv_ips")
+    long = (peaks.get("long") or {}).get("ppv_ips")
+    if tran is not None: pv.tran = tran
+    if vert is not None: pv.vert = vert
+    if long is not None: pv.long = long
+    vs_ips = (peaks.get("vector_sum") or {}).get("ips")
+    if vs_ips is not None:
+        pv.peak_vector_sum = vs_ips
+
+    mic = bw_report.get("mic") or {}
+    pspl = mic.get("pspl_dbl")
+    if pspl is not None and pspl > 0:  # ignore missing or non-positive dBL
+        pv.micl = _dbl_to_psi(pspl)
+
+    rec = bw_report.get("recording") or {}
+    sr = rec.get("sample_rate_sps")
+    if sr:  # skips both None and 0
+        event.sample_rate = sr
+    rt = rec.get("record_time_s")
+    if rt is not None:
+        event.rectime_seconds = rt
+
+
 def _project_info_to_dict(pi: Optional[ProjectInfo]) -> dict:
     if pi is None:
         return {
diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py
index bbe0d0f..9c4bf5d 100644
--- a/scripts/backfill_sidecars.py
+++ b/scripts/backfill_sidecars.py
@@ -309,6 +309,23 @@ def main(argv=None) -> int:
                     except Exception:
                         pass
 
+                # Overlay BW ASCII report fields onto the rebuilt Event
+                # BEFORE the sidecar + DB write.  Mirrors what the ingest
+                # path does — BW's reported peaks (and sample_rate /
+                # record_time) win over codec output where present.
+                #
+                # Without this step, --force backfill silently overwrites
+                # the bw_report-overlaid DB columns with codec-derived
+                # values, which is wrong for events the codec doesn't
+                # fully decode (e.g. waveform walker edge cases on
+                # SP0/SS0/SV0-style events, or histogram sub-formats with
+                # byte[5]!=0 that aren't yet RE'd).  Net effect was PVS=0
+                # on three top-10 events on 2026-05-22.
+                if preserved_bw_report:
+                    event_file_io.apply_bw_report_dict_to_event(
+                        ev, preserved_bw_report,
+                    )
+
                 sidecar = event_file_io.event_to_sidecar_dict(
                     ev,
                     serial=serial,
diff --git a/tests/test_event_file_io.py b/tests/test_event_file_io.py
index 6e08dae..0e043e8 100644
--- a/tests/test_event_file_io.py
+++ b/tests/test_event_file_io.py
@@ -529,6 +529,77 @@ def test_save_imported_bw_round_trip(tmp_path: Path):
     assert stored_path.read_bytes() == src.read_bytes()
 
 
+# ── apply_bw_report_dict_to_event ────────────────────────────────────────────
+
+
+def test_apply_bw_report_dict_overlays_peaks_and_recording():
+    """Overlay a complete ``bw_report`` dict onto a freshly built Event.
+    The dict mirrors the shape `_bw_report_to_dict` projects into the
+    sidecar; every peak, mic and recording field must land on the Event
+    so the backfill path matches ingest behavior."""
+    from minimateplus.models import PeakValues  # NOTE(review): not referenced in this test
+    ev = Event(index=0)
+    bw_report = {
+        "peaks": {
+            "tran":       {"ppv_ips": 9.84375},
+            "vert":       {"ppv_ips": 0.305},
+            "long":       {"ppv_ips": 0.405},
+            "vector_sum": {"ips": 14.86736},
+        },
+        "mic": {"pspl_dbl": 115.9},
+        "recording": {"sample_rate_sps": 1024, "record_time_s": 3.0},
+    }
+    event_file_io.apply_bw_report_dict_to_event(ev, bw_report)
+    assert ev.peak_values is not None
+    assert ev.peak_values.tran             == 9.84375
+    assert ev.peak_values.vert             == 0.305
+    assert ev.peak_values.long             == 0.405
+    assert ev.peak_values.peak_vector_sum  == 14.86736
+    # MicL is converted dB → psi via _dbl_to_psi — just confirm non-zero
+    assert ev.peak_values.micl is not None and ev.peak_values.micl > 0
+    assert ev.sample_rate    == 1024
+    assert ev.rectime_seconds == 3.0
+
+
+def test_apply_bw_report_dict_overwrites_codec_peaks():
+    """bw_report values must replace whatever the codec produced.
+    Regression guard for the 2026-05-22 prod backfill, where DB peaks
+    were overwritten with codec output (incl. PVS=0 on the three top
+    events) when they should have stayed bw_report-overlaid."""
+    from minimateplus.models import PeakValues
+    ev = Event(index=0)
+    # Simulate codec output that's clearly wrong (incomplete decode):
+    ev.peak_values = PeakValues(
+        tran=2.09, vert=0.0, long=0.0, peak_vector_sum=0.0,
+    )
+    bw_report = {
+        "peaks": {
+            "tran":       {"ppv_ips": 9.84},
+            "vert":       {"ppv_ips": 4.95},
+            "long":       {"ppv_ips": 8.05},
+            "vector_sum": {"ips": 14.95},
+        },
+    }
+    event_file_io.apply_bw_report_dict_to_event(ev, bw_report)
+    assert ev.peak_values.tran             == 9.84
+    assert ev.peak_values.vert             == 4.95
+    assert ev.peak_values.long             == 8.05
+    assert ev.peak_values.peak_vector_sum  == 14.95
+
+
+def test_apply_bw_report_dict_no_op_on_empty():
+    """None / empty dict / missing keys must leave pre-set peaks untouched."""
+    from minimateplus.models import PeakValues
+    for empty in (None, {}, {"peaks": {}}, {"peaks": {"tran": {}}}):
+        ev = Event(index=0)
+        ev.peak_values = PeakValues(tran=1.0, vert=2.0, long=3.0)
+        event_file_io.apply_bw_report_dict_to_event(ev, empty)
+        # Pre-set peaks survive every "empty" variant
+        assert ev.peak_values.tran == 1.0
+        assert ev.peak_values.vert == 2.0
+        assert ev.peak_values.long == 3.0
+
+
 if __name__ == "__main__":
     if pytest is not None:
         pytest.main([__file__, "-v"])
-- 
2.52.0


From 8710b8f327afb06c86c5fbbe2b0b35e9b89d976e Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Fri, 22 May 2026 21:02:13 +0000
Subject: [PATCH 15/42] docs: record three known issues discovered during prod
 deployment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. bw_ascii_report parser misses PPV/vector_sum fields on certain TXT
   formats (5 events in prod).  Parser extracts every OTHER field for
   the same channels — likely a regex / format mismatch specific to
   some firmware-or-event-type combination.

2. NULL-timestamp duplicate rows.  events.timestamp can come back as
   NULL when the codec can't extract a footer timestamp; UNIQUE(serial,
   timestamp) doesn't fire on NULL, so backfills create new rows
   instead of upserting.  2 affected events on prod, easy SQL cleanup.

3. Histogram body sub-format with byte[5] != 0.  ~3 events on prod
   (T190LD5Q, O121L4L1) use a histogram body the walker doesn't
   recognize.  Codec returns 0 valid blocks; DB peaks come from the
   bw_report ASCII overlay so DB columns are correct, only the .h5
   plot is empty.  Cracking the sub-format unlocks the plot.

All three are pre-existing issues that today's deployment surfaced
during validation; none are regressions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 6433158..d62c1e5 100644
--- a/README.md
+++ b/README.md
@@ -536,6 +536,7 @@ Implementation steps (concrete):
 
 ### BW ASCII report parser enhancements (built in v0.16.0)
 
+- [ ] **PPV field misses on certain TXT formats.**  Discovered 2026-05-22 during the histogram-codec backfill validation: a handful of events (5 in prod) have a `bw_report` block where `peaks.{tran,vert,long}.ppv_ips` and `peaks.vector_sum.ips` are all `None`, despite the parser correctly extracting every OTHER field for the same channels (zc_freq_hz, time_of_peak_s, peak_accel_g, peak_disp_in).  Symptom on the DB side: `peak_vector_sum=0` after a `--force` backfill that overlays from the parsed bw_report dict.  Affected events on prod include `T190LD5Q.LK0W`, `T438L713.RY0W`, `K557L3YM.OE0W`.  Root cause likely a regex or format mismatch for the "PPV" header line in those specific firmware/event-type outputs.  Once fixed, re-forwarding the events from series3-watcher will re-populate the `bw_report` blocks correctly.
 - [ ] **Histogram-specific structural fields.**  Current parser handles the shared fields (PPV, ZC Freq, sensor self-check, project) but silently drops histogram-only fields: `Histogram Start/Stop Time`, `Histogram Start/Stop Date`, `Number of Intervals`, `Interval Size`, per-channel `Peak Time` + `Peak Date` (absolute timestamps rather than the waveform's `Time of Peak` relative seconds).
 - [ ] **Histogram interval bin-table parsing.**  Trailing 792-row table (per-interval Peak/Freq per channel + MicL) in histogram TXTs is unparsed.  Probably too big for the sidecar JSON; may want a separate `.histogram.h5` companion file.
 - [ ] **`>100 Hz` value parsing.**  Histogram TXTs use `>100 Hz` for out-of-range ZC freq; current `_parse_number()` returns `None` for these (loses information).
@@ -564,3 +565,5 @@ Implementation steps (concrete):
 - [ ] Locate "Sensor Check" byte in compliance config (need capture with Disabled vs Before-monitoring).
 - [ ] Call Home — map time slots 3/4 offsets; confirm `modem_power_relay_enabled`.
 - [ ] RV55 DCD/DTR — newer RV55 firmware doesn't assert DCD by default; units don't resume monitoring after call-home disconnect (`--restart-monitoring` flag deferred).
+- [ ] **NULL-timestamp duplicate-row dedup.**  A handful of events (2 known on prod as of 2026-05-22) have `events.timestamp IS NULL` because the codec couldn't extract a timestamp from the binary footer.  The `UNIQUE(serial, timestamp)` constraint doesn't fire on `NULL` (SQL semantics: `NULL`s are treated as distinct in a `UNIQUE` constraint), so every `--force` backfill INSERTs a new row instead of UPSERTing the existing one.  Cleanup: a one-shot SQL query that keeps only the newest row per `(serial, blastware_filename)` and deletes the rest.  Longer-term: extend the unique key to `(serial, COALESCE(timestamp, blastware_filename))` or reject inserts with NULL timestamp.
+- [ ] **Histogram body sub-format with `byte[5] != 0`.**  ~3 events on prod (including `T190LD5Q.LD0H` and `O121L4L1.GU0H`) use a histogram body the walker doesn't recognize — the first block has `byte[5] = 0x01` or `0x07` instead of `0x00`, and the entire body lacks the `1e 0a 00 00` tail signature.  Codec returns 0 valid blocks; their DB PVS comes from the bw_report ASCII overlay (which BW computed from the same binary, so the DB columns are correct).  Only the `.h5` waveform plot is empty.  Cracking the sub-format would unlock the plot.  Needs binary+ASCII pairs from a few `byte[5]!=0` events; same RE approach as the K558 case.
-- 
2.52.0


From 460006e5cdae65338e3fc1f6b3272aa96bce60b5 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Sat, 23 May 2026 06:53:48 +0000
Subject: [PATCH 16/42] sfm: stored-event browser at /events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New standalone HTML page (sfm/event_browser.html, ~560 lines, Chart.js)
that lets you browse persisted events from the SeismoDb + WaveformStore.
Companion to the existing live-device viewer at /waveform:

  /waveform  — connect to a unit and pull events in real time
  /events    — browse events already stored in the DB

Flow:
  1. Page loads → GET /db/units → populate serial dropdown
  2. Select serial → GET /db/events?serial=X&limit=500 → event list
  3. Click event → GET /db/events/{id}/waveform.json → render

Layout is Instantel-printout-ready: channels stacked vertically in
Tran / Vert / Long / MicL order, trigger line at t=0, peak labels,
clean dark theme.  Frames the future PDF-export feature without
needing extra layout work.

Smoke-tested against the dev prod-snapshot — 4 channels render with
correct peaks for K558 events (L=0.3 in/s = the offset-fault peak
we've been chasing all week).

CHANGELOG entry added under [Unreleased] per the v0.20.0 release plan.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md           |  24 ++
 sfm/event_browser.html | 564 +++++++++++++++++++++++++++++++++++++++++
 sfm/server.py          |  16 +-
 3 files changed, 603 insertions(+), 1 deletion(-)
 create mode 100644 sfm/event_browser.html

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f2d4f95..886a0a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,30 @@ All notable changes to seismo-relay are documented here.
 
 ---
 
+## [Unreleased]
+
+### Added
+
+- **Stored-event browser** — new standalone HTML page at `GET /events` (`sfm/event_browser.html`).  Pick a serial from the unit dropdown, scroll through that unit's events (newest-first), click any event to render its decoded waveform via the existing `/db/events/{id}/waveform.json` endpoint.  Dark-themed Chart.js viewer, channels stacked vertically (Tran / Vert / Long / MicL — Instantel printout order, designed PDF-export-ready), trigger line at t=0, peak labels, search/filter, false-trigger flag honored.  Companion to the existing live-device viewer at `/waveform`; the two routes are now clearly delineated in their docstrings.
+- **Histogram body codec — uint8 peak count fix.**  Per-channel peak fields at `block[6]/[10]/[14]/[18]` are `uint8`, not `uint16 LE` spanning `block[6:8]` etc.  The original interpretation was byte-exact on the N844 fixture corpus only because every annotation byte (`block[7]/[11]/[15]/[19]`) in those fixtures was zero.  On non-N844 events with non-zero annotation bytes (observed across BE9558 Tran-drift and BE18003 Histogram+Continuous units), the old interpretation produced peaks up to 268 in/s per channel and 35× inflated PVS sums when first deployed to prod (rolled back same day; properly fixed in this release).  Cross-correlated against BW's per-interval ASCII export on K558 / T003 / N599 / N844 corpora — 100% byte-exact on T/V/L, 99%+ on M (sub-precision rounding).  Annotation byte preserved on each record as `record["annotations"]` for future RE.  Verified against ~3,500 blocks across 5 in-repo fixtures + a synthetic K558 interval-12 regression block.
+- **`apply_bw_report_dict_to_event` helper** in `minimateplus.event_file_io`.  Mirror of `apply_report_to_event` for the projected sidecar dict shape — used by the backfill path, which has the preserved `bw_report` block but not the original `.TXT` file.  BW's reported peaks (and `sample_rate` / `record_time`) now win over codec output during `--force` backfill, matching ingest-path behavior.
+- **`scripts/check_bw_report_preservation.py`** — two-step snapshot/diff tool to verify that `backfill_sidecars.py` doesn't wipe the `bw_report` block from existing sidecars.  Classifies every sidecar as PRESERVED / CHANGED / WIPED / STILL_MISSING / NEW / ADDED / REMOVED.  Exit code 1 if any WIPED or CHANGED entries are found, so it can gate a CI step or deploy script.
+
+### Fixed
+
+- **`scripts/backfill_sidecars.py` no longer wipes `bw_report`.**  Before this fix, `event_to_sidecar_dict` silently dropped the preserved `bw_report` block during every backfill, since the function only emits a `bw_report` when called with a live `BwAsciiReport` dataclass (which the backfill doesn't have — only the projected sidecar dict).  Now we read the existing sidecar's `bw_report` and overlay it onto the regenerated sidecar, alongside the existing `review` and `extensions` preservation.
+- **`scripts/backfill_sidecars.py --force` no longer overwrites BW-overlaid DB peaks with codec output.**  The backfill path now calls `apply_bw_report_dict_to_event` before the DB upsert, mirroring what the ingest path does (`/db/import/blastware_file` parses the `.TXT` into a `BwAsciiReport`, calls `apply_report_to_event`, then upserts).  Without this, events where the codec doesn't fully decode (waveform walker edge cases on SP0/SS0/SV0-style events, histogram `byte[5]!=0` sub-format) ended up with PVS=0 in the DB after a `--force` backfill; this hit prod on 2026-05-22 and was rolled back the same day.
+- **Thor IDF files no longer attempted as BW events in backfill.**  `scripts/backfill_sidecars.py` now filters out `.IDFW` / `.IDFH` files in `_looks_like_event_file()`; they share the `.X0W` / `.X0H` suffix shape but use a separate ingest path (`WaveformStore.save_imported_idf`) and aren't decodable by `event_file_io.read_blastware_file`.
+
+### Docs
+
+- **CLAUDE.md** — added a three-tier conceptual architecture model (SFM / SDM / shared codec library) near the top of the file, with a placement rule for where new code goes.  Documents that what is conceptually SDM (database, waveform store, ingest, `/db/*` endpoints) still lives under `sfm/` for historical reasons; rename deferred until the codebase is quiet enough for a clean refactor.
+- **README.md** — added a "Strategic direction" lead-in to the Roadmap that frames seismo-relay as a suite of cooperating components (not a single app), and an explicit "Terra-View ↔ SFM device control" roadmap section with a concrete implementation checklist (auth as hard prerequisite, embedded live-monitor view, action history, Series IV live-device support).
+- **`docs/histogram_codec_re_status.md`** updated with the uint8 retraction and the annotation-byte status.
+- Three known issues recorded in the Roadmap that were discovered during prod validation: (1) `bw_ascii_report` parser misses PPV / `vector_sum` on some `.TXT` formats (5 events on prod); (2) NULL-timestamp duplicate-row dedup needed (2 events on prod); (3) histogram body sub-format with `byte[5] != 0` not yet decoded (~3 events on prod with empty `.h5` plots).
+
+---
+
 ## v0.19.0 — 2026-05-20
 
 The "device-family separation" release.  Tightens the boundary between Series III (MiniMate Plus / Blastware) and Series IV (Micromate / Thor) so the UI and storage layer dispatch deterministically by family instead of sniffing filename extensions or magnitude heuristics.
diff --git a/sfm/event_browser.html b/sfm/event_browser.html
new file mode 100644
index 0000000..dbbd734
--- /dev/null
+++ b/sfm/event_browser.html
@@ -0,0 +1,564 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>SFM Event Browser</title>
+  <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.1/chart.umd.min.js"></script>
+  <style>
+    * { box-sizing: border-box; margin: 0; padding: 0; }
+
+    body {
+      background: #0d1117;
+      color: #c9d1d9;
+      font-family: 'Segoe UI', system-ui, sans-serif;
+      font-size: 13px;
+      height: 100vh;
+      display: flex;
+      flex-direction: column;
+      overflow: hidden;
+    }
+
+    header {
+      background: #161b22;
+      border-bottom: 1px solid #30363d;
+      padding: 12px 20px;
+      display: flex;
+      align-items: center;
+      gap: 16px;
+      flex-shrink: 0;
+    }
+
+    header h1 {
+      font-size: 15px;
+      font-weight: 600;
+      color: #f0f6fc;
+      white-space: nowrap;
+    }
+
+    label { color: #8b949e; font-size: 12px; }
+
+    select, input[type="text"], input[type="search"] {
+      background: #0d1117;
+      border: 1px solid #30363d;
+      border-radius: 6px;
+      color: #c9d1d9;
+      padding: 5px 8px;
+      font-size: 13px;
+    }
+    select { min-width: 140px; }
+    input[type="search"] { width: 200px; }
+    select:focus, input:focus { outline: none; border-color: #388bfd; }
+
+    button {
+      background: #1f6feb;
+      border: none;
+      border-radius: 6px;
+      color: #fff;
+      cursor: pointer;
+      font-size: 13px;
+      font-weight: 500;
+      padding: 5px 14px;
+    }
+    button:hover { background: #388bfd; }
+    button:disabled { background: #21262d; color: #484f58; cursor: not-allowed; }
+
+    #main {
+      flex: 1;
+      display: flex;
+      overflow: hidden;
+    }
+
+    /* ── Event list (left sidebar) ────────────────────────────────── */
+    #event-list-wrap {
+      width: 320px;
+      flex-shrink: 0;
+      background: #0d1117;
+      border-right: 1px solid #21262d;
+      display: flex;
+      flex-direction: column;
+    }
+
+    #event-list-header {
+      padding: 10px 14px;
+      border-bottom: 1px solid #21262d;
+      font-size: 11px;
+      color: #8b949e;
+      text-transform: uppercase;
+      letter-spacing: 0.06em;
+      display: flex;
+      justify-content: space-between;
+    }
+
+    #event-list {
+      flex: 1;
+      overflow-y: auto;
+    }
+
+    .event-row {
+      padding: 8px 14px;
+      border-bottom: 1px solid #161b22;
+      cursor: pointer;
+      transition: background 0.1s;
+    }
+    .event-row:hover { background: #161b22; }
+    .event-row.active { background: #1f3a5f; border-left: 3px solid #58a6ff; padding-left: 11px; }
+    .event-row .er-top {
+      display: flex;
+      justify-content: space-between;
+      align-items: center;
+      margin-bottom: 2px;
+    }
+    .event-row .er-ts { font-family: monospace; font-size: 12px; color: #c9d1d9; }
+    .event-row .er-pvs { font-family: monospace; font-size: 12px; color: #58a6ff; font-weight: 600; }
+    .event-row .er-meta { font-size: 11px; color: #8b949e; }
+    .event-row.false_trigger .er-pvs { color: #f85149; text-decoration: line-through; }
+
+    /* ── Main viewer (right side) ─────────────────────────────────── */
+    #viewer {
+      flex: 1;
+      display: flex;
+      flex-direction: column;
+      overflow: hidden;
+    }
+
+    #event-meta {
+      padding: 12px 20px;
+      background: #161b22;
+      border-bottom: 1px solid #21262d;
+      display: grid;
+      grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
+      gap: 8px 24px;
+      flex-shrink: 0;
+    }
+    .meta-field {
+      display: flex;
+      flex-direction: column;
+      gap: 1px;
+    }
+    .meta-field .mf-label {
+      font-size: 10px;
+      color: #484f58;
+      text-transform: uppercase;
+      letter-spacing: 0.05em;
+    }
+    .meta-field .mf-value {
+      font-family: monospace;
+      font-size: 13px;
+      color: #c9d1d9;
+    }
+    .meta-field .mf-value.highlight { color: #58a6ff; font-weight: 600; }
+
+    #charts {
+      flex: 1;
+      overflow-y: auto;
+      padding: 12px 16px;
+      display: flex;
+      flex-direction: column;
+      gap: 10px;
+    }
+    .chart-wrap {
+      background: #161b22;
+      border: 1px solid #21262d;
+      border-radius: 8px;
+      padding: 10px 12px 8px;
+    }
+    .chart-label {
+      font-size: 11px;
+      font-weight: 600;
+      letter-spacing: 0.06em;
+      text-transform: uppercase;
+      margin-bottom: 4px;
+      display: flex;
+      justify-content: space-between;
+    }
+    .chart-canvas-wrap { position: relative; height: 130px; }
+
+    .ch-tran { color: #58a6ff; }
+    .ch-vert { color: #3fb950; }
+    .ch-long { color: #d29922; }
+    .ch-micl { color: #bc8cff; }
+
+    #status-bar {
+      background: #161b22;
+      border-top: 1px solid #21262d;
+      padding: 5px 20px;
+      font-size: 12px;
+      color: #8b949e;
+      min-height: 26px;
+      flex-shrink: 0;
+    }
+    #status-bar.error { color: #f85149; }
+    #status-bar.ok    { color: #3fb950; }
+
+    #empty-state {
+      flex: 1;
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+      justify-content: center;
+      color: #484f58;
+      gap: 8px;
+    }
+    #empty-state svg { opacity: 0.3; }
+
+    .pill {
+      background: #21262d;
+      border-radius: 4px;
+      padding: 2px 8px;
+      color: #c9d1d9;
+      font-family: monospace;
+      font-size: 11px;
+      margin-left: 8px;
+    }
+  </style>
+</head>
+<body>
+
+<header>
+  <h1>SFM Event Browser</h1>
+  <label>Serial</label>
+  <select id="serial-select">
+    <option value="">Loading…</option>
+  </select>
+  <input type="search" id="event-filter" placeholder="filter events…" />
+  <span class="pill" id="count-pill">—</span>
+  <button id="reload-btn" onclick="loadSerials()" style="margin-left:auto">Reload</button>
+</header>
+
+<div id="main">
+  <div id="event-list-wrap">
+    <div id="event-list-header">
+      <span>Events</span>
+      <span id="event-list-count">—</span>
+    </div>
+    <div id="event-list"></div>
+  </div>
+
+  <div id="viewer">
+    <div id="empty-state">
+      <svg width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
+        <polyline points="22 12 18 12 15 21 9 3 6 12 2 12"/>
+      </svg>
+      <p>Select a unit and event to view its waveform.</p>
+    </div>
+    <div id="event-meta" style="display:none"></div>
+    <div id="charts" style="display:none"></div>
+  </div>
+</div>
+
+<div id="status-bar">Ready.</div>
+
+<script>
+const CHANNEL_COLORS = {
+  Tran: '#58a6ff',
+  Vert: '#3fb950',
+  Long: '#d29922',
+  MicL: '#bc8cff',
+};
+const CHANNEL_ORDER = ['Tran', 'Vert', 'Long', 'MicL'];
+
+let allEvents = [];
+let filteredEvents = [];
+let currentEventId = null;
+let charts = {};
+
+const apiBase = window.location.origin;
+
+function setStatus(msg, cls = '') {
+  const bar = document.getElementById('status-bar');
+  bar.textContent = msg;
+  bar.className = cls;
+}
+
+// Populate the unit dropdown from GET /db/units and report the result in the status bar.
+async function loadSerials() {
+  setStatus('Loading serials…');
+  try {
+    const r = await fetch(`${apiBase}/db/units`);
+    if (!r.ok) throw new Error(r.statusText);
+    // /db/units returns a bare list[dict], not {units:[...]}
+    const units = await r.json();
+    const sel = document.getElementById('serial-select');
+    if (!units || units.length === 0) {
+      sel.innerHTML = '<option value="">(no units found)</option>';
+      setStatus('No units in DB.', 'error');
+      return;
+    }
+    sel.innerHTML = '<option value="">— pick a unit —</option>';
+    for (const u of units) {
+      // new Option(text, value) stores both as plain text, so a serial can never inject markup.
+      sel.add(new Option(`${u.serial}  (${u.total_events ?? 0} events)`, u.serial));
+    }
+    setStatus(`Loaded ${units.length} units.`, 'ok');
+  } catch (e) {
+    setStatus(`Failed to load units: ${e.message}`, 'error');
+  }
+}
+
+// Load (up to 500) events for `serial`; an empty serial clears the list.
+async function loadEventsForSerial(serial) {
+  if (!serial) {
+    allEvents = [];
+    applyFilter();  // not renderEventList(): filteredEvents and the N / M counter must reset too
+    return;
+  }
+  setStatus(`Loading events for ${serial}…`);
+  try {
+    const r = await fetch(`${apiBase}/db/events?serial=${encodeURIComponent(serial)}&limit=500`);
+    if (!r.ok) throw new Error(r.statusText);
+    allEvents = (await r.json()).events || [];
+    document.getElementById('count-pill').textContent = `${allEvents.length} events`;
+    applyFilter();
+    setStatus(`Loaded ${allEvents.length} events for ${serial}.`, 'ok');
+  } catch (e) {
+    setStatus(`Failed to load events: ${e.message}`, 'error');
+  }
+}
+
+function applyFilter() {
+  const q = document.getElementById('event-filter').value.toLowerCase().trim();
+  if (!q) {
+    filteredEvents = allEvents;
+  } else {
+    filteredEvents = allEvents.filter(ev =>
+      (ev.blastware_filename || '').toLowerCase().includes(q) ||
+      (ev.timestamp           || '').toLowerCase().includes(q) ||
+      (ev.record_type         || '').toLowerCase().includes(q) ||
+      (ev.project             || '').toLowerCase().includes(q)
+    );
+  }
+  document.getElementById('event-list-count').textContent = `${filteredEvents.length} / ${allEvents.length}`;
+  renderEventList();
+}
+
+// Rebuild the sidebar rows from filteredEvents, marking the selected event as active.
+function renderEventList() {
+  const list = document.getElementById('event-list');
+  list.innerHTML = '';
+  if (filteredEvents.length === 0) {
+    list.innerHTML = '<div style="padding:14px;color:#484f58;font-size:12px">No events.</div>';
+    return;
+  }
+  for (const ev of filteredEvents) {
+    const row = document.createElement('div');
+    row.className = 'event-row' + (ev.false_trigger ? ' false_trigger' : '');
+    if (ev.id === currentEventId) row.className += ' active';
+    const ts = (ev.timestamp || '').replace('T', ' ').replace('Z', '');
+    const pvs = ev.peak_vector_sum != null ? `${ev.peak_vector_sum.toFixed(3)} in/s` : '—';
+    // Static skeleton only; DB strings are set via textContent so they can't inject markup.
+    row.innerHTML = '<div class="er-top"><span class="er-ts"></span><span class="er-pvs"></span></div><div class="er-meta"></div>';
+    row.querySelector('.er-ts').textContent = ts || '(no ts)';
+    row.querySelector('.er-pvs').textContent = pvs;
+    row.querySelector('.er-meta').textContent =
+      `${ev.record_type || '?'} · ${ev.blastware_filename || String(ev.id).slice(0, 8)}`;
+    row.onclick = () => loadEvent(ev.id);
+    list.appendChild(row);
+  }
+}
+
+async function loadEvent(eventId) {
+  // Fetch + render one event; a response for a superseded selection is dropped.
+  currentEventId = eventId;
+  renderEventList();
+  setStatus('Loading waveform…');
+  try {
+    const r = await fetch(`${apiBase}/db/events/${eventId}/waveform.json`);
+    const data = r.ok ? await r.json() : null;
+    if (currentEventId !== eventId) return;  // user clicked another event meanwhile
+    if (r.status === 404) {
+      showEmpty('No waveform data for this event (codec returned no samples).');
+      setStatus('No waveform data for this event.');  // was left stuck on "Loading waveform…"
+      return;
+    }
+    if (!r.ok) throw new Error(r.statusText);
+    renderWaveform(data);
+    renderMeta(data, allEvents.find(e => e.id === eventId));  // enrich header from the list row
+    setStatus(`Event loaded.`, 'ok');
+  } catch (e) {
+    if (currentEventId !== eventId) return;  // don't clobber the newer selection's view
+    setStatus(`Failed to load event: ${e.message}`, 'error');
+    showEmpty(`Error: ${e.message}`);
+  }
+}
+
+function showEmpty(msg) {
+  document.getElementById('empty-state').style.display = 'flex';
+  document.getElementById('empty-state').querySelector('p').textContent = msg;
+  document.getElementById('event-meta').style.display = 'none';
+  document.getElementById('charts').style.display = 'none';
+  Object.values(charts).forEach(c => c.destroy());
+  charts = {};
+}
+
+function renderMeta(data, ev) {
+  const metaDiv = document.getElementById('event-meta');
+  const fields = [
+    ['Serial',      data.serial || ev?.serial || '—'],
+    ['Timestamp',   (data.timestamp || ev?.timestamp || '—').replace('T', ' ').replace('Z', '')],
+    ['Record',      data.record_type || ev?.record_type || '—'],
+    ['Sample rate', data.sample_rate ? `${data.sample_rate} sps` : '—'],
+    ['Geo range',   data.geo_range ? `${data.geo_range} (${data.geo_full_scale_ips} in/s FS)` : '—'],
+    ['Project',     ev?.project || '—'],
+    ['Location',    ev?.sensor_location || '—'],
+    ['PVS',         ev?.peak_vector_sum != null ? `${ev.peak_vector_sum.toFixed(4)} in/s` : '—'],
+  ];
+  metaDiv.innerHTML = fields.map(([l, v]) =>
+    `<div class="meta-field"><span class="mf-label">${l}</span><span class="mf-value${l === 'PVS' ? ' highlight' : ''}">${v}</span></div>`
+  ).join('');
+  metaDiv.style.display = 'grid';
+}
+
+function renderWaveform(data) {
+  document.getElementById('empty-state').style.display = 'none';
+  const chartsDiv = document.getElementById('charts');
+  chartsDiv.style.display = 'flex';
+  chartsDiv.innerHTML = '';
+  Object.values(charts).forEach(c => c.destroy());
+  charts = {};
+
+  const channels = data.channels || {};
+  const timeAxis = data.time_axis || null;  // ms relative to trigger
+  const triggerMs = data.trigger_ms ?? 0;
+
+  for (const ch of CHANNEL_ORDER) {
+    const chData = channels[ch];
+    if (!chData) continue;
+    const values = chData.values || [];
+    if (values.length === 0) {
+      // Render an empty card so user sees the channel exists but is missing
+      const wrap = document.createElement('div');
+      wrap.className = 'chart-wrap';
+      wrap.innerHTML = `
+        <div class="chart-label ch-${ch.toLowerCase()}">
+          <span>${ch}</span>
+          <span style="color:#484f58">no samples decoded</span>
+        </div>
+        <div class="chart-canvas-wrap" style="display:flex;align-items:center;justify-content:center;color:#484f58;font-size:12px">empty</div>
+      `;
+      chartsDiv.appendChild(wrap);
+      continue;
+    }
+
+    const unit = chData.unit || 'unit';
+    const peak = chData.peak;
+    const peakT = chData.peak_t_ms;
+    const peakLabel = peak != null
+      ? `peak ${(typeof peak === 'number' ? peak.toExponential(3) : peak)} ${unit}`
+        + (peakT != null ? ` @ ${peakT.toFixed(1)} ms` : '')
+      : '';
+
+    const wrap = document.createElement('div');
+    wrap.className = 'chart-wrap';
+    const lbl = document.createElement('div');
+    lbl.className = `chart-label ch-${ch.toLowerCase()}`;
+    lbl.innerHTML = `<span>${ch}</span><span style="color:#8b949e;font-weight:normal">${peakLabel}</span>`;
+    wrap.appendChild(lbl);
+
+    const canvasWrap = document.createElement('div');
+    canvasWrap.className = 'chart-canvas-wrap';
+    const canvas = document.createElement('canvas');
+    canvasWrap.appendChild(canvas);
+    wrap.appendChild(canvasWrap);
+    chartsDiv.appendChild(wrap);
+
+    // Build time labels — use server-provided time_axis if present, else derive from sample_rate
+    let times;
+    if (timeAxis && timeAxis.length === values.length) {
+      times = timeAxis;
+    } else {
+      const sr = data.sample_rate || 1024;
+      times = values.map((_, i) => (i / sr * 1000 - triggerMs));
+    }
+
+    // Downsample for rendering
+    const MAX_POINTS = 4000;
+    let rT = times, rV = values;
+    if (values.length > MAX_POINTS) {
+      const step = Math.ceil(values.length / MAX_POINTS);
+      rT = times.filter((_, i) => i % step === 0);
+      rV = values.filter((_, i) => i % step === 0);
+    }
+
+    const chart = new Chart(canvas, {
+      type: 'line',
+      data: {
+        labels: rT.map(t => (typeof t === 'number' ? t.toFixed(2) : t)),
+        datasets: [{
+          data: rV,
+          borderColor: CHANNEL_COLORS[ch],
+          borderWidth: 1,
+          pointRadius: 0,
+          tension: 0,
+        }],
+      },
+      options: {
+        animation: false,
+        responsive: true,
+        maintainAspectRatio: false,
+        plugins: {
+          legend: { display: false },
+          tooltip: {
+            mode: 'index',
+            intersect: false,
+            callbacks: {
+              title: items => `t = ${items[0].label} ms`,
+              label: item => `${ch}: ${item.raw} ${unit}`,
+            },
+          },
+        },
+        scales: {
+          x: {
+            type: 'category',
+            ticks: {
+              color: '#484f58',
+              maxTicksLimit: 10,
+              maxRotation: 0,
+              callback: (val, i) => rT[i] + ' ms',
+            },
+            grid: { color: '#21262d' },
+          },
+          y: {
+            ticks: { color: '#484f58', maxTicksLimit: 5 },
+            grid: { color: '#21262d' },
+            title: { display: true, text: unit, color: '#484f58', font: { size: 10 } },
+          },
+        },
+      },
+      plugins: [{
+        // Vertical trigger line at t=0
+        id: 'triggerLine',
+        afterDraw(chart) {
+          const ctx   = chart.ctx;
+          const xAxis = chart.scales.x;
+          const yAxis = chart.scales.y;
+          const zeroIdx = rT.findIndex(t => parseFloat(t) >= 0);
+          if (zeroIdx < 0) return;
+          const x = xAxis.getPixelForValue(zeroIdx);
+          ctx.save();
+          ctx.beginPath();
+          ctx.moveTo(x, yAxis.top);
+          ctx.lineTo(x, yAxis.bottom);
+          ctx.strokeStyle = 'rgba(248, 81, 73, 0.7)';
+          ctx.lineWidth = 1.5;
+          ctx.setLineDash([4, 3]);
+          ctx.stroke();
+          ctx.restore();
+        },
+      }],
+    });
+    charts[ch] = chart;
+  }
+}
+
+// Wire up handlers
+document.getElementById('serial-select').addEventListener('change', e => {
+  loadEventsForSerial(e.target.value);
+});
+document.getElementById('event-filter').addEventListener('input', applyFilter);
+
+// Initial load
+loadSerials();
+</script>
+</body>
+</html>
diff --git a/sfm/server.py b/sfm/server.py
index 5934cf9..dfc3b45 100644
--- a/sfm/server.py
+++ b/sfm/server.py
@@ -381,10 +381,24 @@ def webapp():
 
 @app.get("/waveform", response_class=FileResponse)
 def waveform_viewer():
-    """Serve the standalone waveform viewer."""
+    """Serve the standalone LIVE-device waveform viewer.
+
+    Talks to ``/device/*`` endpoints — for plotting events pulled from
+    a connected unit in real time.  For the stored-event browser that
+    reads from the SeismoDb + WaveformStore, see ``/events``.
+    """
     return str(Path(__file__).parent / "waveform_viewer.html")
 
 
+@app.get("/events", response_class=FileResponse)
+def event_browser():
+    """Serve the stored-event browser — pick a serial, list its events,
+    render any one's waveform from the persisted ``.h5`` via the
+    ``/db/events/{id}/waveform.json`` endpoint.  Standalone HTML +
+    Chart.js, no auth, no build step."""
+    return str(Path(__file__).parent / "event_browser.html")
+
+
 @app.get("/device/info")
 def device_info(
     port:     Optional[str] = Query(None,             description="Serial port (e.g. COM5, /dev/ttyUSB0)"),
-- 
2.52.0


From c14a8c54db13a1b5f9911e12c7283f75abed973c Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Sat, 23 May 2026 07:09:12 +0000
Subject: [PATCH 17/42] event_browser: Instantel-printout-style polish
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply the cheap visual wins from the BW Event Report layout:

  1. Channel order reversed → MicL (top), Long, Vert, Tran (bottom)
     to match the Instantel printout.
  2. Shared bottom time axis — x-axis ticks only render on the
     bottom-most data channel; other channels hide ticks so all four
     visually share one time scale.
  3. Triangle trigger markers above and below the t=0 dashed line.
  4. Horizontal zero-baseline (dotted) per channel with "0.0" label
     on the right edge — Instantel convention.
  5. "Print view" toggle that flips dark→light theme (white panels,
     light grids, dark text) so the viewer can render usefully on
     paper-style output / @media print.
  6. Per-channel PPV stats table in the metadata header, with Peak
     Vector Sum displayed prominently.
  7. Colors adjusted to approximate BW trace colors (magenta MicL,
     blue Long, green Vert, red Tran).

Future PDF-export work will reproduce the same layout server-side
once you upload a real example PDF and we pick a rendering pipeline
(weasyprint / chromium --print-to-pdf / etc.).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 221 +++++++++++++++++++++++++++++++++++------
 1 file changed, 193 insertions(+), 28 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index dbbd734..0dce1b0 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -161,7 +161,7 @@
       background: #161b22;
       border: 1px solid #21262d;
       border-radius: 8px;
-      padding: 10px 12px 8px;
+      padding: 10px 30px 8px 12px;  /* right padding leaves room for the "0.0" baseline label */
     }
     .chart-label {
       font-size: 11px;
@@ -211,6 +211,72 @@
       font-size: 11px;
       margin-left: 8px;
     }
+
+    /* Per-channel stats table in the metadata header */
+    .stats-table {
+      grid-column: 1 / -1;
+      border-collapse: collapse;
+      font-family: monospace;
+      font-size: 12px;
+      margin-top: 4px;
+    }
+    .stats-table th, .stats-table td {
+      padding: 3px 14px 3px 0;
+      text-align: left;
+      color: #c9d1d9;
+    }
+    .stats-table th {
+      color: #484f58;
+      font-size: 10px;
+      text-transform: uppercase;
+      letter-spacing: 0.05em;
+      font-weight: 500;
+    }
+
+    /* ── Print view (light theme matching the Instantel printout) ─── */
+    body.print-view {
+      background: #ffffff;
+      color: #000000;
+    }
+    body.print-view header,
+    body.print-view #event-list-wrap,
+    body.print-view #event-list-header,
+    body.print-view #event-meta,
+    body.print-view #status-bar,
+    body.print-view .chart-wrap {
+      background: #ffffff;
+      border-color: #cccccc;
+      color: #000000;
+    }
+    body.print-view .event-row { color: #000; border-bottom-color: #eee; }
+    body.print-view .event-row:hover { background: #f4f4f4; }
+    body.print-view .event-row.active {
+      background: #e6f0ff;
+      border-left-color: #1f6feb;
+    }
+    body.print-view .er-ts { color: #000; }
+    body.print-view .er-pvs { color: #003a8c; }
+    body.print-view .er-meta,
+    body.print-view #event-list-header,
+    body.print-view .meta-field .mf-label,
+    body.print-view .stats-table th {
+      color: #666;
+    }
+    body.print-view .mf-value { color: #000; }
+    body.print-view .mf-value.highlight { color: #003a8c; }
+    body.print-view label { color: #444; }
+    body.print-view input, body.print-view select {
+      background: #fff; color: #000; border-color: #ccc;
+    }
+    /* In print theme, the channel-label colors stay (they identify
+       the trace).  Only the chart panel background flips. */
+
+    @media print {
+      header, #event-list-wrap, #status-bar, button { display: none !important; }
+      body { overflow: visible; height: auto; }
+      #main, #viewer { overflow: visible; }
+      #charts { overflow: visible; }
+    }
   </style>
 </head>
 <body>
@@ -223,7 +289,8 @@
   </select>
   <input type="search" id="event-filter" placeholder="filter events…" />
   <span class="pill" id="count-pill">—</span>
-  <button id="reload-btn" onclick="loadSerials()" style="margin-left:auto">Reload</button>
+  <button id="print-btn" onclick="togglePrintView()" style="margin-left:auto;background:#21262d">Print view</button>
+  <button id="reload-btn" onclick="loadSerials()">Reload</button>
 </header>
 
 <div id="main">
@@ -250,13 +317,16 @@
 <div id="status-bar">Ready.</div>
 
 <script>
+// Channel colors and rendering order mirror Instantel's BW Event Report
+// printout: MicL at the top, Tran at the bottom.  Colors approximate
+// what BW renders (magenta mic, blue long, green vert, red tran).
 const CHANNEL_COLORS = {
-  Tran: '#58a6ff',
+  MicL: '#e066ff',
+  Long: '#3a80ff',
   Vert: '#3fb950',
-  Long: '#d29922',
-  MicL: '#bc8cff',
+  Tran: '#f85149',
 };
-const CHANNEL_ORDER = ['Tran', 'Vert', 'Long', 'MicL'];
+const CHANNEL_ORDER = ['MicL', 'Long', 'Vert', 'Tran'];
 
 let allEvents = [];
 let filteredEvents = [];
@@ -401,14 +471,48 @@ function renderMeta(data, ev) {
     ['Geo range',   data.geo_range ? `${data.geo_range} (${data.geo_full_scale_ips} in/s FS)` : '—'],
     ['Project',     ev?.project || '—'],
     ['Location',    ev?.sensor_location || '—'],
-    ['PVS',         ev?.peak_vector_sum != null ? `${ev.peak_vector_sum.toFixed(4)} in/s` : '—'],
+    ['Peak Vector Sum',
+                    ev?.peak_vector_sum != null ? `${ev.peak_vector_sum.toFixed(4)} in/s` : '—'],
   ];
-  metaDiv.innerHTML = fields.map(([l, v]) =>
-    `<div class="meta-field"><span class="mf-label">${l}</span><span class="mf-value${l === 'PVS' ? ' highlight' : ''}">${v}</span></div>`
-  ).join('');
+
+  // Per-channel PPV table mirroring the printout's middle block; values come
+  // from the events row (`ev`).  Interpolated strings go through esc() because
+  // project / location text originates in event files and must stay inert.
+  const esc = s => String(s).replace(/[&<>"']/g, c => `&#${c.charCodeAt(0)};`);
+  const fmt = v => (v == null ? '—' : (typeof v === 'number' ? v.toFixed(3) : esc(v)));
+  const rows = [
+    ['Tran', ev?.tran_ppv],
+    ['Vert', ev?.vert_ppv],
+    ['Long', ev?.long_ppv],
+  ];
+  const statsHtml = `
+    <table class="stats-table">
+      <thead>
+        <tr><th>Channel</th><th>PPV (in/s)</th></tr>
+      </thead>
+      <tbody>
+        ${rows.map(([ch, ppv]) => `<tr><td>${ch}</td><td>${fmt(ppv)}</td></tr>`).join('')}
+        <tr><td>MicL</td><td>${fmt(ev?.mic_ppv)}${ev?.mic_ppv != null ? ' psi' : ''}</td></tr>
+      </tbody>
+    </table>
+  `;
+
+  metaDiv.innerHTML =
+    fields.map(([l, v]) =>
+      `<div class="meta-field"><span class="mf-label">${l}</span><span class="mf-value${l === 'Peak Vector Sum' ? ' highlight' : ''}">${esc(v)}</span></div>`
+    ).join('') + statsHtml;
   metaDiv.style.display = 'grid';
 }
 
+function togglePrintView() {
+  document.body.classList.toggle('print-view');
+  // Force chart redraw so axis/grid colors are re-evaluated against the
+  // new background.  Easiest: re-render the current event.
+  if (currentEventId) {
+    loadEvent(currentEventId);
+  }
+}
+
 function renderWaveform(data) {
   document.getElementById('empty-state').style.display = 'none';
   const chartsDiv = document.getElementById('charts');
@@ -420,6 +524,15 @@ function renderWaveform(data) {
   const channels = data.channels || {};
   const timeAxis = data.time_axis || null;  // ms relative to trigger
   const triggerMs = data.trigger_ms ?? 0;
+  const isPrintMode = document.body.classList.contains('print-view');
+
+  // Which channels actually have data → determines which one renders the
+  // shared x-axis at the bottom (Instantel printout has the time scale
+  // only on the bottom-most chart).
+  const channelsWithData = CHANNEL_ORDER.filter(ch =>
+    channels[ch] && (channels[ch].values || []).length > 0
+  );
+  const lastDataCh = channelsWithData[channelsWithData.length - 1];
 
   for (const ch of CHANNEL_ORDER) {
     const chData = channels[ch];
@@ -447,6 +560,9 @@ function renderWaveform(data) {
       ? `peak ${(typeof peak === 'number' ? peak.toExponential(3) : peak)} ${unit}`
         + (peakT != null ? ` @ ${peakT.toFixed(1)} ms` : '')
       : '';
+    // Hide x-axis on every chart except the bottom-most data channel —
+    // gives the "single shared time axis" feel of the BW printout.
+    const showXAxis = (ch === lastDataCh);
 
     const wrap = document.createElement('div');
     wrap.className = 'chart-wrap';
@@ -510,40 +626,89 @@ function renderWaveform(data) {
         scales: {
           x: {
             type: 'category',
+            display: showXAxis,
             ticks: {
-              color: '#484f58',
+              color: isPrintMode ? '#666' : '#484f58',
               maxTicksLimit: 10,
               maxRotation: 0,
               callback: (val, i) => rT[i] + ' ms',
             },
-            grid: { color: '#21262d' },
+            grid: { color: isPrintMode ? '#e0e0e0' : '#21262d', drawTicks: showXAxis },
           },
           y: {
-            ticks: { color: '#484f58', maxTicksLimit: 5 },
-            grid: { color: '#21262d' },
-            title: { display: true, text: unit, color: '#484f58', font: { size: 10 } },
+            ticks: { color: isPrintMode ? '#666' : '#484f58', maxTicksLimit: 5 },
+            grid: { color: isPrintMode ? '#e0e0e0' : '#21262d' },
+            title: { display: true, text: unit,
+                     color: isPrintMode ? '#666' : '#484f58', font: { size: 10 } },
           },
         },
       },
       plugins: [{
-        // Vertical trigger line at t=0
-        id: 'triggerLine',
+        // Trigger line @ t=0 + triangle markers above/below + "0.0"
+        // baseline label on the right edge.  Matches the Instantel
+        // BW Event Report printout style.
+        id: 'instantelOverlays',
         afterDraw(chart) {
           const ctx   = chart.ctx;
           const xAxis = chart.scales.x;
           const yAxis = chart.scales.y;
+          const fgPrim = isPrintMode ? '#000' : '#c9d1d9';
+          const fgTrigger = '#f85149';
+
+          // Dashed vertical trigger line at t=0
           const zeroIdx = rT.findIndex(t => parseFloat(t) >= 0);
-          if (zeroIdx < 0) return;
-          const x = xAxis.getPixelForValue(zeroIdx);
-          ctx.save();
-          ctx.beginPath();
-          ctx.moveTo(x, yAxis.top);
-          ctx.lineTo(x, yAxis.bottom);
-          ctx.strokeStyle = 'rgba(248, 81, 73, 0.7)';
-          ctx.lineWidth = 1.5;
-          ctx.setLineDash([4, 3]);
-          ctx.stroke();
-          ctx.restore();
+          if (zeroIdx >= 0) {
+            const x = xAxis.getPixelForValue(zeroIdx);
+            ctx.save();
+            ctx.beginPath();
+            ctx.moveTo(x, yAxis.top);
+            ctx.lineTo(x, yAxis.bottom);
+            ctx.strokeStyle = isPrintMode ? '#cc0000' : 'rgba(248, 81, 73, 0.8)';
+            ctx.lineWidth = 1.2;
+            ctx.setLineDash([4, 3]);
+            ctx.stroke();
+            ctx.restore();
+
+            // Triangles above and below the chart at the trigger column
+            ctx.save();
+            ctx.fillStyle = fgTrigger;
+            ctx.beginPath();  // top triangle pointing down
+            ctx.moveTo(x - 5, yAxis.top - 8);
+            ctx.lineTo(x + 5, yAxis.top - 8);
+            ctx.lineTo(x,     yAxis.top - 1);
+            ctx.closePath();
+            ctx.fill();
+            ctx.beginPath();  // bottom triangle pointing up
+            ctx.moveTo(x - 5, yAxis.bottom + 8);
+            ctx.lineTo(x + 5, yAxis.bottom + 8);
+            ctx.lineTo(x,     yAxis.bottom + 1);
+            ctx.closePath();
+            ctx.fill();
+            ctx.restore();
+          }
+
+          // "0.0" baseline label on the right edge — printout convention.
+          // Position vertically at the zero-amplitude level.
+          const zeroY = yAxis.getPixelForValue(0);
+          if (zeroY >= yAxis.top && zeroY <= yAxis.bottom) {
+            ctx.save();
+            ctx.strokeStyle = isPrintMode ? '#aaa' : '#30363d';
+            ctx.lineWidth = 0.8;
+            ctx.setLineDash([2, 2]);
+            ctx.beginPath();
+            ctx.moveTo(xAxis.left, zeroY);
+            ctx.lineTo(xAxis.right, zeroY);
+            ctx.stroke();
+            ctx.restore();
+
+            ctx.save();
+            ctx.fillStyle = fgPrim;
+            ctx.font = '11px monospace';
+            ctx.textAlign = 'left';
+            ctx.textBaseline = 'middle';
+            ctx.fillText('0.0', xAxis.right + 6, zeroY);
+            ctx.restore();
+          }
         },
       }],
     });
-- 
2.52.0


From fd0e28657d819868e82869bcf5e1adb17b4ee0e3 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Sat, 23 May 2026 19:39:18 +0000
Subject: [PATCH 18/42] sfm_webapp: default to Database view + sortable columns
 + inline waveform plot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three UX upgrades to the main SFM webapp at /, all reinforcing the
'browse stored events' flow as the primary entry point:

1. Default section is now Database, not Live Device.  Most users land
   here to look at stored events; Live Device is opt-in (click the tab
   to talk to a unit).  Initial history + units fetch fires on first
   paint so the table is populated when the page loads.

2. History table columns are sortable.  Click any header to sort:
   timestamp, serial, per-channel PPV (Tran/Vert/Long), PVS, mic dB(L),
   project, client, type, key.  Default direction varies by column type
   (desc for numbers + timestamps, asc for text).  Sort arrows appear
   in the active column header.  Headers are sticky so they stay
   visible while scrolling.

3. Click-event-to-see-waveform.  The existing sidecar review modal now
   renders the 4-channel waveform plot inline at the top, fetched from
   /db/events/{id}/waveform.json in parallel with the sidecar fetch.
   Channels stacked MicL / Long / Vert / Tran (Instantel printout
   order), shared bottom time axis, dashed trigger line + triangle
   markers at t=0, zero baseline with "0.0" label on the right edge,
   peak callouts per channel.  Charts cleaned up on modal close.

Resolves the "where is the viewer" surprise — operators no longer need
to know about the /events route to see waveforms.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md        |   3 +-
 sfm/sfm_webapp.html | 339 ++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 312 insertions(+), 30 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 886a0a8..03bf500 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,8 @@ All notable changes to seismo-relay are documented here.
 
 ### Added
 
-- **Stored-event browser** — new standalone HTML page at `GET /events` (`sfm/event_browser.html`).  Pick a serial from the unit dropdown, scroll through that unit's events (newest-first), click any event to render its decoded waveform via the existing `/db/events/{id}/waveform.json` endpoint.  Dark-themed Chart.js viewer, channels stacked vertically (Tran / Vert / Long / MicL — Instantel printout order, designed PDF-export-ready), trigger line at t=0, peak labels, search/filter, false-trigger flag honored.  Companion to the existing live-device viewer at `/waveform`; the two routes are now clearly delineated in their docstrings.
+- **SFM webapp now opens to Database view by default** and the History table is fully interactive.  Click any column header to sort ascending / descending (timestamp, serial, per-channel PPV, PVS, mic dB(L), project, client, record type, key — all sortable).  Click any event row to open the event modal, which now renders a **4-channel waveform plot inline** (MicL / Long / Vert / Tran stacked, Instantel-printout order) alongside the existing sidecar review fields.  Headers are sticky so the columns stay visible while scrolling long event lists.  No more "where is the viewer" — pick a unit from the filter dropdown, scan the table, click the event, see the waveform.
+- **Stored-event browser** — new standalone HTML page at `GET /events` (`sfm/event_browser.html`).  Pick a serial from the unit dropdown, scroll through that unit's events (newest-first), click any event to render its decoded waveform via the existing `/db/events/{id}/waveform.json` endpoint.  Dark-themed Chart.js viewer, channels stacked vertically (MicL / Long / Vert / Tran — Instantel printout order, designed PDF-export-ready), trigger line at t=0, peak labels, search/filter, false-trigger flag honored.  Companion to the existing live-device viewer at `/waveform`; the two routes are now clearly delineated in their docstrings.  The webapp's inline plot at `/` is the primary path; `/events` remains a useful diagnostic when you want just a viewer.
 - **Histogram body codec — uint8 peak count fix.**  Per-channel peak fields at `block[6]/[10]/[14]/[18]` are `uint8`, not `uint16 LE` spanning `block[6:8]` etc.  The original interpretation was byte-exact on the N844 fixture corpus only because every annotation byte (`block[7]/[11]/[15]/[19]`) in those fixtures was zero.  On non-N844 events with non-zero annotation bytes (observed across BE9558 Tran-drift and BE18003 Histogram+Continuous units), the old interpretation produced peaks up to 268 in/s per channel and 35× inflated PVS sums when first deployed to prod (rolled back same day; properly fixed in this release).  Cross-correlated against BW's per-interval ASCII export on K558 / T003 / N599 / N844 corpora — 100% byte-exact on T/V/L, 99%+ on M (sub-precision rounding).  Annotation byte preserved on each record as `record["annotations"]` for future RE.  Verified against ~3,500 blocks across 5 in-repo fixtures + a synthetic K558 interval-12 regression block.
 - **`apply_bw_report_dict_to_event` helper** in `minimateplus.event_file_io`.  Mirror of `apply_report_to_event` for the projected sidecar dict shape — used by the backfill path, which has the preserved `bw_report` block but not the original `.TXT` file.  BW's reported peaks (and `sample_rate` / `record_time`) now win over codec output during `--force` backfill, matching ingest-path behavior.
 - **`scripts/check_bw_report_preservation.py`** — two-step snapshot/diff tool to verify that `backfill_sidecars.py` doesn't wipe the `bw_report` block from existing sidecars.  Classifies every sidecar as PRESERVED / CHANGED / WIPED / STILL_MISSING / NEW / ADDED / REMOVED.  Exit code 1 if any WIPED or CHANGED entries are found, so it can gate a CI step or deploy script.
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index 576ae94..6c68b38 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -499,6 +499,20 @@
       text-align: left;
       border-bottom: 1px solid var(--border);
       white-space: nowrap;
+      position: sticky;
+      top: 0;
+      z-index: 1;
+    }
+    table.db-table thead th[data-sort]:hover {
+      background: var(--border2);
+      color: var(--text);
+    }
+    table.db-table thead th .sort-arrow {
+      display: inline-block;
+      width: 10px;
+      color: var(--accent, #58a6ff);
+      font-weight: 900;
+      text-align: center;
     }
     table.db-table tbody tr { border-bottom: 1px solid var(--border2); }
     table.db-table tbody tr:last-child { border-bottom: none; }
@@ -758,7 +772,9 @@
       overflow: hidden;
       min-height: 0;
     }
-    #section-db { display: none; }
+    /* Default to Database view on page load — most users are here to
+       browse stored events, not connect to a live unit. */
+    #section-live { display: none; }
 
     /* ── Live connect bar (host/port/connect, live section only) ── */
     #live-connect-bar {
@@ -792,8 +808,8 @@
   </div>
   <div class="hdr-sep"></div>
   <div class="section-switcher">
-    <button class="section-btn active" onclick="switchSection('live')">Live Device</button>
-    <button class="section-btn"        onclick="switchSection('db')">Database</button>
+    <button class="section-btn"        onclick="switchSection('live')">Live Device</button>
+    <button class="section-btn active" onclick="switchSection('db')">Database</button>
   </div>
   <div class="hdr-sep"></div>
   <label class="force-toggle" id="force-toggle"
@@ -1224,18 +1240,18 @@
     <div class="db-table-wrap" id="hist-table-wrap" style="display:none">
       <table class="db-table" id="hist-table">
         <thead>
-          <tr>
-            <th>Timestamp</th>
-            <th>Serial</th>
-            <th>Tran (in/s)</th>
-            <th>Vert (in/s)</th>
-            <th>Long (in/s)</th>
-            <th>PVS (in/s)</th>
-            <th>Mic (dBL)</th>
-            <th>Project</th>
-            <th>Client</th>
-            <th>Type</th>
-            <th>Key</th>
+          <tr id="hist-header-row">
+            <th data-sort="timestamp">Timestamp <span class="sort-arrow"></span></th>
+            <th data-sort="serial">Serial <span class="sort-arrow"></span></th>
+            <th data-sort="tran_ppv">Tran (in/s) <span class="sort-arrow"></span></th>
+            <th data-sort="vert_ppv">Vert (in/s) <span class="sort-arrow"></span></th>
+            <th data-sort="long_ppv">Long (in/s) <span class="sort-arrow"></span></th>
+            <th data-sort="peak_vector_sum">PVS (in/s) <span class="sort-arrow"></span></th>
+            <th data-sort="mic_ppv">Mic (dBL) <span class="sort-arrow"></span></th>
+            <th data-sort="project">Project <span class="sort-arrow"></span></th>
+            <th data-sort="client">Client <span class="sort-arrow"></span></th>
+            <th data-sort="record_type">Type <span class="sort-arrow"></span></th>
+            <th data-sort="waveform_key">Key <span class="sort-arrow"></span></th>
             <th></th>
           </tr>
         </thead>
@@ -1388,7 +1404,9 @@ function deviceParams() {
 }
 
 // ── Section switching ─────────────────────────────────────────────────────────
-let currentSection = 'live';
+// Default to Database — most users land here to browse stored events.
+// Live Device is opt-in (click the tab to talk to a unit).
+let currentSection = 'db';
 
 function switchSection(name) {
   currentSection = name;
@@ -2333,6 +2351,12 @@ async function _fetchUnits() {
 }
 
 // ── History tab ────────────────────────────────────────────────────────────────
+// Module-level state for the history table — preserved across re-sorts.
+// We sort + re-render without re-fetching.
+let _histEvents = [];
+let _histSortKey = 'timestamp';
+let _histSortDir = 'desc';   // 'asc' | 'desc'
+
 async function loadHistory() {
   histLoaded = true;
   const serial  = document.getElementById('hist-serial-filter').value;
@@ -2364,10 +2388,20 @@ async function loadHistory() {
   _populateSerialDropdown('monlog-serial-filter');
   _populateSerialDropdown('sess-serial-filter');
 
-  document.getElementById('hist-count').textContent = `${events.length} event${events.length !== 1 ? 's' : ''}`;
+  _histEvents = events;
+  renderHistTable();
+}
+
+// Re-render the history table from `_histEvents` using the current sort
+// state.  Pulled out of `loadHistory` so column-header clicks can re-sort
+// in-memory without re-fetching from the server.
+function renderHistTable() {
+  const events = _histEvents;
+  document.getElementById('hist-count').textContent =
+    `${events.length} event${events.length !== 1 ? 's' : ''}`;
+
   const tbody = document.getElementById('hist-tbody');
   tbody.innerHTML = '';
-
   if (events.length === 0) {
     document.getElementById('hist-empty').style.display = 'block';
     document.getElementById('hist-table-wrap').style.display = 'none';
@@ -2376,11 +2410,31 @@ async function loadHistory() {
   document.getElementById('hist-empty').style.display = 'none';
   document.getElementById('hist-table-wrap').style.display = 'block';
 
-  for (const ev of events) {
+  // Sort a copy by current key + direction.  Nulls sink to the bottom
+  // regardless of direction.
+  const k = _histSortKey;
+  const dir = _histSortDir === 'asc' ? 1 : -1;
+  const sorted = [...events].sort((a, b) => {
+    const av = a[k], bv = b[k];
+    if (av == null && bv == null) return 0;
+    if (av == null) return 1;
+    if (bv == null) return -1;
+    if (typeof av === 'number' && typeof bv === 'number') return (av - bv) * dir;
+    return String(av).localeCompare(String(bv)) * dir;
+  });
+
+  // Update arrow indicators in the headers
+  document.querySelectorAll('#hist-header-row th[data-sort]').forEach(th => {
+    const arrow = th.querySelector('.sort-arrow');
+    if (!arrow) return;
+    arrow.textContent = th.dataset.sort === k ? (_histSortDir === 'asc' ? '↑' : '↓') : '';
+  });
+
+  for (const ev of sorted) {
     const tr = document.createElement('tr');
     const pvs = ev.peak_vector_sum;
     tr.classList.add('clickable');
-    tr.title = 'Click to review (open sidecar editor)';
+    tr.title = 'Click to view waveform + sidecar';
     tr.dataset.eventId = ev.id;
     tr.innerHTML = `
       <td>${_fmtTs(ev.timestamp)}</td>
@@ -2408,6 +2462,28 @@ async function loadHistory() {
   }
 }
 
+// Click a column header → toggle sort.  Click another → set sort to that column.
+document.addEventListener('DOMContentLoaded', () => {
+  const headerRow = document.getElementById('hist-header-row');
+  if (!headerRow) return;
+  headerRow.querySelectorAll('th[data-sort]').forEach(th => {
+    th.style.cursor = 'pointer';
+    th.style.userSelect = 'none';
+    th.addEventListener('click', () => {
+      const k = th.dataset.sort;
+      if (_histSortKey === k) {
+        _histSortDir = _histSortDir === 'asc' ? 'desc' : 'asc';
+      } else {
+        _histSortKey = k;
+        // Default direction: 'desc' for numbers + timestamps (biggest/newest first),
+        // 'asc' for text columns (alphabetical).
+        _histSortDir = ['serial','project','client','record_type','waveform_key'].includes(k) ? 'asc' : 'desc';
+      }
+      renderHistTable();
+    });
+  });
+});
+
 // ── Sidecar review modal ───────────────────────────────────────────────────────
 //
 // Opens on row click in the History table.  Loads the .sfm.json sidecar
@@ -2430,23 +2506,214 @@ async function openSidecarModal(eventId) {
   document.getElementById('sc-edit-ft').checked = false;
   document.getElementById('sc-edit-reviewer').value = '';
   document.getElementById('sc-edit-notes').value = '';
+  // Reset waveform area
+  document.getElementById('sc-waveform-status').textContent = 'Loading waveform…';
+  document.getElementById('sc-waveform-charts').innerHTML = '';
+  _destroyScCharts();
 
-  try {
-    const r = await fetch(`${api()}/db/events/${eventId}/sidecar`);
-    if (!r.ok) {
-      const e = await r.json().catch(() => ({}));
-      throw new Error(e.detail || r.statusText);
-    }
-    const data = await r.json();
+  // Sidecar + waveform fetched in parallel — neither blocks the other.
+  const sidecarP  = fetch(`${api()}/db/events/${eventId}/sidecar`)
+    .then(async r => {
+      if (!r.ok) { const e = await r.json().catch(() => ({})); throw new Error(e.detail || r.statusText); }
+      return r.json();
+    });
+  const waveformP = fetch(`${api()}/db/events/${eventId}/waveform.json`)
+    .then(async r => {
+      if (r.status === 404) return null;  // no waveform available — render empty state
+      if (!r.ok) { const e = await r.json().catch(() => ({})); throw new Error(e.detail || r.statusText); }
+      return r.json();
+    });
+
+  // Sidecar usually loads first (smaller payload).  Each one renders
+  // independently so the modal becomes useful as soon as either lands.
+  sidecarP.then(data => {
     _scCurrentSidecar = data;
     _renderSidecar(data);
     document.getElementById('sc-status').textContent = '';
-  } catch (e) {
+  }).catch(e => {
     document.getElementById('sc-status').className = 'sc-status error';
-    document.getElementById('sc-status').textContent = `Load failed: ${e.message}`;
+    document.getElementById('sc-status').textContent = `Sidecar load failed: ${e.message}`;
+  });
+
+  waveformP.then(data => {
+    if (!data) {
+      document.getElementById('sc-waveform-status').textContent = 'No waveform data for this event.';
+      return;
+    }
+    _renderScWaveform(data);
+  }).catch(e => {
+    document.getElementById('sc-waveform-status').textContent = `Waveform load failed: ${e.message}`;
+  });
+}
+
+// ── Sidecar-modal waveform plot ──────────────────────────────────────────────
+// Renders the 4-channel decoded waveform fetched from
+// /db/events/{id}/waveform.json — MicL on top, Tran on bottom (matches
+// Instantel BW Event Report layout).  Uses Chart.js (loaded at the top of
+// the page for the live-device viewer).
+const _SC_CHANNEL_COLORS = {
+  MicL: '#e066ff',
+  Long: '#3a80ff',
+  Vert: '#3fb950',
+  Tran: '#f85149',
+};
+const _SC_CHANNEL_ORDER = ['MicL', 'Long', 'Vert', 'Tran'];
+let _scCharts = {};
+
+function _destroyScCharts() {
+  Object.values(_scCharts).forEach(c => { try { c.destroy(); } catch {} });
+  _scCharts = {};
+}
+
+function _renderScWaveform(data) {
+  document.getElementById('sc-waveform-status').textContent = '';
+  const chartsDiv = document.getElementById('sc-waveform-charts');
+  chartsDiv.innerHTML = '';
+  _destroyScCharts();
+
+  const channels = data.channels || {};
+  const timeAxis = data.time_axis || null;
+  const triggerMs = data.trigger_ms ?? 0;
+
+  // Which channels have data — determines which one renders the shared bottom axis.
+  const withData = _SC_CHANNEL_ORDER.filter(ch =>
+    channels[ch] && (channels[ch].values || []).length > 0
+  );
+  const lastCh = withData[withData.length - 1];
+
+  for (const ch of _SC_CHANNEL_ORDER) {
+    const chData = channels[ch];
+    if (!chData) continue;
+    const values = chData.values || [];
+
+    const wrap = document.createElement('div');
+    wrap.style.cssText = 'background:var(--surface);border:1px solid var(--border2);border-radius:6px;padding:6px 30px 4px 10px';
+    const lbl = document.createElement('div');
+    lbl.style.cssText = `font-size:10px;font-weight:600;letter-spacing:0.05em;text-transform:uppercase;margin-bottom:2px;color:${_SC_CHANNEL_COLORS[ch]};display:flex;justify-content:space-between`;
+    const peakStr = chData.peak != null
+      ? `peak ${(typeof chData.peak === 'number' ? chData.peak.toExponential(3) : chData.peak)} ${chData.unit || ''}`
+      : '';
+    lbl.innerHTML = `<span>${ch}</span><span style="color:var(--text-dim);font-weight:normal">${peakStr}</span>`;
+    wrap.appendChild(lbl);
+
+    if (values.length === 0) {
+      const e = document.createElement('div');
+      e.style.cssText = 'height:80px;display:flex;align-items:center;justify-content:center;color:var(--text-dim);font-size:11px';
+      e.textContent = 'no samples decoded';
+      wrap.appendChild(e);
+      chartsDiv.appendChild(wrap);
+      continue;
+    }
+
+    const canvasWrap = document.createElement('div');
+    canvasWrap.style.cssText = 'position:relative;height:100px';
+    const canvas = document.createElement('canvas');
+    canvasWrap.appendChild(canvas);
+    wrap.appendChild(canvasWrap);
+    chartsDiv.appendChild(wrap);
+
+    // Build time axis.  Prefer server-provided time_axis; else derive from sample_rate.
+    let times;
+    if (timeAxis && timeAxis.length === values.length) {
+      times = timeAxis;
+    } else {
+      const sr = data.sample_rate || 1024;
+      times = values.map((_, i) => (i / sr * 1000 - triggerMs));
+    }
+
+    // Downsample for rendering when very long.
+    const MAX = 3000;
+    let rT = times, rV = values;
+    if (values.length > MAX) {
+      const step = Math.ceil(values.length / MAX);
+      rT = times.filter((_, i) => i % step === 0);
+      rV = values.filter((_, i) => i % step === 0);
+    }
+    const showX = (ch === lastCh);
+
+    _scCharts[ch] = new Chart(canvas, {
+      type: 'line',
+      data: {
+        labels: rT.map(t => (typeof t === 'number' ? t.toFixed(2) : t)),
+        datasets: [{
+          data: rV,
+          borderColor: _SC_CHANNEL_COLORS[ch],
+          borderWidth: 1,
+          pointRadius: 0,
+          tension: 0,
+        }],
+      },
+      options: {
+        animation: false, responsive: true, maintainAspectRatio: false,
+        plugins: {
+          legend: { display: false },
+          tooltip: {
+            mode: 'index', intersect: false,
+            callbacks: {
+              title: items => `t = ${items[0].label} ms`,
+              label: item => `${ch}: ${item.raw} ${chData.unit || ''}`,
+            },
+          },
+        },
+        scales: {
+          x: {
+            type: 'category', display: showX,
+            ticks: { color: '#484f58', maxTicksLimit: 8, maxRotation: 0, callback: (v, i) => rT[i] + ' ms' },
+            grid:  { color: '#21262d', drawTicks: showX },
+          },
+          y: {
+            ticks: { color: '#484f58', maxTicksLimit: 4 },
+            grid:  { color: '#21262d' },
+            title: { display: true, text: chData.unit || '', color: '#484f58', font: { size: 9 } },
+          },
+        },
+      },
+      plugins: [{
+        id: 'overlays',
+        afterDraw(chart) {
+          const ctx = chart.ctx, x = chart.scales.x, y = chart.scales.y;
+          // Dashed trigger line at t=0
+          const zi = rT.findIndex(t => parseFloat(t) >= 0);
+          if (zi >= 0) {
+            const px = x.getPixelForValue(zi);
+            ctx.save();
+            ctx.beginPath(); ctx.moveTo(px, y.top); ctx.lineTo(px, y.bottom);
+            ctx.strokeStyle = 'rgba(248,81,73,0.8)'; ctx.lineWidth = 1.2;
+            ctx.setLineDash([4, 3]); ctx.stroke(); ctx.restore();
+            // Triangle markers above and below the chart
+            ctx.save();
+            ctx.fillStyle = '#f85149';
+            ctx.beginPath();
+            ctx.moveTo(px - 4, y.top - 7); ctx.lineTo(px + 4, y.top - 7); ctx.lineTo(px, y.top - 1);
+            ctx.closePath(); ctx.fill();
+            ctx.beginPath();
+            ctx.moveTo(px - 4, y.bottom + 7); ctx.lineTo(px + 4, y.bottom + 7); ctx.lineTo(px, y.bottom + 1);
+            ctx.closePath(); ctx.fill();
+            ctx.restore();
+          }
+          // Zero baseline + label
+          const zy = y.getPixelForValue(0);
+          if (zy >= y.top && zy <= y.bottom) {
+            ctx.save();
+            ctx.strokeStyle = '#30363d'; ctx.lineWidth = 0.8;
+            ctx.setLineDash([2, 2]);
+            ctx.beginPath(); ctx.moveTo(x.left, zy); ctx.lineTo(x.right, zy); ctx.stroke();
+            ctx.restore();
+            ctx.save();
+            ctx.fillStyle = '#c9d1d9'; ctx.font = '10px monospace';
+            ctx.textAlign = 'left'; ctx.textBaseline = 'middle';
+            ctx.fillText('0.0', x.right + 6, zy);
+            ctx.restore();
+          }
+        },
+      }],
+    });
   }
 }
 
+// Make sure charts get cleaned up when the modal closes.
+function _scCleanupOnClose() { _destroyScCharts(); }
+
 function _renderSidecar(data) {
   const ev   = data.event        || {};
   const pv   = data.peak_values  || {};
@@ -2512,6 +2779,7 @@ function closeSidecarModal() {
   document.getElementById('sc-overlay').classList.remove('visible');
   _scCurrentEventId = null;
   _scCurrentSidecar = null;
+  _destroyScCharts();
 }
 
 function onSidecarOverlayClick(e) {
@@ -2722,6 +2990,13 @@ document.addEventListener('keydown', e => {
 // hit localhost:8200, 10.0.0.44:8200, or anything else.
 document.getElementById('api-base').value = window.location.origin;
 
+// We default to Database view → trigger initial history + units load
+// (switchSection handles this when clicked, but we never click on first paint).
+if (currentSection === 'db') {
+  if (!histLoaded)  loadHistory();
+  if (!unitsLoaded) loadUnits();
+}
+
 // Press Enter in any live connect field to connect
 ['dev-host','dev-port'].forEach(id => {
   document.getElementById(id)?.addEventListener('keydown', e => { if (e.key === 'Enter') connectUnit(); });
@@ -2738,6 +3013,12 @@ document.getElementById('api-base').value = window.location.origin;
       <button class="sc-close" onclick="closeSidecarModal()">×</button>
     </div>
     <div class="sc-body">
+      <!-- Waveform plot — 4 channels stacked (MicL, Long, Vert, Tran) — -->
+      <div class="sc-section" id="sc-section-waveform">
+        <h4>Waveform</h4>
+        <div id="sc-waveform-status" style="color:var(--text-dim);font-size:11px;margin-bottom:6px">Loading…</div>
+        <div id="sc-waveform-charts" style="display:flex;flex-direction:column;gap:6px"></div>
+      </div>
       <div class="sc-section">
         <h4>Event</h4>
         <dl class="sc-grid">
-- 
2.52.0


From 6abfadae4f27f1d7e63e5cb512b723fd4f56b8a3 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Sat, 23 May 2026 21:58:20 +0000
Subject: [PATCH 19/42] viewers: render pre-trigger samples (time_axis is
 metadata, not an array)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The /db/events/{id}/waveform.json endpoint returns `time_axis` as a
metadata object — {sample_rate, pretrig_samples, t0_ms, dt_ms,
n_samples, total_samples, rectime_seconds} — not a per-sample times
array.  Both viewers (sfm_webapp.html sidecar modal + event_browser.html)
were treating it as an array, silently falling back to a derived path
that ignored pretrig entirely and started the time axis at 0.

Symptom: trigger line drawn at the very left edge of every chart, no
visible "leading up to the event" samples even though they're in the
decoded data.

Fix: read time_axis.t0_ms (negative when pretrig samples exist),
time_axis.dt_ms, build per-sample times as `t0_ms + i * dt_ms`.  Trigger
line lands at sample where t crosses 0; pretrig samples render at
negative t to the left of it.

Confirmed on a K558 event with 208 pretrig samples + 2 sec rectime at
1024 sps — time axis now spans -203 ms to +2046 ms, trigger line at
~9% from the left edge as expected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 19 +++++++++----------
 sfm/sfm_webapp.html    | 20 ++++++++++----------
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index 0dce1b0..c3b7516 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -522,8 +522,13 @@ function renderWaveform(data) {
   charts = {};
 
   const channels = data.channels || {};
-  const timeAxis = data.time_axis || null;  // ms relative to trigger
-  const triggerMs = data.trigger_ms ?? 0;
+  // time_axis is METADATA from sfm.plot.v1 — sample_rate, pretrig_samples,
+  // t0_ms (first-sample time relative to trigger; negative when pretrig
+  // exists), dt_ms.  Trigger is at t=0 by convention.
+  const ta    = data.time_axis || {};
+  const sr    = ta.sample_rate || 1024;
+  const dtMs  = ta.dt_ms || (1000.0 / sr);
+  const t0Ms  = ta.t0_ms != null ? ta.t0_ms : 0;
   const isPrintMode = document.body.classList.contains('print-view');
 
   // Which channels actually have data → determines which one renders the
@@ -578,14 +583,8 @@ function renderWaveform(data) {
     wrap.appendChild(canvasWrap);
     chartsDiv.appendChild(wrap);
 
-    // Build time labels — use server-provided time_axis if present, else derive from sample_rate
-    let times;
-    if (timeAxis && timeAxis.length === values.length) {
-      times = timeAxis;
-    } else {
-      const sr = data.sample_rate || 1024;
-      times = values.map((_, i) => (i / sr * 1000 - triggerMs));
-    }
+    // Per-sample time in ms relative to trigger.  Negative for pre-trigger samples.
+    const times = values.map((_, i) => t0Ms + i * dtMs);
 
     // Downsample for rendering
     const MAX_POINTS = 4000;
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index 6c68b38..df23b3a 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2572,8 +2572,14 @@ function _renderScWaveform(data) {
   _destroyScCharts();
 
   const channels = data.channels || {};
-  const timeAxis = data.time_axis || null;
-  const triggerMs = data.trigger_ms ?? 0;
+  // time_axis is METADATA, not an array — it carries sample_rate,
+  // pretrig_samples, t0_ms (first-sample time relative to trigger,
+  // negative when pretrig samples exist), and dt_ms.  Trigger is at
+  // t=0 by convention.
+  const ta       = data.time_axis || {};
+  const sr       = ta.sample_rate || 1024;
+  const dtMs     = ta.dt_ms || (1000.0 / sr);
+  const t0Ms     = ta.t0_ms != null ? ta.t0_ms : 0;
 
   // Which channels have data — determines which one renders the shared bottom axis.
   const withData = _SC_CHANNEL_ORDER.filter(ch =>
@@ -2612,14 +2618,8 @@ function _renderScWaveform(data) {
     wrap.appendChild(canvasWrap);
     chartsDiv.appendChild(wrap);
 
-    // Build time axis.  Prefer server-provided time_axis; else derive from sample_rate.
-    let times;
-    if (timeAxis && timeAxis.length === values.length) {
-      times = timeAxis;
-    } else {
-      const sr = data.sample_rate || 1024;
-      times = values.map((_, i) => (i / sr * 1000 - triggerMs));
-    }
+    // Per-sample time in ms relative to trigger.  Negative for pre-trigger samples.
+    const times = values.map((_, i) => t0Ms + i * dtMs);
 
     // Downsample for rendering when very long.
     const MAX = 3000;
-- 
2.52.0


From 784f2cca361840adcb50de06467b0f4117e869a5 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Sun, 24 May 2026 19:54:04 +0000
Subject: [PATCH 20/42] viewers: decimal peak labels + bar chart for histograms
 + clean x-axis ticks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three polish fixes spotted in the first prod screenshot of the inline
event-modal waveform plot:

1. Peak labels were rendering as "PEAK 2.500E-2 IN/S" because of a
   blanket toExponential(3) call.  New _fmtPeak() formatter picks
   decimal with adaptive precision for normal-range values (0.0001 to
   10000) and falls back to scientific only for truly extreme
   magnitudes.  Same value now reads "peak 0.02500 in/s".

2. Histogram events were being plotted as connected line charts, but
   histograms are per-INTERVAL peaks (one bar per minute, typically),
   not per-sample waveforms.  Now: detect histogram via record_type,
   render as a tight bar graph (bars touch), suppress the trigger line
   + zero baseline overlays (no trigger event on a histogram), and
   label the x-axis with interval number instead of milliseconds.

3. X-axis tick labels were displaying as "11.7187040000000002 ms"
   because the callback used the raw float, not the formatted label.
   Snap to 1 decimal place (or integer for whole-number values like
   histogram intervals).

Applied to both the inline modal plot in sfm_webapp.html and the
standalone /events viewer in event_browser.html — they share the same
data shape and presentation conventions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 67 +++++++++++++++++++++++++++++++-------
 sfm/sfm_webapp.html    | 73 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 116 insertions(+), 24 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index c3b7516..357e718 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -328,6 +328,23 @@ const CHANNEL_COLORS = {
 };
 const CHANNEL_ORDER = ['MicL', 'Long', 'Vert', 'Tran'];
 
+// Adaptive decimal formatter — scientific notation only for truly extreme
+// values.  Normal-range peaks render as plain decimals with sensible
+// precision (was previously forcing toExponential(3) which produced ugly
+// "2.500E-2 IN/S" labels).
+function _fmtPeak(v, unit) {
+  if (v == null || (typeof v === 'number' && !isFinite(v))) return '';
+  if (typeof v !== 'number') return String(v) + (unit ? ' ' + unit : '');
+  if (v === 0) return '0' + (unit ? ' ' + unit : '');
+  const a = Math.abs(v);
+  const u = unit ? ' ' + unit : '';
+  if (a >= 0.0001 && a < 10000) {
+    const d = a >= 100 ? 1 : a >= 10 ? 2 : a >= 1 ? 3 : a >= 0.1 ? 4 : 5;
+    return v.toFixed(d) + u;
+  }
+  return v.toExponential(2) + u;
+}
+
 let allEvents = [];
 let filteredEvents = [];
 let currentEventId = null;
@@ -530,6 +547,10 @@ function renderWaveform(data) {
   const dtMs  = ta.dt_ms || (1000.0 / sr);
   const t0Ms  = ta.t0_ms != null ? ta.t0_ms : 0;
   const isPrintMode = document.body.classList.contains('print-view');
+  // Histograms record per-interval peaks (typically 1 per minute/5-min),
+  // not per-sample waveforms.  Render as a tight bar graph instead of a
+  // line plot — matches the BW Event Report's histogram presentation.
+  const isHistogram = String(data.record_type || '').toLowerCase().includes('histogram');
 
   // Which channels actually have data → determines which one renders the
   // shared x-axis at the bottom (Instantel printout has the time scale
@@ -562,8 +583,8 @@ function renderWaveform(data) {
     const peak = chData.peak;
     const peakT = chData.peak_t_ms;
     const peakLabel = peak != null
-      ? `peak ${(typeof peak === 'number' ? peak.toExponential(3) : peak)} ${unit}`
-        + (peakT != null ? ` @ ${peakT.toFixed(1)} ms` : '')
+      ? `peak ${_fmtPeak(peak, unit)}`
+        + (!isHistogram && peakT != null ? ` @ ${peakT.toFixed(1)} ms` : '')
       : '';
     // Hide x-axis on every chart except the bottom-most data channel —
     // gives the "single shared time axis" feel of the BW printout.
@@ -583,8 +604,12 @@ function renderWaveform(data) {
     wrap.appendChild(canvasWrap);
     chartsDiv.appendChild(wrap);
 
-    // Per-sample time in ms relative to trigger.  Negative for pre-trigger samples.
-    const times = values.map((_, i) => t0Ms + i * dtMs);
+    // Waveform: per-sample time in ms relative to trigger (negative for pretrig).
+    // Histogram: interval index (1..N); sample_rate-based time math doesn't
+    // apply to per-interval peaks.
+    const times = isHistogram
+      ? values.map((_, i) => i + 1)
+      : values.map((_, i) => t0Ms + i * dtMs);
 
     // Downsample for rendering
     const MAX_POINTS = 4000;
@@ -595,11 +620,26 @@ function renderWaveform(data) {
       rV = values.filter((_, i) => i % step === 0);
     }
 
+    // Tick formatter — round to 1 decimal so we don't get
+    // "11.7187040000000002 ms" garbage from floating-point accumulation.
+    const xAxisUnit = isHistogram ? '' : ' ms';
+    const fmtTick = i => {
+      const v = rT[i];
+      if (typeof v !== 'number') return String(v) + xAxisUnit;
+      return (Number.isInteger(v) ? String(v) : v.toFixed(1)) + xAxisUnit;
+    };
+
     const chart = new Chart(canvas, {
-      type: 'line',
+      type: isHistogram ? 'bar' : 'line',
       data: {
-        labels: rT.map(t => (typeof t === 'number' ? t.toFixed(2) : t)),
-        datasets: [{
+        labels: rT.map(t => (typeof t === 'number' ? (Number.isInteger(t) ? String(t) : t.toFixed(2)) : t)),
+        datasets: isHistogram ? [{
+          data: rV,
+          backgroundColor: CHANNEL_COLORS[ch],
+          borderWidth: 0,
+          barPercentage: 1.0,
+          categoryPercentage: 1.0,  // bars touch — tight bargraph
+        }] : [{
           data: rV,
           borderColor: CHANNEL_COLORS[ch],
           borderWidth: 1,
@@ -617,8 +657,10 @@ function renderWaveform(data) {
             mode: 'index',
             intersect: false,
             callbacks: {
-              title: items => `t = ${items[0].label} ms`,
-              label: item => `${ch}: ${item.raw} ${unit}`,
+              title: items => isHistogram
+                ? `interval ${items[0].label}`
+                : `t = ${items[0].label} ms`,
+              label: item => `${ch}: ${_fmtPeak(item.raw, unit)}`,
             },
           },
         },
@@ -630,7 +672,7 @@ function renderWaveform(data) {
               color: isPrintMode ? '#666' : '#484f58',
               maxTicksLimit: 10,
               maxRotation: 0,
-              callback: (val, i) => rT[i] + ' ms',
+              callback: (val, i) => fmtTick(i),
             },
             grid: { color: isPrintMode ? '#e0e0e0' : '#21262d', drawTicks: showXAxis },
           },
@@ -642,10 +684,11 @@ function renderWaveform(data) {
           },
         },
       },
-      plugins: [{
+      plugins: isHistogram ? [] : [{
         // Trigger line @ t=0 + triangle markers above/below + "0.0"
         // baseline label on the right edge.  Matches the Instantel
-        // BW Event Report printout style.
+        // BW Event Report printout style.  Skipped for histograms —
+        // they have no trigger event.
         id: 'instantelOverlays',
         afterDraw(chart) {
           const ctx   = chart.ctx;
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index df23b3a..e072566 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2560,6 +2560,23 @@ const _SC_CHANNEL_COLORS = {
 const _SC_CHANNEL_ORDER = ['MicL', 'Long', 'Vert', 'Tran'];
 let _scCharts = {};
 
+// Adaptive decimal formatter — scientific notation is reserved for truly
+// extreme values (10000+ or sub-0.0001).  Normal-range values (most peaks
+// fall here) render as decimals with sensible precision.  Replaces the
+// previous .toExponential(3) call that turned every peak into ugly "2.500E-2".
+function _fmtPeak(v, unit) {
+  if (v == null || (typeof v === 'number' && !isFinite(v))) return '';
+  if (typeof v !== 'number') return String(v) + (unit ? ' ' + unit : '');
+  if (v === 0) return '0' + (unit ? ' ' + unit : '');
+  const a = Math.abs(v);
+  const u = unit ? ' ' + unit : '';
+  if (a >= 0.0001 && a < 10000) {
+    const d = a >= 100 ? 1 : a >= 10 ? 2 : a >= 1 ? 3 : a >= 0.1 ? 4 : 5;
+    return v.toFixed(d) + u;
+  }
+  return v.toExponential(2) + u;
+}
+
 function _destroyScCharts() {
   Object.values(_scCharts).forEach(c => { try { c.destroy(); } catch {} });
   _scCharts = {};
@@ -2578,8 +2595,14 @@ function _renderScWaveform(data) {
   // t=0 by convention.
   const ta       = data.time_axis || {};
   const sr       = ta.sample_rate || 1024;
-  const dtMs     = ta.dt_ms || (1000.0 / sr);
-  const t0Ms     = ta.t0_ms != null ? ta.t0_ms : 0;
+  const dtMs    = ta.dt_ms || (1000.0 / sr);
+  const t0Ms    = ta.t0_ms != null ? ta.t0_ms : 0;
+  // Histogram events have per-interval peaks, not per-sample data.
+  // Render as bars (one per interval) instead of a connected line, and
+  // suppress trigger/zero overlays which don't apply.  X-axis becomes
+  // interval index since the sample_rate-based time math is meaningless
+  // here (each "sample" is one interval, typically 1-5 minutes long).
+  const isHistogram = String(data.record_type || '').toLowerCase().includes('histogram');
 
   // Which channels have data — determines which one renders the shared bottom axis.
   const withData = _SC_CHANNEL_ORDER.filter(ch =>
@@ -2597,7 +2620,7 @@ function _renderScWaveform(data) {
     const lbl = document.createElement('div');
     lbl.style.cssText = `font-size:10px;font-weight:600;letter-spacing:0.05em;text-transform:uppercase;margin-bottom:2px;color:${_SC_CHANNEL_COLORS[ch]};display:flex;justify-content:space-between`;
     const peakStr = chData.peak != null
-      ? `peak ${(typeof chData.peak === 'number' ? chData.peak.toExponential(3) : chData.peak)} ${chData.unit || ''}`
+      ? `peak ${_fmtPeak(chData.peak, chData.unit)}`
       : '';
     lbl.innerHTML = `<span>${ch}</span><span style="color:var(--text-dim);font-weight:normal">${peakStr}</span>`;
     wrap.appendChild(lbl);
@@ -2618,8 +2641,11 @@ function _renderScWaveform(data) {
     wrap.appendChild(canvasWrap);
     chartsDiv.appendChild(wrap);
 
-    // Per-sample time in ms relative to trigger.  Negative for pre-trigger samples.
-    const times = values.map((_, i) => t0Ms + i * dtMs);
+    // Waveform: per-sample time in ms relative to trigger (negative for pretrig).
+    // Histogram: interval index (1..N); time math doesn't apply to per-interval peaks.
+    const times = isHistogram
+      ? values.map((_, i) => i + 1)
+      : values.map((_, i) => t0Ms + i * dtMs);
 
     // Downsample for rendering when very long.
     const MAX = 3000;
@@ -2631,11 +2657,30 @@ function _renderScWaveform(data) {
     }
     const showX = (ch === lastCh);
 
+    // Tick label formatter: snap floats to 1 decimal place so we don't get
+    // "11.7187040000000002 ms" garbage from accumulated floating-point error.
+    const xAxisLabel = isHistogram ? '' : ' ms';
+    const fmtTick = i => {
+      const v = rT[i];
+      if (typeof v === 'number') {
+        // Whole numbers (intervals) → no decimals.  Sub-integer ms → 1 decimal.
+        const s = Number.isInteger(v) ? String(v) : v.toFixed(1);
+        return s + xAxisLabel;
+      }
+      return String(v) + xAxisLabel;
+    };
+
     _scCharts[ch] = new Chart(canvas, {
-      type: 'line',
+      type: isHistogram ? 'bar' : 'line',
       data: {
-        labels: rT.map(t => (typeof t === 'number' ? t.toFixed(2) : t)),
-        datasets: [{
+        labels: rT.map(t => (typeof t === 'number' ? (Number.isInteger(t) ? String(t) : t.toFixed(2)) : t)),
+        datasets: isHistogram ? [{
+          data: rV,
+          backgroundColor: _SC_CHANNEL_COLORS[ch],
+          borderWidth: 0,
+          barPercentage: 1.0,
+          categoryPercentage: 1.0,  // bars touch — "tight bargraph" look
+        }] : [{
           data: rV,
           borderColor: _SC_CHANNEL_COLORS[ch],
           borderWidth: 1,
@@ -2650,15 +2695,17 @@ function _renderScWaveform(data) {
           tooltip: {
             mode: 'index', intersect: false,
             callbacks: {
-              title: items => `t = ${items[0].label} ms`,
-              label: item => `${ch}: ${item.raw} ${chData.unit || ''}`,
+              title: items => isHistogram
+                ? `interval ${items[0].label}`
+                : `t = ${items[0].label} ms`,
+              label: item => `${ch}: ${_fmtPeak(item.raw, chData.unit)}`,
             },
           },
         },
         scales: {
           x: {
             type: 'category', display: showX,
-            ticks: { color: '#484f58', maxTicksLimit: 8, maxRotation: 0, callback: (v, i) => rT[i] + ' ms' },
+            ticks: { color: '#484f58', maxTicksLimit: 8, maxRotation: 0, callback: (v, i) => fmtTick(i) },
             grid:  { color: '#21262d', drawTicks: showX },
           },
           y: {
@@ -2668,7 +2715,9 @@ function _renderScWaveform(data) {
           },
         },
       },
-      plugins: [{
+      plugins: isHistogram ? [] : [{
+        // Trigger line + triangle markers + zero baseline — only meaningful
+        // for waveform-mode events.  Histograms have no trigger.
         id: 'overlays',
         afterDraw(chart) {
           const ctx = chart.ctx, x = chart.scales.x, y = chart.scales.y;
-- 
2.52.0


From 5d5441604bae34d7b9bd8cb00a40b27316e882d4 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Sun, 24 May 2026 20:26:23 +0000
Subject: [PATCH 21/42] viewers: symmetric Y-axis on geo waveforms + clarify
 timestamp labels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two fixes from the second screenshot review:

1. Geophone waveform Y-axis now renders SYMMETRIC around zero — zero
   line sits in the middle of the chart, signal goes both above and
   below.  Standard seismograph display convention; matches the
   Instantel printout look.  Previously Chart.js auto-scaled to the
   data range so e.g. Vert showing values from -0.005 to -0.015 had
   the zero line completely off-screen.

   Mic channel (sound pressure, always positive) keeps the default
   auto-scale anchored at zero.  Histograms (per-interval peaks, also
   always positive) likewise keep bars rising from a zero baseline.

2. Modal labels clarified to remove the 'Timestamp' vs 'Captured at'
   ambiguity:
     'Timestamp'   →  'Recorded at'         (when the seismograph
                                              recorded the event —
                                              from BW report's Event
                                              Time field)
     'Captured at' →  'Received by server at' (when our sfm-db
                                              inserted the row)
   Both have tooltips explaining the distinction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 17 +++++++++++++++++
 sfm/sfm_webapp.html    | 25 +++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index 357e718..5d7a493 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -629,6 +629,22 @@ function renderWaveform(data) {
       return (Number.isInteger(v) ? String(v) : v.toFixed(1)) + xAxisUnit;
     };
 
+    // Y-axis bounds.  Geophone waveforms render symmetric around zero
+    // (seismograph convention — zero line in the middle, signal goes
+    // up AND down).  Mic + histograms keep default auto-scale (always
+    // positive values; zero at the bottom).
+    let yBounds = {};
+    const isGeoWaveform = !isHistogram && ch !== 'MicL';
+    if (isGeoWaveform) {
+      let absMax = 0;
+      for (const v of values) {
+        const a = Math.abs(v);
+        if (a > absMax) absMax = a;
+      }
+      const padded = (absMax || 1) * 1.10;
+      yBounds = { min: -padded, max: padded };
+    }
+
     const chart = new Chart(canvas, {
       type: isHistogram ? 'bar' : 'line',
       data: {
@@ -677,6 +693,7 @@ function renderWaveform(data) {
             grid: { color: isPrintMode ? '#e0e0e0' : '#21262d', drawTicks: showXAxis },
           },
           y: {
+            ...yBounds,
             ticks: { color: isPrintMode ? '#666' : '#484f58', maxTicksLimit: 5 },
             grid: { color: isPrintMode ? '#e0e0e0' : '#21262d' },
             title: { display: true, text: unit,
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index e072566..2c4a912 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2670,6 +2670,24 @@ function _renderScWaveform(data) {
       return String(v) + xAxisLabel;
     };
 
+    // Y-axis bounds.  Convention:
+    //   - Geophones (Tran/Vert/Long) on waveform-mode events:
+    //     symmetric around zero so the zero line sits in the middle and
+    //     positive/negative excursions are visually balanced.
+    //   - Mic (always positive sound pressure) + histograms (per-interval
+    //     peaks, always positive): default auto-scale, zero at the bottom.
+    let yBounds = {};
+    const isGeoWaveform = !isHistogram && ch !== 'MicL';
+    if (isGeoWaveform) {
+      let absMax = 0;
+      for (const v of values) {
+        const a = Math.abs(v);
+        if (a > absMax) absMax = a;
+      }
+      const padded = (absMax || 1) * 1.10;
+      yBounds = { min: -padded, max: padded };
+    }
+
     _scCharts[ch] = new Chart(canvas, {
       type: isHistogram ? 'bar' : 'line',
       data: {
@@ -2709,6 +2727,7 @@ function _renderScWaveform(data) {
             grid:  { color: '#21262d', drawTicks: showX },
           },
           y: {
+            ...yBounds,
             ticks: { color: '#484f58', maxTicksLimit: 4 },
             grid:  { color: '#21262d' },
             title: { display: true, text: chData.unit || '', color: '#484f58', font: { size: 9 } },
@@ -3072,7 +3091,8 @@ if (currentSection === 'db') {
         <h4>Event</h4>
         <dl class="sc-grid">
           <dt>Serial</dt>           <dd id="sc-f-serial">—</dd>
-          <dt>Timestamp</dt>        <dd id="sc-f-ts">—</dd>
+          <dt title="When the seismograph recorded this event (from the BW report's Event Time field)">Recorded at</dt>
+                                    <dd id="sc-f-ts">—</dd>
           <dt>Record type</dt>      <dd id="sc-f-rt">—</dd>
           <dt>Sample rate</dt>      <dd id="sc-f-sr">—</dd>
           <dt>Waveform key</dt>     <dd id="sc-f-key">—</dd>
@@ -3104,7 +3124,8 @@ if (currentSection === 'db') {
           <dt id="sc-l-bwsize">File size</dt>   <dd id="sc-f-bwsize">—</dd>
           <dt id="sc-l-sha">File sha256</dt>    <dd id="sc-f-sha">—</dd>
           <dt>Source kind</dt>      <dd id="sc-f-src">—</dd>
-          <dt>Captured at</dt>      <dd id="sc-f-cap">—</dd>
+          <dt title="When our server received and stored this event (sfm-db insert time, not the recording time)">Received by server at</dt>
+                                    <dd id="sc-f-cap">—</dd>
         </dl>
       </div>
       <div class="sc-section">
-- 
2.52.0


From ed926de3f44ca868309fe917b133fecb97c1c939 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 02:30:56 +0000
Subject: [PATCH 22/42] =?UTF-8?q?viewers:=20default=20mic=20to=20dB(L)=20+?=
 =?UTF-8?q?=20add=20Mic-unit=20toggle=20(dBL=20=E2=86=94=20psi)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sidecar-modal waveform plot was rendering mic in raw psi, while the
rest of SFM (history table column, peaks block, live-device chart,
event detail modal mic field) had already been converted to dB(L) to
match the BW Event Report convention.  This commit brings the waveform
plots in line with the rest.

Both viewers now:
  - Default mic chart values + axis title + peak label to dB(L)
  - Provide a header toggle ("Mic: dBL" pill) to flip to psi
  - Persist the preference via localStorage (sfm_mic_unit)
  - Re-render the open chart immediately on toggle

Conversion: dBL = 20 * log10(psi / 2.9e-9), where 2.9e-9 psi is the
20 µPa reference pressure already defined for the rest of the webapp.
Non-positive psi samples (log undefined) render as null; Chart.js
handles them as gaps in line mode and missing bars in histogram mode.

Also fixes event_browser.html's stats table — the MicL row was
hard-coding "<value> psi"; now honors the same toggle.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 66 ++++++++++++++++++++++++++++++++++++++----
 sfm/sfm_webapp.html    | 55 +++++++++++++++++++++++++++++++----
 2 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index 5d7a493..1ef883b 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -289,7 +289,12 @@
   </select>
   <input type="search" id="event-filter" placeholder="filter events…" />
   <span class="pill" id="count-pill">—</span>
-  <button id="print-btn" onclick="togglePrintView()" style="margin-left:auto;background:#21262d">Print view</button>
+  <button id="mic-unit-toggle" style="margin-left:auto;background:#21262d"
+          onclick="_setMicUnit(_getMicUnit() === 'dBL' ? 'psi' : 'dBL')"
+          title="Toggle mic display unit (dBL ↔ psi). Persists across page loads.">
+    Mic: dBL
+  </button>
+  <button id="print-btn" onclick="togglePrintView()" style="background:#21262d">Print view</button>
   <button id="reload-btn" onclick="loadSerials()">Reload</button>
 </header>
 
@@ -328,6 +333,29 @@ const CHANNEL_COLORS = {
 };
 const CHANNEL_ORDER = ['MicL', 'Long', 'Vert', 'Tran'];
 
+// Reference pressure for dB(L) — 20 µPa expressed in psi (≈ 2.9e-9 psi).
+const DBL_REF = 2.9e-9;
+
+// User-toggleable mic display unit: 'dBL' (default, matches BW printout
+// + the rest of SFM) or 'psi' (raw sample unit).
+function _getMicUnit() {
+  return localStorage.getItem('sfm_mic_unit') === 'psi' ? 'psi' : 'dBL';
+}
+function _setMicUnit(u) {
+  localStorage.setItem('sfm_mic_unit', u === 'psi' ? 'psi' : 'dBL');
+  _refreshMicUnitToggle();
+  if (currentEventId) loadEvent(currentEventId);
+}
+function _refreshMicUnitToggle() {
+  const b = document.getElementById('mic-unit-toggle');
+  if (b) b.textContent = `Mic: ${_getMicUnit()}`;
+}
+// psi → dB(L).  Null for non-positive (log undefined; Chart.js renders as a gap).
+function _psiToDbl(psi) {
+  if (psi == null || !(psi > 0)) return null;
+  return 20 * Math.log10(psi / DBL_REF);
+}
+
 // Adaptive decimal formatter — scientific notation only for truly extreme
 // values.  Normal-range peaks render as plain decimals with sensible
 // precision (was previously forcing toExponential(3) which produced ugly
@@ -502,6 +530,19 @@ function renderMeta(data, ev) {
     ['Vert', ev?.vert_ppv],
     ['Long', ev?.long_ppv],
   ];
+  // Mic display honors the current user preference (dBL default).
+  // mic_ppv is stored as raw psi on series3 events; convert when needed.
+  const micPsi = ev?.mic_ppv;
+  const micUnitDisplay = _getMicUnit();
+  let micStr;
+  if (micPsi == null) {
+    micStr = '—';
+  } else if (micUnitDisplay === 'dBL') {
+    const d = _psiToDbl(Number(micPsi));
+    micStr = (d != null ? d.toFixed(1) : '—') + ' dBL';
+  } else {
+    micStr = Number(micPsi).toExponential(2) + ' psi';
+  }
   const statsHtml = `
     <table class="stats-table">
       <thead>
@@ -509,7 +550,7 @@ function renderMeta(data, ev) {
       </thead>
       <tbody>
         ${rows.map(([ch, ppv]) => `<tr><td>${ch}</td><td>${fmt(ppv)}</td></tr>`).join('')}
-        <tr><td>MicL</td><td>${fmt(ev?.mic_ppv)} psi</td></tr>
+        <tr><td>MicL</td><td>${micStr}</td></tr>
       </tbody>
     </table>
   `;
@@ -560,11 +601,11 @@ function renderWaveform(data) {
   );
   const lastDataCh = channelsWithData[channelsWithData.length - 1];
 
+  const micUnit = _getMicUnit();
   for (const ch of CHANNEL_ORDER) {
     const chData = channels[ch];
     if (!chData) continue;
-    const values = chData.values || [];
-    if (values.length === 0) {
+    if ((chData.values || []).length === 0) {
       // Render an empty card so user sees the channel exists but is missing
       const wrap = document.createElement('div');
       wrap.className = 'chart-wrap';
@@ -579,9 +620,19 @@ function renderWaveform(data) {
       continue;
     }
 
-    const unit = chData.unit || 'unit';
-    const peak = chData.peak;
+    // Mic channel: convert from raw psi to dB(L) when the user prefers dBL
+    // (the default).  We mutate `values`, `peak`, and `unit` locally so the
+    // chart datasets + axis title + tooltip + peak label all stay aligned.
+    let values = chData.values || [];
+    let unit  = chData.unit || 'unit';
+    let peak  = chData.peak;
     const peakT = chData.peak_t_ms;
+    if (ch === 'MicL' && unit === 'psi' && micUnit === 'dBL') {
+      values = values.map(_psiToDbl);
+      peak   = _psiToDbl(peak);
+      unit   = 'dB(L)';
+    }
+
     const peakLabel = peak != null
       ? `peak ${_fmtPeak(peak, unit)}`
         + (!isHistogram && peakT != null ? ` @ ${peakT.toFixed(1)} ms` : '')
@@ -781,6 +832,9 @@ document.getElementById('serial-select').addEventListener('change', e => {
 });
 document.getElementById('event-filter').addEventListener('input', applyFilter);
 
+// Reflect any persisted mic-unit preference in the header pill on load
+_refreshMicUnitToggle();
+
 // Initial load
 loadSerials();
 </script>
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index 2c4a912..ad2b6e9 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -818,6 +818,12 @@
     <span class="ft-dot"></span>
     <span>Force refresh</span>
   </label>
+  <div class="hdr-sep"></div>
+  <button id="mic-unit-toggle" class="section-btn"
+          onclick="_setMicUnit(_getMicUnit() === 'dBL' ? 'psi' : 'dBL')"
+          title="Toggle microphone display unit (dBL ↔ psi) for waveform plots.  Affects all mic charts; persists across page loads.">
+    Mic: dBL
+  </button>
 </header>
 
 <!-- ════════════════════════════════════════════════════════════════
@@ -2560,6 +2566,29 @@ const _SC_CHANNEL_COLORS = {
 const _SC_CHANNEL_ORDER = ['MicL', 'Long', 'Vert', 'Tran'];
 let _scCharts = {};
 
+// User preference for how mic is displayed in plots — dBL (default,
+// matches BW printout convention + the rest of SFM) or psi (the raw
+// sample unit).  Toggleable via the header pill; persists in localStorage.
+function _getMicUnit() {
+  return localStorage.getItem('sfm_mic_unit') === 'psi' ? 'psi' : 'dBL';
+}
+function _setMicUnit(u) {
+  localStorage.setItem('sfm_mic_unit', u === 'psi' ? 'psi' : 'dBL');
+  _refreshMicUnitToggleLabel();
+  // Re-render the open modal so the change is immediately visible.
+  if (_scCurrentEventId) openSidecarModal(_scCurrentEventId);
+}
+function _refreshMicUnitToggleLabel() {
+  const b = document.getElementById('mic-unit-toggle');
+  if (b) b.textContent = `Mic: ${_getMicUnit()}`;
+}
+// Convert a psi value to dB(L).  Returns null for non-positive values
+// (log of zero is undefined) — Chart.js handles null as a gap in the line.
+function _psiToDbl(psi) {
+  if (psi == null || !(psi > 0)) return null;
+  return 20 * Math.log10(psi / DBL_REF);
+}
+
 // Adaptive decimal formatter — scientific notation is reserved for truly
 // extreme values (10000+ or sub-0.0001).  Normal-range values (most peaks
 // fall here) render as decimals with sensible precision.  Replaces the
@@ -2610,17 +2639,30 @@ function _renderScWaveform(data) {
   );
   const lastCh = withData[withData.length - 1];
 
+  const micUnit = _getMicUnit();   // user preference: 'dBL' or 'psi'
+
   for (const ch of _SC_CHANNEL_ORDER) {
     const chData = channels[ch];
     if (!chData) continue;
-    const values = chData.values || [];
+    let values = chData.values || [];
+    let chUnit = chData.unit || '';
+    let chPeak = chData.peak;
+
+    // Mic channel: convert from raw psi to dB(L) when user prefers dBL
+    // (default).  Mic samples that are zero/negative become null (Chart.js
+    // renders them as gaps in line mode, zero-height bars in histogram mode).
+    if (ch === 'MicL' && chUnit === 'psi' && micUnit === 'dBL') {
+      values = values.map(_psiToDbl);
+      chPeak = _psiToDbl(chPeak);
+      chUnit = 'dB(L)';
+    }
 
     const wrap = document.createElement('div');
     wrap.style.cssText = 'background:var(--surface);border:1px solid var(--border2);border-radius:6px;padding:6px 30px 4px 10px';
     const lbl = document.createElement('div');
     lbl.style.cssText = `font-size:10px;font-weight:600;letter-spacing:0.05em;text-transform:uppercase;margin-bottom:2px;color:${_SC_CHANNEL_COLORS[ch]};display:flex;justify-content:space-between`;
-    const peakStr = chData.peak != null
-      ? `peak ${_fmtPeak(chData.peak, chData.unit)}`
+    const peakStr = chPeak != null
+      ? `peak ${_fmtPeak(chPeak, chUnit)}`
       : '';
     lbl.innerHTML = `<span>${ch}</span><span style="color:var(--text-dim);font-weight:normal">${peakStr}</span>`;
     wrap.appendChild(lbl);
@@ -2716,7 +2758,7 @@ function _renderScWaveform(data) {
               title: items => isHistogram
                 ? `interval ${items[0].label}`
                 : `t = ${items[0].label} ms`,
-              label: item => `${ch}: ${_fmtPeak(item.raw, chData.unit)}`,
+              label: item => `${ch}: ${_fmtPeak(item.raw, chUnit)}`,
             },
           },
         },
@@ -2730,7 +2772,7 @@ function _renderScWaveform(data) {
             ...yBounds,
             ticks: { color: '#484f58', maxTicksLimit: 4 },
             grid:  { color: '#21262d' },
-            title: { display: true, text: chData.unit || '', color: '#484f58', font: { size: 9 } },
+            title: { display: true, text: chUnit, color: '#484f58', font: { size: 9 } },
           },
         },
       },
@@ -3058,6 +3100,9 @@ document.addEventListener('keydown', e => {
 // hit localhost:8200, 10.0.0.44:8200, or anything else.
 document.getElementById('api-base').value = window.location.origin;
 
+// Reflect any persisted mic-unit preference in the header pill on load
+_refreshMicUnitToggleLabel();
+
 // We default to Database view → trigger initial history + units load
 // (switchSection handles this when clicked, but we never click on first paint).
 if (currentSection === 'db') {
-- 
2.52.0


From 411ef8139e939de34e920e60404ce3abd54e34c0 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 02:55:58 +0000
Subject: [PATCH 23/42] sfm: Event Report PDF generation (v0.20.0 stub layout)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New endpoint GET /db/events/{id}/report.pdf returns a single-page
letter-portrait PDF for any event with waveform data on disk.

Architecture:
  sfm/report_pdf.py — gather_report_data() assembles fields from
    SeismoDb row + .sfm.json sidecar (bw_report block) + .h5 samples;
    render_event_report_pdf() turns that into PDF bytes via matplotlib.
  sfm/server.py — new endpoint wires them together, streams PDF back
    with Content-Disposition: inline so the browser displays it.
  sfm_webapp.html — new "Download PDF" button in the event modal
    footer that opens the endpoint in a new tab.

Fields surfaced — same coverage as a Blastware Event Report:
  Header metadata (date/time, trigger source, range, sample rate,
                   project, client, operator, location, serial+firmware,
                   battery, calibration, file name)
  Microphone block (PSPL in dB(L) + psi, ZC freq, channel test)
  Per-channel stats (PPV, ZC Freq, Time of Peak, Peak Accel,
                     Peak Disp, Sensor Check) for Tran/Vert/Long
  Peak Vector Sum
  Waveform plot (MicL/Long/Vert/Tran stacked, shared time axis,
                 trigger marker, symmetric Y for geo, zero-anchored
                 mic) — OR per-interval bar chart for histograms.

Rendering pipeline = matplotlib only (vector PDF, no headless-browser
dep).  Adds matplotlib>=3.8 to deps.

The visual layout is approximate; it will be refined once reference
PDFs from Instantel land at docs/reference/instantel/.  The USBM
RI8507 / OSMRE compliance chart is stubbed (placeholder rectangle) and
is tracked as a separate work item.

Smoke-tested on a K558 waveform event: 77 KB valid PDF, all fields
populated correctly from the snapshot DB.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md        |   3 +
 pyproject.toml      |   1 +
 requirements.txt    |   1 +
 sfm/report_pdf.py   | 518 ++++++++++++++++++++++++++++++++++++++++++++
 sfm/server.py       |  28 ++-
 sfm/sfm_webapp.html |  16 ++
 6 files changed, 566 insertions(+), 1 deletion(-)
 create mode 100644 sfm/report_pdf.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 03bf500..ed75adf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,9 @@ All notable changes to seismo-relay are documented here.
 
 ### Added
 
+- **Event Report PDF generation** — `GET /db/events/{id}/report.pdf` returns a single-page letter-portrait PDF for any event with waveform data on disk.  Covers every field a Blastware Event Report includes: header metadata (date/time, trigger source, range, sample rate, project/client/operator/location, serial+firmware, battery, calibration, file name), microphone block (PSPL in dB(L) + psi, ZC freq, channel test), per-channel stats table (PPV / ZC Freq / Time of Peak / Peak Accel / Peak Disp / Sensor Check), Peak Vector Sum, and the 4-channel waveform plot stacked Instantel-style (MicL top → Tran bottom, shared time axis, trigger marker, symmetric Y on geo channels, zero-anchored on mic).  Histogram events render as per-interval bar charts instead of waveform plots.  USBM RI8507 / OSMRE compliance chart still stubbed — separate work item.  Backed by matplotlib (vector PDF output, no headless-browser dep); new `sfm/report_pdf.py` does data assembly + rendering.  **Visual layout is approximate** until reference PDFs land at `docs/reference/instantel/` to iterate against.
+- **"Download PDF" button** in the event modal's footer — triggers the new endpoint; opens in a new tab so the browser handles save-or-display + surfaces any 404 / server errors visibly.
+
 - **SFM webapp now opens to Database view by default** and the History table is fully interactive.  Click any column header to sort ascending / descending (timestamp, serial, per-channel PPV, PVS, mic dB(L), project, client, record type, key — all sortable).  Click any event row to open the event modal, which now renders a **4-channel waveform plot inline** (MicL / Long / Vert / Tran stacked, Instantel-printout order) alongside the existing sidecar review fields.  Headers are sticky so the columns stay visible while scrolling long event lists.  No more "where is the viewer" — pick a unit from the filter dropdown, scan the table, click the event, see the waveform.
 - **Stored-event browser** — new standalone HTML page at `GET /events` (`sfm/event_browser.html`).  Pick a serial from the unit dropdown, scroll through that unit's events (newest-first), click any event to render its decoded waveform via the existing `/db/events/{id}/waveform.json` endpoint.  Dark-themed Chart.js viewer, channels stacked vertically (MicL / Long / Vert / Tran — Instantel printout order, designed PDF-export-ready), trigger line at t=0, peak labels, search/filter, false-trigger flag honored.  Companion to the existing live-device viewer at `/waveform`; the two routes are now clearly delineated in their docstrings.  The webapp's inline plot at `/` is the primary path; `/events` remains a useful diagnostic when you want just a viewer.
 - **Histogram body codec — uint8 peak count fix.**  Per-channel peak fields at `block[6]/[10]/[14]/[18]` are `uint8`, not `uint16 LE` spanning `block[6:8]` etc.  The original interpretation was byte-exact on the N844 fixture corpus only because every annotation byte (`block[7]/[11]/[15]/[19]`) in those fixtures was zero.  On non-N844 events with non-zero annotation bytes (observed across BE9558 Tran-drift and BE18003 Histogram+Continuous units), the old interpretation produced peaks up to 268 in/s per channel and 35× inflated PVS sums when first deployed to prod (rolled back same day; properly fixed in this release).  Cross-correlated against BW's per-interval ASCII export on K558 / T003 / N599 / N844 corpora — 100% byte-exact on T/V/L, 99%+ on M (sub-precision rounding).  Annotation byte preserved on each record as `record["annotations"]` for future RE.  Verified against ~3,500 blocks across 5 in-repo fixtures + a synthetic K558 interval-12 regression block.
diff --git a/pyproject.toml b/pyproject.toml
index 7674acc..d5db51f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,7 @@ dependencies = [
     "python-multipart>=0.0.7",
     "h5py>=3.10",
     "numpy>=1.24",
+    "matplotlib>=3.8",
 ]
 
 [tool.setuptools.packages.find]
diff --git a/requirements.txt b/requirements.txt
index 8b01960..c77bbf7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ pyserial
 python-multipart
 h5py
 numpy
+matplotlib
diff --git a/sfm/report_pdf.py b/sfm/report_pdf.py
new file mode 100644
index 0000000..2bbcf22
--- /dev/null
+++ b/sfm/report_pdf.py
@@ -0,0 +1,518 @@
+"""
+sfm/report_pdf.py — generate Instantel-style Event Report PDFs.
+
+Stub layout for v0.20.0 — the exact visual is iterated against actual
+Blastware reference PDFs (uploaded to docs/reference/instantel/).
+Current output captures all the data fields a real BW Event Report
+contains, but the visual hierarchy / spacing is still approximate.
+
+Architecture
+────────────
+1. ``gather_report_data(db, store, event_id)`` — assembles a flat
+   ``ReportData`` from three sources: the SeismoDb events row, the
+   .sfm.json sidecar (bw_report block), and the .h5 waveform samples.
+   Returns ``None`` when the event doesn't exist or lacks a serial/filename.
+
+2. ``render_event_report_pdf(rd)`` — takes that ``ReportData`` and produces
+   a single-page letter-sized PDF as bytes, using matplotlib's PDF
+   backend (vector output, no rasterization, prints cleanly).
+
+3. The HTTP endpoint at ``/db/events/{id}/report.pdf`` wires them
+   together: fetch event → gather → render → stream bytes back with
+   ``Content-Type: application/pdf``.
+
+What's in the report (every field BW's printout includes):
+
+  Header (left):  Date/Time, Trigger Source, Range, Sample Rate, Notes,
+                  Project, Client, User Name, Seis. Loc
+  Header (right): Serial + firmware, Battery, Calibration, File Name,
+                  Post Event Notes
+  Mic block:      PSPL (dBL + psi), ZC Freq, Channel Test result
+  Stats table:    per-channel PPV / ZC Freq / Time of Peak /
+                  Peak Acceleration / Peak Displacement / Sensor Check
+  Peak Vector Sum
+  Waveform plot:  4 channels stacked (MicL/Long/Vert/Tran), shared
+                  time axis, trigger marker, peak markers
+  USBM RI8507/OSMRE compliance chart:  STUBBED — separate work item
+
+Histogram events: the layout differs (Number of Intervals header
+field, no trigger marker, per-interval bar chart instead of waveform).
+Handled via a record_type branch in ``render_event_report_pdf``.
+"""
+
+from __future__ import annotations
+
+import io
+import json
+import logging
+import math
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+import matplotlib
+matplotlib.use("Agg")   # headless — no display required
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.backends.backend_pdf import PdfPages
+
+log = logging.getLogger(__name__)
+
+
+# Reference pressure for dB(L) conversion: 20 µPa expressed in psi.
+DBL_REF_PSI = 2.9e-9
+
+
+# ── Data assembly ────────────────────────────────────────────────────────────
+
+
+@dataclass
+class ReportData:
+    """All fields needed to render an Instantel-style Event Report.
+
+    Most fields are Optional — BW's printout shows '—' or just omits
+    sections when source data is missing.  The renderer mirrors that.
+    """
+    # Header — left column
+    event_datetime_str: Optional[str] = None
+    trigger_source:     Optional[str] = None
+    geo_range_str:      Optional[str] = None
+    sample_rate_str:    Optional[str] = None
+    notes:              Optional[str] = None
+    project:            Optional[str] = None
+    client:             Optional[str] = None
+    operator:           Optional[str] = None
+    sensor_location:    Optional[str] = None
+
+    # Header — right column
+    serial:                 Optional[str] = None
+    firmware:               Optional[str] = None
+    battery_volts:          Optional[float] = None
+    calibration_date:       Optional[str] = None
+    calibration_by:         Optional[str] = None
+    file_name:              Optional[str] = None
+    post_event_notes:       Optional[str] = None
+
+    # Microphone block
+    mic_pspl_dbl:           Optional[float] = None
+    mic_pspl_psi:           Optional[float] = None
+    mic_pspl_time_s:        Optional[float] = None
+    mic_zc_freq_hz:         Optional[float] = None
+    mic_channel_test_result: Optional[str] = None
+    mic_channel_test_freq_hz: Optional[float] = None
+    mic_channel_test_amp_mv: Optional[float] = None
+
+    # Per-channel stats — list of dicts (one per channel)
+    # Keys: name, ppv_ips, zc_freq_hz, time_of_peak_s,
+    #       peak_accel_g, peak_disp_in, sensor_check
+    channel_stats:          list[dict] = field(default_factory=list)
+
+    # Peak Vector Sum
+    peak_vector_sum_ips:    Optional[float] = None
+    peak_vector_sum_time_s: Optional[float] = None
+
+    # Waveform samples — channels[ch] = list of floats in physical units
+    # Time axis derived from sample_rate + pretrig_samples
+    channels:               dict = field(default_factory=dict)
+    sample_rate_sps:        Optional[int] = None
+    pretrig_samples:        Optional[int] = None
+    t0_ms:                  Optional[float] = None
+    dt_ms:                  Optional[float] = None
+
+    # Record-type discriminator
+    record_type:            Optional[str] = None
+    is_histogram:           bool = False
+
+    # Bookkeeping
+    event_id:               Optional[str] = None
+    server_received_at:     Optional[str] = None
+    bw_pc_sw_version:       Optional[str] = None
+
+
+def gather_report_data(
+    db,
+    store,
+    event_id: str,
+) -> Optional[ReportData]:
+    """Collect every field needed to render an event report.
+
+    Returns ``None`` only when the event row is missing or has no serial /
+    Blastware filename.  Sidecar and .h5 reads are best-effort: a missing or
+    unreadable file leaves the corresponding fields at their defaults.
+    """
+    row = db.get_event(event_id)
+    if row is None:
+        return None
+    serial   = row.get("serial")
+    filename = row.get("blastware_filename")
+    if not serial or not filename:
+        return None
+
+    rd = ReportData(
+        event_id=event_id,
+        serial=serial,
+        file_name=filename,
+        record_type=row.get("record_type"),
+        is_histogram=str(row.get("record_type", "")).lower().startswith("hist"),
+        event_datetime_str=row.get("timestamp"),
+        sample_rate_sps=row.get("sample_rate"),
+        project=row.get("project"),
+        client=row.get("client"),
+        operator=row.get("operator"),
+        sensor_location=row.get("sensor_location"),
+        server_received_at=row.get("created_at"),
+    )
+
+    # ── Sidecar bw_report — the rich BW-derived fields ──
+    sidecar_path = store.sidecar_path_for(serial, filename)
+    if sidecar_path.exists():
+        try:
+            sc = json.loads(sidecar_path.read_text())
+        except Exception as exc:
+            log.warning("gather_report_data: sidecar read failed: %s", exc)
+            sc = {}
+        bw = (sc.get("bw_report") if isinstance(sc, dict) else None) or {}
+
+        # Trigger / range / sample-rate display ('—' stands in for a missing half)
+        trig = bw.get("trigger") or {}
+        lvl = trig.get("geo_level_ips")
+        rd.trigger_source = (
+            f"{trig.get('channel') or '—'}: " + ("—" if lvl is None else f"{lvl} in/s")
+            if trig.get("channel") or lvl is not None else None
+        )
+        rec = bw.get("recording") or {}
+        rd.geo_range_str = (
+            f"Geo: {rec.get('geo_range_ips')} in/s"
+            if rec.get("geo_range_ips") is not None else None
+        )
+        rt = rec.get("record_time_s")
+        if rt is not None and rd.sample_rate_sps:
+            rd.sample_rate_str = f"{rt:.1f} sec At {rd.sample_rate_sps} Sps"
+
+        # Device block
+        dev = bw.get("device") or {}
+        rd.battery_volts    = dev.get("battery_volts")
+        rd.calibration_date = dev.get("calibration_date")
+        rd.calibration_by   = dev.get("calibration_by")
+        rd.firmware         = bw.get("version")
+        rd.bw_pc_sw_version = bw.get("pc_sw_version")
+
+        # Microphone block
+        mic = bw.get("mic") or {}
+        rd.mic_pspl_dbl    = mic.get("pspl_dbl")
+        if rd.mic_pspl_dbl is not None and rd.mic_pspl_dbl > 0:
+            # Inverse of the dBL formula → psi.  Mirrors waveform_codec convention.
+            rd.mic_pspl_psi = DBL_REF_PSI * (10 ** (rd.mic_pspl_dbl / 20))
+        rd.mic_pspl_time_s = mic.get("time_of_peak_s")
+        rd.mic_zc_freq_hz  = mic.get("zc_freq_hz")
+        sc_mic = (bw.get("sensor_check") or {}).get("mic") or {}
+        rd.mic_channel_test_result   = sc_mic.get("result")
+        rd.mic_channel_test_freq_hz  = sc_mic.get("freq_hz")
+        rd.mic_channel_test_amp_mv   = sc_mic.get("amplitude_mv")
+
+        # Per-channel stats (Tran / Vert / Long)
+        peaks = bw.get("peaks") or {}
+        sc_block = bw.get("sensor_check") or {}
+        for ch_lc, ch_label in (("tran", "Tran"), ("vert", "Vert"), ("long", "Long")):
+            ch = peaks.get(ch_lc) or {}
+            sc_ch = sc_block.get(ch_lc) or {}
+            rd.channel_stats.append({
+                "name":          ch_label,
+                "ppv_ips":       ch.get("ppv_ips"),
+                "zc_freq_hz":    ch.get("zc_freq_hz"),
+                "time_of_peak_s": ch.get("time_of_peak_s"),
+                "peak_accel_g":  ch.get("peak_accel_g"),
+                "peak_disp_in":  ch.get("peak_disp_in"),
+                "sensor_check":  sc_ch.get("result"),
+            })
+
+        # Peak Vector Sum
+        vs = peaks.get("vector_sum") or {}
+        rd.peak_vector_sum_ips    = vs.get("ips")
+        rd.peak_vector_sum_time_s = vs.get("time_s")
+
+    # ── Waveform samples — from the .h5 via the existing helper ──
+    from sfm import event_hdf5
+    h5_path = store.hdf5_path_for(serial, filename)
+    if h5_path.exists():
+        try:
+            wf = event_hdf5.plot_json_from_hdf5(h5_path, event_id=event_id)
+            rd.channels = {
+                ch: (chd.get("values") or [])
+                for ch, chd in (wf.get("channels") or {}).items()
+            }
+            ta = wf.get("time_axis") or {}
+            rd.sample_rate_sps  = rd.sample_rate_sps or ta.get("sample_rate")
+            rd.pretrig_samples  = ta.get("pretrig_samples")
+            rd.t0_ms            = ta.get("t0_ms")
+            rd.dt_ms            = ta.get("dt_ms")
+        except Exception as exc:
+            log.warning("gather_report_data: hdf5 read failed: %s", exc)
+
+    return rd
+
+
+# ── PDF rendering ────────────────────────────────────────────────────────────
+
+
+def render_event_report_pdf(rd: ReportData) -> bytes:
+    """Render an event report dict to a single-page letter PDF.
+
+    Returns the raw PDF bytes — caller streams them back via FastAPI.
+
+    NOTE: this is a v0.20.0 stub layout.  The visual hierarchy will be
+    refined once reference PDFs land at docs/reference/instantel/.  All
+    fields the printout includes are surfaced; spacing and typography
+    are approximate.
+    """
+    # Letter portrait — 8.5"×11"
+    fig = plt.figure(figsize=(8.5, 11), dpi=100)
+    fig.patch.set_facecolor("white")
+
+    # Grid: header rows on top, stats in the middle, waveform plot at bottom
+    # height_ratios sum doesn't matter, only the relative proportions
+    gs = fig.add_gridspec(
+        nrows=4, ncols=1,
+        left=0.07, right=0.96, top=0.96, bottom=0.04,
+        height_ratios=[2.2, 1.0, 1.4, 5.0],
+        hspace=0.35,
+    )
+
+    # ── Header area (top) ──
+    ax_header = fig.add_subplot(gs[0])
+    ax_header.axis("off")
+    _draw_header(ax_header, rd)
+
+    # ── Mic block (left) + USBM chart placeholder (right) ──
+    ax_mic = fig.add_subplot(gs[1])
+    ax_mic.axis("off")
+    _draw_mic_block(ax_mic, rd)
+
+    # ── Per-channel stats table + Peak Vector Sum ──
+    ax_stats = fig.add_subplot(gs[2])
+    ax_stats.axis("off")
+    _draw_channel_stats(ax_stats, rd)
+
+    # ── Waveform / histogram plot ──
+    if rd.is_histogram:
+        _draw_histogram_subplot(fig, gs[3], rd)
+    else:
+        _draw_waveform_subplot(fig, gs[3], rd)
+
+    # Footer
+    fig.text(
+        0.07, 0.015,
+        f"Generated by seismo-relay  •  event_id={rd.event_id or '—'}",
+        fontsize=7, color="#888", ha="left",
+    )
+
+    buf = io.BytesIO()
+    fig.savefig(buf, format="pdf")
+    plt.close(fig)
+    return buf.getvalue()
+
+
+def _kv(ax, x, y, label, value, *, label_w=0.18):
+    """Render a 'Label  Value' row at axes-coordinates (x, y)."""
+    ax.text(x, y, label, fontsize=8, color="#555", ha="left", va="top",
+            transform=ax.transAxes)
+    ax.text(x + label_w, y, _fmt(value), fontsize=8, ha="left", va="top",
+            transform=ax.transAxes, family="monospace")
+
+
+def _fmt(v):
+    """Format any field for display — '—' for None, str otherwise."""
+    if v is None:
+        return "—"
+    if isinstance(v, float):
+        return f"{v:.4f}".rstrip("0").rstrip(".")
+    return str(v)
+
+
+def _draw_header(ax, rd: ReportData) -> None:
+    """Two-column metadata header — matches BW printout layout."""
+    # Left column
+    rows_left = [
+        ("Date/Time",      rd.event_datetime_str),
+        ("Trigger Source", rd.trigger_source),
+        ("Range",          rd.geo_range_str),
+        ("Sample Rate",    rd.sample_rate_str),
+        ("Notes",          rd.notes),
+        ("Project:",       rd.project),
+        ("Client:",        rd.client),
+        ("User Name:",     rd.operator),
+        ("Seis. Loc:",     rd.sensor_location),
+    ]
+    rows_right = [
+        ("Serial Number",   f"{rd.serial or '—'}"
+                            + (f"  {rd.firmware}" if rd.firmware else "")),
+        ("Battery Level",  f"{rd.battery_volts:.1f} Volts" if rd.battery_volts is not None else None),
+        ("Unit Calibration", (f"{rd.calibration_date}"
+                              + (f" by {rd.calibration_by}" if rd.calibration_by else ""))
+                              if rd.calibration_date else None),
+        ("File Name",      rd.file_name),
+        ("Post Event Notes", rd.post_event_notes),
+    ]
+    y = 0.95
+    dy = 0.10
+    for label, value in rows_left:
+        _kv(ax, 0.0, y, label, value, label_w=0.18)
+        y -= dy
+    y = 0.95
+    for label, value in rows_right:
+        _kv(ax, 0.55, y, label, value, label_w=0.20)
+        y -= dy
+
+
+def _draw_mic_block(ax, rd: ReportData) -> None:
+    """Microphone block — PSPL, ZC Freq, Channel Test.  USBM chart
+    placeholder on the right (filled in a separate work item)."""
+    ax.text(0.0, 0.95, "Microphone   Linear Weighting", fontsize=8, color="#555",
+            transform=ax.transAxes, va="top")
+    rows = []
+    if rd.mic_pspl_dbl is not None:
+        line = f"{rd.mic_pspl_dbl:.1f} dB(L)"
+        if rd.mic_pspl_time_s is not None:
+            line += f" at {rd.mic_pspl_time_s:.3f} sec."
+        rows.append(("PSPL", line))
+    if rd.mic_zc_freq_hz is not None:
+        rows.append(("ZC Freq", f"{rd.mic_zc_freq_hz:.0f} Hz"))
+    if rd.mic_channel_test_result:
+        line = rd.mic_channel_test_result
+        if rd.mic_channel_test_freq_hz is not None and rd.mic_channel_test_amp_mv is not None:
+            line += (f" (Freq = {rd.mic_channel_test_freq_hz:.1f} Hz, "
+                     f"Amp = {rd.mic_channel_test_amp_mv:.0f} mv)")
+        rows.append(("Channel Test", line))
+
+    y = 0.70
+    for label, value in rows:
+        _kv(ax, 0.0, y, label, value, label_w=0.18)
+        y -= 0.22
+
+    # USBM chart placeholder — upper-right of this row
+    ax.text(0.75, 0.95, "USBM RI8507 / OSMRE",
+            fontsize=8, color="#555", ha="center", va="top",
+            transform=ax.transAxes)
+    ax.text(0.75, 0.45, "[compliance chart\nrenders here]",
+            fontsize=8, color="#bbb", ha="center", va="center",
+            transform=ax.transAxes, style="italic")
+
+
+def _draw_channel_stats(ax, rd: ReportData) -> None:
+    """Per-channel stats table + Peak Vector Sum row."""
+    # Build a 2-D array of strings: header row + 3 channel rows
+    headers = ["", "Tran", "Vert", "Long", ""]
+    rows = [
+        ["PPV",                "ppv_ips",        "in/s"],
+        ["ZC Freq",            "zc_freq_hz",     "Hz"],
+        ["Time (Rel. to Trig)", "time_of_peak_s", "sec"],
+        ["Peak Acceleration",   "peak_accel_g",   "g"],
+        ["Peak Displacement",   "peak_disp_in",   "in"],
+        ["Sensor Check",        "sensor_check",   ""],
+    ]
+    ch_lookup = {c["name"]: c for c in rd.channel_stats}
+
+    def _cell(field, ch_name):
+        val = ch_lookup.get(ch_name, {}).get(field)
+        if val is None:
+            return "—"
+        if field == "sensor_check":
+            return str(val)
+        if isinstance(val, float):
+            return f"{val:.3f}"
+        return str(val)
+
+    table_data = [headers]
+    for label, field_name, unit in rows:
+        table_data.append([
+            label,
+            _cell(field_name, "Tran"),
+            _cell(field_name, "Vert"),
+            _cell(field_name, "Long"),
+            unit,
+        ])
+
+    tbl = ax.table(
+        cellText=table_data, loc="upper left",
+        colWidths=[0.30, 0.13, 0.13, 0.13, 0.10],
+        cellLoc="left", edges="open",
+    )
+    tbl.auto_set_font_size(False)
+    tbl.set_fontsize(8)
+    tbl.scale(1, 1.4)
+    # Header row styling
+    for j in range(5):
+        cell = tbl[(0, j)]
+        cell.set_text_props(weight="bold", color="#555")
+
+    # Peak Vector Sum
+    if rd.peak_vector_sum_ips is not None:
+        line = f"Peak Vector Sum   {rd.peak_vector_sum_ips:.3f} in/s"
+        if rd.peak_vector_sum_time_s is not None:
+            line += f" At {rd.peak_vector_sum_time_s:.3f} sec."
+        ax.text(0.0, -0.05, line, fontsize=9, weight="bold",
+                ha="left", va="top", transform=ax.transAxes)
+
+
+def _channel_axis_color(ch: str) -> str:
+    return {"MicL": "#cc00cc", "Long": "#0066ff", "Vert": "#009933", "Tran": "#cc0000"}.get(ch, "#444")
+
+
+def _draw_waveform_subplot(fig, gridspec_cell, rd: ReportData) -> None:
+    """4-channel stacked waveform plot — Instantel printout order
+    (MicL on top, Tran on bottom), shared x-axis."""
+    inner = gridspec_cell.subgridspec(4, 1, hspace=0.0)
+    order = ["MicL", "Long", "Vert", "Tran"]
+    sr = rd.sample_rate_sps or 1024
+    dt_ms = rd.dt_ms or (1000.0 / sr)
+    t0_ms = rd.t0_ms if rd.t0_ms is not None else 0.0
+
+    last_idx = len(order) - 1
+    for i, ch in enumerate(order):
+        ax = fig.add_subplot(inner[i])
+        values = rd.channels.get(ch) or []
+        times = [t0_ms + j * dt_ms for j in range(len(values))]
+
+        if values:
+            color = _channel_axis_color(ch)
+            ax.plot(times, values, color=color, linewidth=0.6)
+            # Symmetric y-axis for geo; zero-anchored for mic
+            if ch != "MicL":
+                amax = max((abs(v) for v in values), default=0.001)
+                ax.set_ylim(-amax * 1.1, amax * 1.1)
+        # Channel label on left
+        ax.set_ylabel(ch, fontsize=8, rotation=0, ha="right", va="center",
+                      color=_channel_axis_color(ch), weight="bold", labelpad=14)
+        ax.grid(True, linestyle=":", linewidth=0.4, alpha=0.5)
+        # Dashed trigger line at t=0
+        ax.axvline(0.0, color="#cc0000", linestyle="--", linewidth=0.8, alpha=0.7)
+        # Zero baseline
+        ax.axhline(0.0, color="#888", linestyle="-", linewidth=0.4, alpha=0.5)
+
+        if i != last_idx:
+            ax.set_xticklabels([])
+        else:
+            ax.set_xlabel("Time (ms)", fontsize=8)
+        ax.tick_params(axis="both", labelsize=7)
+
+
+def _draw_histogram_subplot(fig, gridspec_cell, rd: ReportData) -> None:
+    """4-channel stacked histogram bar chart — per-interval peaks."""
+    inner = gridspec_cell.subgridspec(4, 1, hspace=0.0)
+    order = ["MicL", "Long", "Vert", "Tran"]
+    last_idx = len(order) - 1
+    for i, ch in enumerate(order):
+        ax = fig.add_subplot(inner[i])
+        values = rd.channels.get(ch) or []
+        if values:
+            xs = np.arange(1, len(values) + 1)
+            color = _channel_axis_color(ch)
+            ax.bar(xs, values, color=color, width=1.0, linewidth=0)
+        ax.set_ylabel(ch, fontsize=8, rotation=0, ha="right", va="center",
+                      color=_channel_axis_color(ch), weight="bold", labelpad=14)
+        ax.grid(True, axis="y", linestyle=":", linewidth=0.4, alpha=0.5)
+        if i != last_idx:
+            ax.set_xticklabels([])
+        else:
+            ax.set_xlabel("Interval", fontsize=8)
+        ax.tick_params(axis="both", labelsize=7)
diff --git a/sfm/server.py b/sfm/server.py
index dfc3b45..2474681 100644
--- a/sfm/server.py
+++ b/sfm/server.py
@@ -46,7 +46,7 @@ from typing import Optional
 
 # FastAPI / Pydantic
 try:
-    from fastapi import Body, FastAPI, File, HTTPException, Query, UploadFile
+    from fastapi import Body, FastAPI, File, HTTPException, Query, Response, UploadFile
     from fastapi.middleware.cors import CORSMiddleware
     from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
     from pydantic import BaseModel
@@ -2178,6 +2178,32 @@ def db_event_blastware_file(event_id: str) -> FileResponse:
     )
 
 
+@app.get("/db/events/{event_id}/report.pdf")
+def db_event_report_pdf(event_id: str):
+    """Render an Instantel-style Event Report as a PDF.
+
+    Single-page letter portrait, matches the BW Event Report's data
+    coverage and layout (header / mic block / per-channel stats /
+    waveform plot).  V0.20.0 stub — exact visual being iterated
+    against reference PDFs in ``docs/reference/instantel/``.
+
+    Returns 404 if the event is unknown or its row lacks a serial or
+    Blastware filename; a missing .h5 still renders (with an empty plot).
+    """
+    from sfm import report_pdf
+    rd = report_pdf.gather_report_data(_get_db(), _get_store(), event_id)
+    if rd is None:
+        raise HTTPException(status_code=404, detail=f"Event {event_id} not found or has no waveform")
+    pdf_bytes = report_pdf.render_event_report_pdf(rd)
+    # Suggested filename: keep ASCII alnum/-/_ only so quotes, CR/LF or non-latin-1 can't corrupt the header.
+    fname = "".join(c if c.isascii() and (c.isalnum() or c in "-_") else "_" for c in (rd.file_name or event_id))
+    return Response(
+        content=pdf_bytes,
+        media_type="application/pdf",
+        headers={"Content-Disposition": f'inline; filename="{fname}_report.pdf"'},
+    )
+
+
 @app.get("/db/events/{event_id}/waveform.json")
 def db_event_waveform_json(event_id: str) -> dict:
     """
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index ad2b6e9..188f4d5 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2892,6 +2892,18 @@ function closeSidecarModal() {
   _destroyScCharts();
 }
 
+// Open the Event Report PDF for the currently-open event.  The server
+// sends `Content-Disposition: inline` with a suggested filename, so the
+// browser displays the PDF and can reuse that name if the user saves it.
+function downloadEventReport() {
+  if (!_scCurrentEventId) return;
+  const url = `${api()}/db/events/${encodeURIComponent(_scCurrentEventId)}/report.pdf`;
+  // Open in a new tab — a failed fetch (e.g. a 404) shows its JSON
+  // error in-page rather than silently failing.  The id is
+  // percent-encoded so it always stays a single URL path segment.
+  window.open(url, '_blank');
+}
+
 function onSidecarOverlayClick(e) {
   // Click on the dimmed backdrop (but NOT on the modal itself) closes.
   if (e.target.id === 'sc-overlay') closeSidecarModal();
@@ -3193,6 +3205,10 @@ if (currentSection === 'db') {
     </div>
     <div class="sc-footer">
       <span class="sc-status" id="sc-status"></span>
+      <button class="btn btn-ghost" id="sc-pdf-btn" onclick="downloadEventReport()"
+              title="Download an Instantel-style Event Report PDF for this event">
+        Download PDF
+      </button>
       <button class="btn btn-ghost" onclick="closeSidecarModal()">Cancel</button>
       <button class="btn" id="sc-save-btn" onclick="saveSidecarReview()">Save</button>
     </div>
-- 
2.52.0


From dfbc8b8520c3b5df81c3266909d4a9d6c2ad236a Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 18:22:03 +0000
Subject: [PATCH 24/42] report_pdf: split waveform vs histogram layouts (BW PDF
 iteration)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed against real Blastware Event Report PDFs (uploaded to
example-events/pdfsnstuff/) for K558LLB7.V20H (histogram) and
K558LLB8.0E0W (waveform).  Each event type has its own layout because
BW's printouts genuinely differ:

  Waveform header:   Date/Time, Trigger Source, Range, Sample Rate
  Histogram header:  Start, Finish, Intervals At Size, Range, Sample Rate
                     (no trigger field — histograms aren't triggered)

  Waveform stats:    PPV, ZC Freq, Time (Rel. to Trig),
                     Peak Acceleration, Peak Displacement, Sensor Check
  Histogram stats:   PPV, ZC Freq, Date, Time (of peak), Sensor Check

  Waveform plot:     4-channel stacked line, x-axis in SECONDS,
                     trigger triangle + window markers, symmetric Y
                     for geo, zero-anchored mic, "0.0" baseline label
                     on right edge per BW convention
  Histogram plot:    4-channel stacked bars, Y-axis 0-to-peak only
                     (never negative — peaks are magnitudes), 0.0
                     baseline at the bottom

  Waveform footer:   USBM chart placeholder upper-right;
                     "Time X sec/div   Amplitude Geo: Y in/s/div   Mic: 0.001 psi(L)/div"
                     "Trigger = ▶━━◀"
  Histogram footer:  No USBM chart; same scale-info footer with
                     interval-size as the time unit

Other fixes from the first-pass screenshot review:
  - Channel labels (MicL/Long/Vert/Tran) no longer cut off (wider
    left margin)
  - Histogram bars rise from zero baseline (abs of any signed values)
  - ISO timestamp "2026-05-16T22:33:50" → "22:33:50 May 16, 2026"
    matching BW's display format

Known gaps (separate work):
  - Histogram codec returns per-block granularity (~200 bars for
    BW's 4-interval display).  XML-driven data source is the planned
    fix; the structured BW XML has the per-interval aggregates.
  - USBM RI8507 / OSMRE compliance chart still placeholder

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md      |   7 +-
 sfm/report_pdf.py | 446 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 341 insertions(+), 112 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed75adf..6c79802 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,12 @@ All notable changes to seismo-relay are documented here.
 
 ### Added
 
-- **Event Report PDF generation** — `GET /db/events/{id}/report.pdf` returns a single-page letter-portrait PDF for any event with waveform data on disk.  Covers every field a Blastware Event Report includes: header metadata (date/time, trigger source, range, sample rate, project/client/operator/location, serial+firmware, battery, calibration, file name), microphone block (PSPL in dB(L) + psi, ZC freq, channel test), per-channel stats table (PPV / ZC Freq / Time of Peak / Peak Accel / Peak Disp / Sensor Check), Peak Vector Sum, and the 4-channel waveform plot stacked Instantel-style (MicL top → Tran bottom, shared time axis, trigger marker, symmetric Y on geo channels, zero-anchored on mic).  Histogram events render as per-interval bar charts instead of waveform plots.  USBM RI8507 / OSMRE compliance chart still stubbed — separate work item.  Backed by matplotlib (vector PDF output, no headless-browser dep); new `sfm/report_pdf.py` does data assembly + rendering.  **Visual layout is approximate** until reference PDFs land at `docs/reference/instantel/` to iterate against.
+- **Event Report PDF generation** — `GET /db/events/{id}/report.pdf` returns a single-page letter-portrait PDF for any event with waveform data on disk.  Covers every field a Blastware Event Report includes: header metadata (date/time, trigger source, range, sample rate, project/client/operator/location, serial+firmware, battery, calibration, file name), microphone block (PSPL in dB(L) + psi, ZC freq, channel test), per-channel stats table (rows differ for waveform vs histogram), Peak Vector Sum, and the 4-channel plot.  Iterated against real Blastware reference PDFs (uploaded to `example-events/pdfsnstuff/`):
+   - **Waveform layout**: header shows Date/Time, Trigger Source, Range, Sample Rate; stats table has PPV / ZC Freq / Time (Rel. to Trig) / Peak Accel / Peak Disp / Sensor Check; bottom plot is 4-channel line waveform (MicL top → Tran bottom), shared time axis in seconds, dashed trigger line + triangle marker at t=0, symmetric Y on geo channels, zero-anchored on mic, "0.0" baseline label on right per BW convention; footer shows `Time X sec/div   Amplitude Geo: Y in/s/div   Mic: 0.001 psi(L)/div` and the trigger window `▶━━◀` marker.  USBM RI8507/OSMRE compliance chart placeholder upper-right.
+   - **Histogram layout**: header shows Start / Finish / Intervals At Size / Range / Sample Rate (no Trigger Source — histograms aren't triggered); NO USBM chart; stats table has PPV / ZC Freq / Date / Time / Sensor Check; bottom plot is per-interval bar chart, Y-axis 0-to-peak (never negative), 0.0 baseline at the bottom; footer shows `Time INTERVAL_SIZE /div   Amplitude Geo: Y in/s/div   Mic: 0.001 psi(L)/div`.
+   - Backed by matplotlib (vector PDF, no headless-browser dep).  Adds matplotlib>=3.8 to deps.
+   - **Known gap**: histogram codec returns per-block granularity (~200 bars for a 4-interval event) instead of BW's per-interval aggregation.  Visual difference vs BW's 4-bar display.  XML-driven data source (parsing the structured `_XML.XML` files BW also exports) is the planned fix; that route also resolves the bw_ascii_report PPV-miss bug.
+   - **Stubbed**: USBM RI8507 / OSMRE compliance chart curves (separate work item; requires coding the regulatory piecewise functions).
 - **"Download PDF" button** in the event modal's footer — triggers the new endpoint; opens in a new tab so the browser handles save-or-display + surfaces any 404 / server errors visibly.
 
 - **SFM webapp now opens to Database view by default** and the History table is fully interactive.  Click any column header to sort ascending / descending (timestamp, serial, per-channel PPV, PVS, mic dB(L), project, client, record type, key — all sortable).  Click any event row to open the event modal, which now renders a **4-channel waveform plot inline** (MicL / Long / Vert / Tran stacked, Instantel-printout order) alongside the existing sidecar review fields.  Headers are sticky so the columns stay visible while scrolling long event lists.  No more "where is the viewer" — pick a unit from the filter dropdown, scan the table, click the event, see the waveform.
diff --git a/sfm/report_pdf.py b/sfm/report_pdf.py
index 2bbcf22..9a256a0 100644
--- a/sfm/report_pdf.py
+++ b/sfm/report_pdf.py
@@ -123,6 +123,16 @@ class ReportData:
     record_type:            Optional[str] = None
     is_histogram:           bool = False
 
+    # Histogram-only fields — only populated for record_type starts with 'Hist'
+    histogram_start_str:    Optional[str] = None       # "22:30:38 May 16, 2026"
+    histogram_stop_str:     Optional[str] = None
+    histogram_n_intervals:  Optional[float] = None     # 4.00
+    histogram_interval_size: Optional[str] = None      # "1 minute"
+    histogram_interval_times: list[str] = field(default_factory=list)  # per-interval timestamps for x-axis
+
+    # Peak Vector Sum metadata (histograms show absolute date+time)
+    peak_vector_sum_when_str: Optional[str] = None
+
     # Bookkeeping
     event_id:               Optional[str] = None
     server_received_at:     Optional[str] = None
@@ -231,6 +241,19 @@ def gather_report_data(
         rd.peak_vector_sum_ips    = vs.get("ips")
         rd.peak_vector_sum_time_s = vs.get("time_s")
 
+        # Histogram-specific header fields.  These come from the BW XML
+        # at ingest time (when present); the parsed bw_report dict
+        # carries them under the 'histogram' sub-block (added by the
+        # BW XML parser once that lands).  For now, derive from the
+        # event timestamp + recording config as a best-effort.
+        if rd.is_histogram:
+            hist = bw.get("histogram") or {}
+            rd.histogram_start_str = hist.get("start_str") or rd.event_datetime_str
+            rd.histogram_stop_str  = hist.get("stop_str")
+            rd.histogram_n_intervals = hist.get("n_intervals")
+            rd.histogram_interval_size = hist.get("interval_size")
+            rd.histogram_interval_times = hist.get("interval_times") or []
+
     # ── Waveform samples — from the .h5 via the existing helper ──
     from sfm import event_hdf5
     h5_path = store.hdf5_path_for(serial, filename)
@@ -258,53 +281,31 @@ def gather_report_data(
 def render_event_report_pdf(rd: ReportData) -> bytes:
     """Render an event report dict to a single-page letter PDF.
 
-    Returns the raw PDF bytes — caller streams them back via FastAPI.
-
-    NOTE: this is a v0.20.0 stub layout.  The visual hierarchy will be
-    refined once reference PDFs land at docs/reference/instantel/.  All
-    fields the printout includes are surfaced; spacing and typography
-    are approximate.
+    Branches on ``rd.is_histogram`` — waveform and histogram layouts
+    differ in their header fields, stats-table rows, and bottom plot.
+    Layout modeled on Blastware's Event Report PDFs (samples in
+    docs/reference/instantel/).
     """
     # Letter portrait — 8.5"×11"
     fig = plt.figure(figsize=(8.5, 11), dpi=100)
     fig.patch.set_facecolor("white")
 
-    # Grid: header rows on top, stats in the middle, waveform plot at bottom
-    # height_ratios sum doesn't matter, only the relative proportions
-    gs = fig.add_gridspec(
-        nrows=4, ncols=1,
-        left=0.07, right=0.96, top=0.96, bottom=0.04,
-        height_ratios=[2.2, 1.0, 1.4, 5.0],
-        hspace=0.35,
-    )
-
-    # ── Header area (top) ──
-    ax_header = fig.add_subplot(gs[0])
-    ax_header.axis("off")
-    _draw_header(ax_header, rd)
-
-    # ── Mic block (left) + USBM chart placeholder (right) ──
-    ax_mic = fig.add_subplot(gs[1])
-    ax_mic.axis("off")
-    _draw_mic_block(ax_mic, rd)
-
-    # ── Per-channel stats table + Peak Vector Sum ──
-    ax_stats = fig.add_subplot(gs[2])
-    ax_stats.axis("off")
-    _draw_channel_stats(ax_stats, rd)
-
-    # ── Waveform / histogram plot ──
     if rd.is_histogram:
-        _draw_histogram_subplot(fig, gs[3], rd)
+        _render_histogram_layout(fig, rd)
     else:
-        _draw_waveform_subplot(fig, gs[3], rd)
+        _render_waveform_layout(fig, rd)
 
-    # Footer
+    # Footer (common to both layouts) — Created date + Xmark-like attribution.
     fig.text(
         0.07, 0.015,
-        f"Generated by seismo-relay  •  event_id={rd.event_id or '—'}",
+        f"Created: {rd.server_received_at or '—'}  •  seismo-relay",
         fontsize=7, color="#888", ha="left",
     )
+    fig.text(
+        0.93, 0.015,
+        f"Event {rd.event_id[:8] if rd.event_id else '—'}",
+        fontsize=7, color="#888", ha="right",
+    )
 
     buf = io.BytesIO()
     fig.savefig(buf, format="pdf")
@@ -312,6 +313,69 @@ def render_event_report_pdf(rd: ReportData) -> bytes:
     return buf.getvalue()
 
 
+def _render_waveform_layout(fig, rd: ReportData) -> None:
+    """Waveform layout: header / mic+USBM / per-channel stats / waveform plot.
+
+    Stats table includes Time (Rel. to Trig), Peak Accel, Peak Disp.
+    Left margin sized to fit the channel labels (MicL/Long/Vert/Tran).
+    """
+    gs = fig.add_gridspec(
+        nrows=4, ncols=1,
+        left=0.11, right=0.94, top=0.97, bottom=0.06,
+        height_ratios=[1.7, 2.0, 1.8, 5.5],
+        hspace=0.35,
+    )
+    ax_header = fig.add_subplot(gs[0]); ax_header.axis("off")
+    _draw_header_waveform(ax_header, rd)
+
+    ax_mid = fig.add_subplot(gs[1]); ax_mid.axis("off")
+    _draw_mic_and_usbm(ax_mid, rd)
+
+    ax_stats = fig.add_subplot(gs[2]); ax_stats.axis("off")
+    _draw_channel_stats_waveform(ax_stats, rd)
+
+    _draw_waveform_subplot(fig, gs[3], rd)
+
+
+def _render_histogram_layout(fig, rd: ReportData) -> None:
+    """Histogram layout: header / mic-only / per-channel stats / bar plot.
+
+    No USBM compliance chart (it's a waveform-only concept).  Stats table
+    uses Date + Time-of-peak instead of relative-time + accel + disp.
+    Left margin sized to fit the channel labels.
+    """
+    gs = fig.add_gridspec(
+        nrows=4, ncols=1,
+        left=0.11, right=0.94, top=0.97, bottom=0.06,
+        height_ratios=[1.8, 0.9, 1.7, 5.6],
+        hspace=0.35,
+    )
+    ax_header = fig.add_subplot(gs[0]); ax_header.axis("off")
+    _draw_header_histogram(ax_header, rd)
+
+    ax_mic = fig.add_subplot(gs[1]); ax_mic.axis("off")
+    _draw_mic_only(ax_mic, rd)
+
+    ax_stats = fig.add_subplot(gs[2]); ax_stats.axis("off")
+    _draw_channel_stats_histogram(ax_stats, rd)
+
+    _draw_histogram_subplot(fig, gs[3], rd)
+
+
+def _fmt_iso_to_bw(iso: Optional[str]) -> Optional[str]:
+    """Convert a ISO-8601 timestamp like '2026-05-16T22:30:37' to BW's
+    display format '22:30:37 May 16, 2026'.  Returns input unchanged if
+    it doesn't look like ISO."""
+    if not iso or "T" not in iso:
+        return iso
+    try:
+        import datetime as _dt
+        dt = _dt.datetime.fromisoformat(iso.replace("Z", "+00:00"))
+        return dt.strftime("%H:%M:%S %B %d, %Y").replace(" 0", " ")
+    except Exception:
+        return iso
+
+
 def _kv(ax, x, y, label, value, *, label_w=0.18):
     """Render a 'Label  Value' row at axes-coordinates (x, y)."""
     ax.text(x, y, label, fontsize=8, color="#555", ha="left", va="top",
@@ -329,11 +393,10 @@ def _fmt(v):
     return str(v)
 
 
-def _draw_header(ax, rd: ReportData) -> None:
-    """Two-column metadata header — matches BW printout layout."""
-    # Left column
+def _draw_header_waveform(ax, rd: ReportData) -> None:
+    """Two-column metadata header — waveform variant."""
     rows_left = [
-        ("Date/Time",      rd.event_datetime_str),
+        ("Date/Time",      _fmt_iso_to_bw(rd.event_datetime_str)),
         ("Trigger Source", rd.trigger_source),
         ("Range",          rd.geo_range_str),
         ("Sample Rate",    rd.sample_rate_str),
@@ -343,18 +406,45 @@ def _draw_header(ax, rd: ReportData) -> None:
         ("User Name:",     rd.operator),
         ("Seis. Loc:",     rd.sensor_location),
     ]
+    _draw_header_columns(ax, rows_left, rd)
+
+
+def _draw_header_histogram(ax, rd: ReportData) -> None:
+    """Two-column metadata header — histogram variant.
+
+    Histograms have Start / Finish / Intervals fields instead of
+    Trigger Source (there's no trigger event for a histogram capture).
+    """
+    intervals_str = None
+    if rd.histogram_n_intervals is not None and rd.histogram_interval_size:
+        intervals_str = f"{rd.histogram_n_intervals} At {rd.histogram_interval_size}"
+    rows_left = [
+        ("Start",      _fmt_iso_to_bw(rd.histogram_start_str or rd.event_datetime_str)),
+        ("Finish",     _fmt_iso_to_bw(rd.histogram_stop_str)),
+        ("Intervals",  intervals_str),
+        ("Range",      rd.geo_range_str),
+        ("Sample Rate", (f"{rd.sample_rate_sps} Sps" if rd.sample_rate_sps else None)),
+        ("Notes",      rd.notes),
+        ("Project:",   rd.project),
+        ("Client:",    rd.client),
+        ("User Name:", rd.operator),
+        ("Seis. Loc:", rd.sensor_location),
+    ]
+    _draw_header_columns(ax, rows_left, rd)
+
+
+def _draw_header_columns(ax, rows_left, rd: ReportData) -> None:
+    """Shared 2-column header rendering used by both layouts."""
     rows_right = [
-        ("Serial Number",   f"{rd.serial or '—'}"
-                            + (f"  {rd.firmware}" if rd.firmware else "")),
-        ("Battery Level",  f"{rd.battery_volts:.1f} Volts" if rd.battery_volts is not None else None),
-        ("Unit Calibration", (f"{rd.calibration_date}"
-                              + (f" by {rd.calibration_by}" if rd.calibration_by else ""))
+        ("Serial Number", f"{rd.serial or '—'}" + (f"  {rd.firmware}" if rd.firmware else "")),
+        ("Battery Level", f"{rd.battery_volts:.1f} Volts" if rd.battery_volts is not None else None),
+        ("Unit Calibration", (f"{rd.calibration_date}" + (f" by {rd.calibration_by}" if rd.calibration_by else ""))
                               if rd.calibration_date else None),
-        ("File Name",      rd.file_name),
+        ("File Name", rd.file_name),
         ("Post Event Notes", rd.post_event_notes),
     ]
     y = 0.95
-    dy = 0.10
+    dy = 0.095
     for label, value in rows_left:
         _kv(ax, 0.0, y, label, value, label_w=0.18)
         y -= dy
@@ -364,12 +454,43 @@ def _draw_header(ax, rd: ReportData) -> None:
         y -= dy
 
 
-def _draw_mic_block(ax, rd: ReportData) -> None:
-    """Microphone block — PSPL, ZC Freq, Channel Test.  USBM chart
-    placeholder on the right (filled in a separate work item)."""
+def _draw_mic_only(ax, rd: ReportData) -> None:
+    """Mic block (histogram variant — no USBM chart)."""
     ax.text(0.0, 0.95, "Microphone   Linear Weighting", fontsize=8, color="#555",
             transform=ax.transAxes, va="top")
-    rows = []
+    rows = _mic_rows(rd)
+    y = 0.70
+    for label, value in rows:
+        _kv(ax, 0.0, y, label, value, label_w=0.18)
+        y -= 0.22
+
+
+def _draw_mic_and_usbm(ax, rd: ReportData) -> None:
+    """Mic block on the left + USBM compliance chart placeholder on right.
+    (Waveform variant — USBM is a velocity-vs-frequency compliance plot
+    that doesn't apply to histograms.)"""
+    ax.text(0.0, 0.95, "Microphone   Linear Weighting", fontsize=8, color="#555",
+            transform=ax.transAxes, va="top")
+    rows = _mic_rows(rd)
+    y = 0.80
+    for label, value in rows:
+        _kv(ax, 0.0, y, label, value, label_w=0.18)
+        y -= 0.15
+
+    # USBM chart placeholder — upper-right.  Real piecewise compliance
+    # curves are a separate work item; for now this just shows the title
+    # + a "see report" message so the layout is correct.
+    ax.text(0.72, 0.97, "USBM RI8507 And OSMRE",
+            fontsize=9, weight="bold", color="#333", ha="center", va="top",
+            transform=ax.transAxes)
+    ax.text(0.72, 0.50, "[compliance chart\ncoming soon]",
+            fontsize=8, color="#bbb", ha="center", va="center",
+            transform=ax.transAxes, style="italic")
+
+
+def _mic_rows(rd: ReportData) -> list[tuple[str, Optional[str]]]:
+    """Build the mic-section value rows (shared by both layouts)."""
+    rows: list[tuple[str, Optional[str]]] = []
     if rd.mic_pspl_dbl is not None:
         line = f"{rd.mic_pspl_dbl:.1f} dB(L)"
         if rd.mic_pspl_time_s is not None:
@@ -383,47 +504,78 @@ def _draw_mic_block(ax, rd: ReportData) -> None:
             line += (f" (Freq = {rd.mic_channel_test_freq_hz:.1f} Hz, "
                      f"Amp = {rd.mic_channel_test_amp_mv:.0f} mv)")
         rows.append(("Channel Test", line))
-
-    y = 0.70
-    for label, value in rows:
-        _kv(ax, 0.0, y, label, value, label_w=0.18)
-        y -= 0.22
-
-    # USBM chart placeholder — upper-right of this row
-    ax.text(0.75, 0.95, "USBM RI8507 / OSMRE",
-            fontsize=8, color="#555", ha="center", va="top",
-            transform=ax.transAxes)
-    ax.text(0.75, 0.45, "[compliance chart\nrenders here]",
-            fontsize=8, color="#bbb", ha="center", va="center",
-            transform=ax.transAxes, style="italic")
+    return rows
 
 
-def _draw_channel_stats(ax, rd: ReportData) -> None:
-    """Per-channel stats table + Peak Vector Sum row."""
-    # Build a 2-D array of strings: header row + 3 channel rows
-    headers = ["", "Tran", "Vert", "Long", ""]
-    rows = [
-        ["PPV",                "ppv_ips",        "in/s"],
-        ["ZC Freq",            "zc_freq_hz",     "Hz"],
-        ["Time (Rel. to Trig)", "time_of_peak_s", "sec"],
-        ["Peak Acceleration",   "peak_accel_g",   "g"],
-        ["Peak Displacement",   "peak_disp_in",   "in"],
-        ["Sensor Check",        "sensor_check",   ""],
+def _draw_channel_stats_waveform(ax, rd: ReportData) -> None:
+    """Waveform stats table — has Time (Rel. to Trig), Peak Accel, Peak Disp.
+    Followed by Peak Vector Sum line."""
+    rows_spec = [
+        ("PPV",                  "ppv_ips",        "in/s"),
+        ("ZC Freq",              "zc_freq_hz",     "Hz"),
+        ("Time (Rel. to Trig)",  "time_of_peak_s", "sec"),
+        ("Peak Acceleration",    "peak_accel_g",   "g"),
+        ("Peak Displacement",    "peak_disp_in",   "in"),
+        ("Sensor Check",         "sensor_check",   ""),
     ]
+    _draw_stats_table(ax, rd, rows_spec)
+    if rd.peak_vector_sum_ips is not None:
+        line = f"Peak Vector Sum   {rd.peak_vector_sum_ips:.3f} in/s"
+        if rd.peak_vector_sum_time_s is not None:
+            line += f" At {rd.peak_vector_sum_time_s:.3f} sec."
+        ax.text(0.0, -0.08, line, fontsize=9, weight="bold",
+                ha="left", va="top", transform=ax.transAxes)
+        ax.text(0.0, -0.18, "NA: Not Applicable", fontsize=7, color="#888",
+                ha="left", va="top", transform=ax.transAxes)
+
+
+def _draw_channel_stats_histogram(ax, rd: ReportData) -> None:
+    """Histogram stats table — PPV, ZC Freq, Date, Time of peak, Sensor Check.
+    Followed by Peak Vector Sum line."""
+    # Date / Time of peak are per-channel timestamps for the interval at peak.
+    # bw_report stores time_of_peak_s as relative seconds, but for histograms
+    # BW shows them as absolute date+time.  We populate from rd.channel_stats
+    # if those absolute fields are present; otherwise fall back to relative.
+    rows_spec = [
+        ("PPV",          "ppv_ips",         "in/s"),
+        ("ZC Freq",      "zc_freq_hz",      "Hz"),
+        ("Date",         "peak_date",       ""),
+        ("Time",         "peak_time",       ""),
+        ("Sensor Check", "sensor_check",    ""),
+    ]
+    _draw_stats_table(ax, rd, rows_spec)
+    if rd.peak_vector_sum_ips is not None:
+        when = rd.peak_vector_sum_when_str or ""
+        line = f"Peak Vector Sum   {rd.peak_vector_sum_ips:.3f} in/s"
+        if when:
+            line += f" on {when}"
+        ax.text(0.0, -0.08, line, fontsize=9, weight="bold",
+                ha="left", va="top", transform=ax.transAxes)
+        ax.text(0.0, -0.18, "NA: Not Applicable", fontsize=7, color="#888",
+                ha="left", va="top", transform=ax.transAxes)
+
+
+def _draw_stats_table(ax, rd: ReportData, rows_spec: list[tuple[str, str, str]]) -> None:
+    """Render a per-channel stats table (Tran/Vert/Long).
+
+    rows_spec: list of (label, field_name_in_channel_stats, unit_string)
+    """
+    headers = ["", "Tran", "Vert", "Long", ""]
     ch_lookup = {c["name"]: c for c in rd.channel_stats}
 
     def _cell(field, ch_name):
         val = ch_lookup.get(ch_name, {}).get(field)
         if val is None:
             return "—"
-        if field == "sensor_check":
-            return str(val)
         if isinstance(val, float):
+            # ZC Freq is integer-formatted in BW; everything else with 3 decimals
+            if field == "zc_freq_hz":
+                return f"{val:.0f}"
             return f"{val:.3f}"
         return str(val)
 
     table_data = [headers]
-    for label, field_name, unit in rows:
+    for label, field_name, unit in rows_spec:
         table_data.append([
             label,
             _cell(field_name, "Tran"),
@@ -431,27 +583,16 @@ def _draw_channel_stats(ax, rd: ReportData) -> None:
             _cell(field_name, "Long"),
             unit,
         ])
-
     tbl = ax.table(
         cellText=table_data, loc="upper left",
-        colWidths=[0.30, 0.13, 0.13, 0.13, 0.10],
+        colWidths=[0.28, 0.14, 0.14, 0.14, 0.10],
         cellLoc="left", edges="open",
     )
     tbl.auto_set_font_size(False)
     tbl.set_fontsize(8)
     tbl.scale(1, 1.4)
-    # Header row styling
     for j in range(5):
-        cell = tbl[(0, j)]
-        cell.set_text_props(weight="bold", color="#555")
-
-    # Peak Vector Sum
-    if rd.peak_vector_sum_ips is not None:
-        line = f"Peak Vector Sum   {rd.peak_vector_sum_ips:.3f} in/s"
-        if rd.peak_vector_sum_time_s is not None:
-            line += f" At {rd.peak_vector_sum_time_s:.3f} sec."
-        ax.text(0.0, -0.05, line, fontsize=9, weight="bold",
-                ha="left", va="top", transform=ax.transAxes)
+        tbl[(0, j)].set_text_props(weight="bold", color="#555")
 
 
 def _channel_axis_color(ch: str) -> str:
@@ -460,59 +601,142 @@ def _channel_axis_color(ch: str) -> str:
 
 def _draw_waveform_subplot(fig, gridspec_cell, rd: ReportData) -> None:
     """4-channel stacked waveform plot — Instantel printout order
-    (MicL on top, Tran on bottom), shared x-axis."""
+    (MicL on top, Tran on bottom), shared x-axis in SECONDS, trigger
+    triangle markers at t=0, '0.0' baseline label on right of each."""
     inner = gridspec_cell.subgridspec(4, 1, hspace=0.0)
     order = ["MicL", "Long", "Vert", "Tran"]
     sr = rd.sample_rate_sps or 1024
-    dt_ms = rd.dt_ms or (1000.0 / sr)
-    t0_ms = rd.t0_ms if rd.t0_ms is not None else 0.0
+    # Convert ms-based time axis to seconds for the x-axis
+    dt_s = (rd.dt_ms or (1000.0 / sr)) / 1000.0
+    t0_s = (rd.t0_ms if rd.t0_ms is not None else 0.0) / 1000.0
 
     last_idx = len(order) - 1
     for i, ch in enumerate(order):
         ax = fig.add_subplot(inner[i])
         values = rd.channels.get(ch) or []
-        times = [t0_ms + j * dt_ms for j in range(len(values))]
+        times = [t0_s + j * dt_s for j in range(len(values))]
 
         if values:
             color = _channel_axis_color(ch)
-            ax.plot(times, values, color=color, linewidth=0.6)
-            # Symmetric y-axis for geo; zero-anchored for mic
+            ax.plot(times, values, color=color, linewidth=0.5)
+            # Symmetric y-axis for geo; zero-anchored for mic.
             if ch != "MicL":
                 amax = max((abs(v) for v in values), default=0.001)
-                ax.set_ylim(-amax * 1.1, amax * 1.1)
-        # Channel label on left
+                ax.set_ylim(-amax * 1.10, amax * 1.10)
+            else:
+                amax = max((abs(v) for v in values), default=0.001)
+                ax.set_ylim(-amax * 1.10, amax * 1.10)
+
+        # Channel label on the LEFT (matches BW)
         ax.set_ylabel(ch, fontsize=8, rotation=0, ha="right", va="center",
                       color=_channel_axis_color(ch), weight="bold", labelpad=14)
-        ax.grid(True, linestyle=":", linewidth=0.4, alpha=0.5)
-        # Dashed trigger line at t=0
-        ax.axvline(0.0, color="#cc0000", linestyle="--", linewidth=0.8, alpha=0.7)
-        # Zero baseline
-        ax.axhline(0.0, color="#888", linestyle="-", linewidth=0.4, alpha=0.5)
+        # "0.0" on the RIGHT (BW convention)
+        ax.text(1.005, 0.5, "0.0", transform=ax.transAxes,
+                fontsize=7, color="#555", va="center", ha="left")
+
+        ax.grid(True, linestyle="--", linewidth=0.3, color="#bbb", alpha=0.6)
+        # Vertical dashed trigger line at t=0
+        ax.axvline(0.0, color="#cc0000", linestyle="--", linewidth=0.6, alpha=0.7)
+        # Zero baseline horizontal
+        ax.axhline(0.0, color=_channel_axis_color(ch), linestyle="-",
+                   linewidth=0.4, alpha=0.5)
 
         if i != last_idx:
             ax.set_xticklabels([])
+            ax.tick_params(axis="x", length=0)
         else:
-            ax.set_xlabel("Time (ms)", fontsize=8)
-        ax.tick_params(axis="both", labelsize=7)
+            ax.tick_params(axis="x", labelsize=7)
+        ax.tick_params(axis="y", labelsize=6)
+
+    # Trigger triangle marker ▼ above the top channel at t=0
+    top_ax = fig.axes[-4]  # MicL is the first added in this gridspec
+    top_ax.plot([0], [top_ax.get_ylim()[1]], marker="v", color="black",
+                markersize=8, clip_on=False, zorder=10)
+
+    # Compute scale-per-division for the footer (10 divs across the chart)
+    # and find peak geo amplitude for the geo amp/div setting.
+    total_s = times[-1] - times[0] if values else 0
+    div_s = total_s / 10 if total_s > 0 else 0
+    geo_amp_div = "—"
+    for ch in ("Tran", "Vert", "Long"):
+        v = rd.channels.get(ch) or []
+        if v:
+            amax = max(abs(x) for x in v)
+            geo_amp_div = f"{(amax * 1.1 * 2) / 10:.3f}"
+            break
+    fig.text(
+        0.07, 0.045,
+        f"Time(Seconds) {div_s:.2f} sec/div   Amplitude Geo: {geo_amp_div} in/s/div   Mic: 0.001 psi(L)/div",
+        fontsize=7, color="#444", ha="left",
+    )
+    fig.text(
+        0.07, 0.030,
+        "Trigger = ▶━━━━━ ━━━━━━◀",
+        fontsize=7, color="#444", ha="left",
+    )
 
 
 def _draw_histogram_subplot(fig, gridspec_cell, rd: ReportData) -> None:
-    """4-channel stacked histogram bar chart — per-interval peaks."""
+    """4-channel stacked histogram bar chart — per-interval peaks.
+
+    X-axis labeled with the actual times from rd.histogram_interval_times
+    when available; otherwise interval index.
+    """
     inner = gridspec_cell.subgridspec(4, 1, hspace=0.0)
     order = ["MicL", "Long", "Vert", "Tran"]
     last_idx = len(order) - 1
+
+    # X-axis: use absolute time labels if we have them, else interval index
+    have_times = bool(rd.histogram_interval_times)
+
     for i, ch in enumerate(order):
         ax = fig.add_subplot(inner[i])
         values = rd.channels.get(ch) or []
         if values:
-            xs = np.arange(1, len(values) + 1)
+            # Histograms record per-interval PEAK magnitudes — always
+            # non-negative.  Codec output occasionally includes signed
+            # values when the underlying .h5 was scaled like a waveform;
+            # take the absolute value so the bars rise from zero.
+            abs_vals = [abs(v) if v is not None else 0 for v in values]
+            xs = np.arange(len(abs_vals))
             color = _channel_axis_color(ch)
-            ax.bar(xs, values, color=color, width=1.0, linewidth=0)
+            ax.bar(xs, abs_vals, color=color, width=0.85, linewidth=0)
+            amax = max(abs_vals, default=0)
+            if amax > 0:
+                ax.set_ylim(0, amax * 1.10)
         ax.set_ylabel(ch, fontsize=8, rotation=0, ha="right", va="center",
                       color=_channel_axis_color(ch), weight="bold", labelpad=14)
-        ax.grid(True, axis="y", linestyle=":", linewidth=0.4, alpha=0.5)
+        ax.text(1.005, 0.02, "0.0", transform=ax.transAxes,
+                fontsize=7, color="#555", va="bottom", ha="left")
+        ax.grid(True, axis="y", linestyle="--", linewidth=0.3, color="#bbb", alpha=0.6)
         if i != last_idx:
             ax.set_xticklabels([])
+            ax.tick_params(axis="x", length=0)
         else:
-            ax.set_xlabel("Interval", fontsize=8)
-        ax.tick_params(axis="both", labelsize=7)
+            if have_times and len(rd.histogram_interval_times) == len(values):
+                # Show 2-4 labels evenly spaced
+                n = len(values)
+                step = max(1, n // 4)
+                tick_positions = list(range(0, n, step))
+                ax.set_xticks(tick_positions)
+                ax.set_xticklabels([rd.histogram_interval_times[t] for t in tick_positions],
+                                   rotation=0, fontsize=6)
+            else:
+                ax.set_xlabel("Interval", fontsize=8)
+            ax.tick_params(axis="x", labelsize=7)
+        ax.tick_params(axis="y", labelsize=6)
+
+    # Footer scale info — histograms use minute/div
+    interval_str = rd.histogram_interval_size or "—"
+    geo_amp_div = "—"
+    for ch in ("Tran", "Vert", "Long"):
+        v = rd.channels.get(ch) or []
+        if v:
+            amax = max(abs(x) for x in v)
+            geo_amp_div = f"{amax / 5:.3f}"
+            break
+    fig.text(
+        0.07, 0.045,
+        f"Time {interval_str} /div   Amplitude Geo: {geo_amp_div} in/s/div   Mic: 0.001 psi(L)/div",
+        fontsize=7, color="#444", ha="left",
+    )
-- 
2.52.0


From ad2b553c7bb92f6362f316a9510ae376834812da Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 20:01:12 +0000
Subject: [PATCH 25/42] ingest: preserve raw BW ASCII report (.TXT) alongside
 the binary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the .TXT was parsed into the sidecar's bw_report projection
and then discarded at ingest time.  Now save_imported_bw() writes it
to <store>/<serial>/<filename>_ASCII.TXT permanently.

Rationale: with BW Mail / Forwarding Agent being phased out of the
operator workflow, the XML/PDF/WMF those tools produce won't be
available — the binary + .TXT (created by BW ACH itself) are our
only authoritative inputs going forward.  Keeping the raw .TXT
unlocks:

  - Parser bug fixes can be applied RETROACTIVELY by re-parsing the
    stored .TXT, instead of requiring a re-forward from the watcher
    PC (which lost the .TXT after BW ACH cleanup).
  - Audit trail of what BW actually sent us, for debugging.
  - The five known parser-PPV-miss events will be re-parseable once
    the regex fix lands (instead of staying broken indefinitely).

Storage cost: ~15 KB per event × 14k events = ~210 MB for a store the
size of the existing prod corpus.  Negligible.

Implementation:
  - WaveformStore gains txt_path_for() + open_txt()
  - save_imported_bw() writes the .TXT when bw_report_text is supplied
  - sidecar source block records the txt_filename
  - backfill_sidecars.py preserves txt_filename across regens
  - New GET /db/events/{id}/ascii_report.txt endpoint serves it; the
    endpoint returns 404 for events ingested before this change (no
    .TXT in the store yet) — re-forward to populate

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                  |  2 ++
 minimateplus/event_file_io.py |  2 ++
 scripts/backfill_sidecars.py  |  6 +++++
 sfm/server.py                 | 33 +++++++++++++++++++++++++++
 sfm/waveform_store.py         | 42 +++++++++++++++++++++++++++++++++++
 5 files changed, 85 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6c79802..0847c73 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ All notable changes to seismo-relay are documented here.
 
 ### Added
 
+- **Raw BW ASCII report (.TXT) preservation.**  `save_imported_bw` now writes the paired `_ASCII.TXT` to `<store>/<serial>/<filename>_ASCII.TXT` alongside the binary at ingest time.  Previously the .TXT was parsed into the sidecar's `bw_report` projection and then discarded — meaning parser bug fixes couldn't be applied retroactively without re-forwarding from the watcher PC.  Now the raw .TXT lives in the waveform store permanently (~15 KB per event; ~210 MB total for a 14k-event store; negligible).  Sidecar's `source.txt_filename` field records the saved path; backfill_sidecars preserves it across regens.  New `GET /db/events/{id}/ascii_report.txt` endpoint serves the raw .TXT for any event ingested after this change.  Events ingested before today still return 404 from that endpoint until re-forwarded.  Architectural rationale: with BW Mail / Forwarding Agent being phased out of the operator workflow, the XML/PDF/WMF that those tools produced are no longer available — the binary + .TXT (created by BW ACH itself) are our authoritative source for everything going forward.
+
 - **Event Report PDF generation** — `GET /db/events/{id}/report.pdf` returns a single-page letter-portrait PDF for any event with waveform data on disk.  Covers every field a Blastware Event Report includes: header metadata (date/time, trigger source, range, sample rate, project/client/operator/location, serial+firmware, battery, calibration, file name), microphone block (PSPL in dB(L) + psi, ZC freq, channel test), per-channel stats table (rows differ for waveform vs histogram), Peak Vector Sum, and the 4-channel plot.  Iterated against real Blastware reference PDFs (uploaded to `example-events/pdfsnstuff/`):
    - **Waveform layout**: header shows Date/Time, Trigger Source, Range, Sample Rate; stats table has PPV / ZC Freq / Time (Rel. to Trig) / Peak Accel / Peak Disp / Sensor Check; bottom plot is 4-channel line waveform (MicL top → Tran bottom), shared time axis in seconds, dashed trigger line + triangle marker at t=0, symmetric Y on geo channels, zero-anchored on mic, "0.0" baseline label on right per BW convention; footer shows `Time X sec/div   Amplitude Geo: Y in/s/div   Mic: 0.001 psi(L)/div` and the trigger window `▶━━◀` marker.  USBM RI8507/OSMRE compliance chart placeholder upper-right.
    - **Histogram layout**: header shows Start / Finish / Intervals At Size / Range / Sample Rate (no Trigger Source — histograms aren't triggered); NO USBM chart; stats table has PPV / ZC Freq / Date / Time / Sensor Check; bottom plot is per-interval bar chart, Y-axis 0-to-peak (never negative), 0.0 baseline at the bottom; footer shows `Time INTERVAL_SIZE /div   Amplitude Geo: Y in/s/div   Mic: 0.001 psi(L)/div`.
diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index 66a4b68..e513ad3 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -332,6 +332,7 @@ def event_to_sidecar_dict(
     blastware_filesize: int,
     blastware_sha256: str,
     source_kind: str = "sfm-live",
+    txt_filename: Optional[str] = None,
     a5_pickle_filename: Optional[str] = None,
     tool_version: str = _TOOL_VERSION_DEFAULT,
     captured_at: Optional[datetime.datetime] = None,
@@ -448,6 +449,7 @@ def event_to_sidecar_dict(
             "captured_at":        captured_at.isoformat() + "Z" if captured_at.tzinfo is None else captured_at.isoformat(),
             "tool_version":       tool_version,
             "a5_pickle_filename": a5_pickle_filename,
+            "txt_filename":       txt_filename,
         },
 
         "review": review or {
diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py
index 9c4bf5d..04583f7 100644
--- a/scripts/backfill_sidecars.py
+++ b/scripts/backfill_sidecars.py
@@ -300,12 +300,17 @@ def main(argv=None) -> int:
                 preserved_review     = None
                 preserved_ext        = None
                 preserved_bw_report  = None
+                preserved_txt_fn     = None
                 if sidecar_path.exists():
                     try:
                         _existing = event_file_io.read_sidecar(sidecar_path)
                         preserved_review    = _existing.get("review")
                         preserved_ext       = _existing.get("extensions")
                         preserved_bw_report = _existing.get("bw_report")
+                        # Preserve txt_filename so backfills don't blank out the
+                        # pointer to the saved raw .TXT (events ingested after
+                        # 2026-05-27 have this).
+                        preserved_txt_fn    = (_existing.get("source") or {}).get("txt_filename")
                     except Exception:
                         pass
 
@@ -334,6 +339,7 @@ def main(argv=None) -> int:
                     blastware_sha256=bw_sha,
                     source_kind=source_kind,
                     a5_pickle_filename=a5_filename,
+                    txt_filename=preserved_txt_fn,
                     review=preserved_review,
                     extensions=preserved_ext,
                 )
diff --git a/sfm/server.py b/sfm/server.py
index 2474681..aee5532 100644
--- a/sfm/server.py
+++ b/sfm/server.py
@@ -2178,6 +2178,39 @@ def db_event_blastware_file(event_id: str) -> FileResponse:
     )
 
 
+@app.get("/db/events/{event_id}/ascii_report.txt")
+def db_event_ascii_report_txt(event_id: str):
+    """Serve the raw BW ASCII report (.TXT) for an event, when preserved.
+
+    Returns 404 for events ingested before the .TXT-preservation feature
+    landed (2026-05-27) — those events have only the parsed ``bw_report``
+    block in the sidecar, not the raw .TXT.  Re-forwarding from the
+    watcher PC will populate the .TXT going forward.
+    """
+    row = _get_db().get_event(event_id)
+    if row is None:
+        raise HTTPException(status_code=404, detail=f"Event {event_id} not found")
+    serial   = row.get("serial")
+    filename = row.get("blastware_filename")
+    if not serial or not filename:
+        raise HTTPException(status_code=404, detail="Event has no associated BW file")
+    txt_path = _get_store().open_txt(serial, filename)
+    if txt_path is None:
+        raise HTTPException(
+            status_code=404,
+            detail=(
+                f"Raw .TXT not preserved for {filename}.  Events ingested "
+                "before 2026-05-27 don't have it; re-forward from the "
+                "watcher PC to populate."
+            ),
+        )
+    return FileResponse(
+        path=str(txt_path),
+        media_type="text/plain",
+        filename=txt_path.name,
+    )
+
+
 @app.get("/db/events/{event_id}/report.pdf")
 def db_event_report_pdf(event_id: str):
     """Render an Instantel-style Event Report as a PDF.
diff --git a/sfm/waveform_store.py b/sfm/waveform_store.py
index 5032dc2..d982dce 100644
--- a/sfm/waveform_store.py
+++ b/sfm/waveform_store.py
@@ -108,11 +108,30 @@ class WaveformStore:
         """Return absolute path to the .h5 clean-waveform file for a given event."""
         return self._serial_dir(serial) / f"{filename}.h5"
 
+    def txt_path_for(self, serial: str, filename: str) -> Path:
+        """Return absolute path to the preserved BW ASCII report (.TXT)
+        for a given event.
+
+        We name it ``<filename>_ASCII.TXT`` to match BW's own filename
+        convention in the ACH folder.  Saved at ingest time alongside
+        the binary so the parser bug fixes can be applied retroactively
+        by re-parsing without needing to re-forward from the watcher PC.
+        """
+        return self._serial_dir(serial) / f"{filename}_ASCII.TXT"
+
     def open_blastware(self, serial: str, filename: str) -> Optional[Path]:
         """Return absolute path to an existing event file or None."""
         bw_path, _ = self.paths_for(serial, filename)
         return bw_path if bw_path.exists() else None
 
+    def open_txt(self, serial: str, filename: str) -> Optional[Path]:
+        """Return absolute path to the preserved BW ASCII report for an
+        event, or None if the .TXT wasn't saved at ingest time (events
+        ingested before .TXT preservation landed will show None until
+        re-forwarded)."""
+        p = self.txt_path_for(serial, filename)
+        return p if p.exists() else None
+
     # ── save / load ─────────────────────────────────────────────────────────────
 
     def save(
@@ -357,6 +376,28 @@ class WaveformStore:
         filesize = bw_path.stat().st_size
         sha256   = event_file_io.file_sha256(bw_path)
 
+        # 1b. preserve the raw BW ASCII report (.TXT) alongside the binary.
+        # Saved at <root>/<serial>/<filename>_ASCII.TXT.  Lets us re-parse
+        # offline after parser fixes without needing to re-forward from
+        # the watcher PC.  Negligible storage cost (~15 KB per event).
+        # Skipped silently when no report was supplied (live download path,
+        # manual upload without paired TXT).
+        txt_filename: Optional[str] = None
+        if bw_report_text is not None:
+            try:
+                txt_path = self.txt_path_for(serial, filename)
+                if isinstance(bw_report_text, bytes):
+                    txt_path.write_bytes(bw_report_text)
+                else:
+                    txt_path.write_text(bw_report_text)
+                txt_filename = txt_path.name
+            except Exception as exc:
+                log.warning(
+                    "save_imported_bw: failed to save TXT for %s: %s — "
+                    "continuing without it",
+                    filename, exc,
+                )
+
         # 2. write the .h5 clean-waveform file from the parsed Event.
         # Note: peaks here are computed from raw samples (the BW file
         # doesn't carry the device-authoritative 0C peaks).  Best-effort.
@@ -393,6 +434,7 @@ class WaveformStore:
             blastware_sha256=sha256,
             source_kind="bw-import",
             a5_pickle_filename=None,
+            txt_filename=txt_filename,
             review=existing_review,
             bw_report=bw_report,
         )
-- 
2.52.0


From d21e3b52986c6f7dae04dbc314d8f63f74edad72 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 20:23:05 +0000
Subject: [PATCH 26/42] histogram aggregation + parser extension for BW
 interval fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three layered changes that together make histogram charts visually
match BW's printout (one bar per interval, not per codec block):

1. bw_ascii_report parser captures histogram fields it previously
   dropped:
     - Histogram Start/Stop Time + Date → datetime
     - Number of Intervals + Interval Size (string + parsed seconds)
     - <Channel> Peak Time + Peak Date → datetime (per-channel)
     - Peak Vector Sum Date (combined with PVS Time → datetime;
       clears the bogus seconds parse that interpreted "22:33:52"
       as 22.0)
   New _parse_iso_date() handles BW's ISO format for histograms
   (waveforms use "May 8, 2026" long form).  New _parse_interval_size()
   handles "1 minute" / "5 minutes" / "15 seconds" etc.

2. _bw_report_to_dict() projects the new fields into a new
   bw_report.histogram block in the sidecar.

3. /db/events/{id}/waveform.json wraps the existing path 1 (HDF5)
   output with _maybe_aggregate_histogram(): when the event is a
   histogram AND the sidecar has bw_report.histogram.n_intervals,
   group the codec's per-block samples into N intervals via
   max-per-group and return the aggregated array.  time_axis gains
   histogram_aggregated / n_intervals / interval_size_s / interval_times
   fields.

Frontend (both the modal chart in sfm_webapp.html and the standalone
event browser) uses interval_times as x-axis labels (BW-style HH:MM:SS)
when their count matches the plotted values; otherwise it falls back to
the interval index.

Defensive: aggregation is a no-op when the sidecar lacks the histogram
block or its n_intervals value (events ingested before this change).
It activates automatically on prod once a watcher re-forward populates
new sidecars.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                    |   9 +++
 minimateplus/bw_ascii_report.py | 121 ++++++++++++++++++++++++++++++++
 minimateplus/event_file_io.py   |  15 ++++
 sfm/event_browser.html          |  17 +++--
 sfm/server.py                   |  86 ++++++++++++++++++++++-
 sfm/sfm_webapp.html             |  16 +++--
 6 files changed, 254 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0847c73..1e1324c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,15 @@ All notable changes to seismo-relay are documented here.
 
 ### Added
 
+- **Histogram per-interval aggregation in `waveform.json`.**  Histogram events now render with one bar per BW-reported interval (matching the Blastware printout) instead of ~200 bars per event (the raw codec output).  When the sidecar's `bw_report.histogram.n_intervals` is populated (events ingested with the new parser, see next bullet), the `/db/events/{id}/waveform.json` endpoint groups the codec samples into N intervals via max-per-group and returns the aggregated array.  `time_axis` gains `histogram_aggregated: true`, `n_intervals`, `interval_size_s`, and `interval_times` (HH:MM:SS strings).  Both the modal chart and the standalone event browser use those interval timestamps as x-axis labels when present.  Defensive: no-op for events ingested before the parser extension landed (their sidecars lack `histogram.n_intervals`) — those continue to render with raw codec output.
+- **`bw_ascii_report` parser now captures histogram-specific fields.**  Previously the parser dropped these fields silently (Roadmap item closed):
+   - `Histogram Start Time` / `Histogram Start Date` (combined into `histogram_start: datetime`)
+   - `Histogram Stop Time` / `Histogram Stop Date` (combined into `histogram_stop: datetime`)
+   - `Number of Intervals` (`histogram_n_intervals: int`)
+   - `Interval Size` ("1 minute" string + parsed seconds: `histogram_interval_size_str`, `histogram_interval_size_s`)
+   - `<Channel> Peak Time` + `<Channel> Peak Date` for histogram events (combined into `channel_peak_when: dict`; waveforms continue to use `time_of_peak_s` relative)
+   - `Peak Vector Sum Date` (combined with PVS Time into `peak_vector_sum_when: datetime`; clears the previous bogus `peak_vector_sum_time_s` parse that interpreted "22:33:52" as 22.0 seconds)
+   - All new fields land in the sidecar's `bw_report.histogram` block via `_bw_report_to_dict`.  Tested against synthetic K558LLB7.V20H-shaped input.
 - **Raw BW ASCII report (.TXT) preservation.**  `save_imported_bw` now writes the paired `_ASCII.TXT` to `<store>/<serial>/<filename>_ASCII.TXT` alongside the binary at ingest time.  Previously the .TXT was parsed into the sidecar's `bw_report` projection and then discarded — meaning parser bug fixes couldn't be applied retroactively without re-forwarding from the watcher PC.  Now the raw .TXT lives in the waveform store permanently (~15 KB per event; ~210 MB total for a 14k-event store; negligible).  Sidecar's `source.txt_filename` field records the saved path; backfill_sidecars preserves it across regens.  New `GET /db/events/{id}/ascii_report.txt` endpoint serves the raw .TXT for any event ingested after this change.  Events ingested before today still return 404 from that endpoint until re-forwarded.  Architectural rationale: with BW Mail / Forwarding Agent being phased out of the operator workflow, the XML/PDF/WMF that those tools produced are no longer available — the binary + .TXT (created by BW ACH itself) are our authoritative source for everything going forward.
 
 - **Event Report PDF generation** — `GET /db/events/{id}/report.pdf` returns a single-page letter-portrait PDF for any event with waveform data on disk.  Covers every field a Blastware Event Report includes: header metadata (date/time, trigger source, range, sample rate, project/client/operator/location, serial+firmware, battery, calibration, file name), microphone block (PSPL in dB(L) + psi, ZC freq, channel test), per-channel stats table (rows differ for waveform vs histogram), Peak Vector Sum, and the 4-channel plot.  Iterated against real Blastware reference PDFs (uploaded to `example-events/pdfsnstuff/`):
diff --git a/minimateplus/bw_ascii_report.py b/minimateplus/bw_ascii_report.py
index a3aee4b..5ccb10a 100644
--- a/minimateplus/bw_ascii_report.py
+++ b/minimateplus/bw_ascii_report.py
@@ -144,6 +144,23 @@ class BwAsciiReport:
     # ── Vector sum ──────────────────────────────────────────────────────────
     peak_vector_sum_ips:    Optional[float] = None
     peak_vector_sum_time_s: Optional[float] = None
+    # Histograms additionally have an absolute date+time for the PVS
+    # (it occurred at a specific interval).  Waveform reports show
+    # only the relative-time value above.
+    peak_vector_sum_when:   Optional[datetime.datetime] = None
+
+    # ── Histogram-specific fields (populated only when Event Type starts
+    # with 'Histogram' / 'Full Histogram' / 'Histogram + Continuous') ──
+    histogram_start:        Optional[datetime.datetime] = None
+    histogram_stop:         Optional[datetime.datetime] = None
+    histogram_n_intervals:  Optional[int]   = None      # e.g. 4, 1436
+    histogram_interval_size_str: Optional[str]   = None  # "1 minute" / "5 minutes" / "15 seconds"
+    histogram_interval_size_s:   Optional[float] = None  # parsed to seconds
+    # Per-channel absolute peak time+date (histogram-specific).  For
+    # waveform events these are None — those reports use the channel's
+    # time_of_peak_s (relative to trigger) instead.  Keyed by channel
+    # name ("Tran", "Vert", "Long", "MicL").
+    channel_peak_when:      Dict[str, datetime.datetime] = field(default_factory=dict)
 
     # ── Sensor self-check (per channel) ─────────────────────────────────────
     sensor_check:      Dict[str, SensorCheck] = field(default_factory=dict)
@@ -223,6 +240,46 @@ def _parse_event_date(s: str) -> Optional[datetime.date]:
         return None
 
 
+def _parse_iso_date(s: str) -> Optional[datetime.date]:
+    """Parse "2026-05-16" → date.  Histograms use ISO format for their
+    Start Date / Stop Date / Peak Date fields; waveforms use the
+    "May 8, 2026" long form which `_parse_event_date` handles."""
+    s = s.strip()
+    try:
+        return datetime.date.fromisoformat(s)
+    except ValueError:
+        return None
+
+
+_INTERVAL_UNIT_SECONDS = {
+    "second": 1, "seconds": 1, "sec": 1, "secs": 1,
+    "minute": 60, "minutes": 60, "min": 60, "mins": 60,
+    "hour": 3600, "hours": 3600, "hr": 3600, "hrs": 3600,
+}
+
+
+def _parse_interval_size(s: str) -> Optional[float]:
+    """Parse "1 minute" / "5 minutes" / "15 seconds" / "2 seconds" → seconds.
+
+    Handles the BW Compliance Setup → Histogram Interval values verbatim
+    ("2 seconds", "5 seconds", "15 seconds", "1 minute", "5 minutes",
+    "15 minutes") plus a few defensive variants.
+    """
+    if not s:
+        return None
+    parts = s.strip().split()
+    if len(parts) < 2:
+        return None
+    try:
+        n = float(parts[0])
+    except ValueError:
+        return None
+    unit_per_s = _INTERVAL_UNIT_SECONDS.get(parts[1].lower())
+    if unit_per_s is None:
+        return None
+    return n * unit_per_s
+
+
 def _parse_event_time(s: str) -> Optional[datetime.time]:
     """Parse "15:56:35" → time."""
     s = s.strip()
@@ -336,6 +393,15 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
     in_user_notes_block = False
     user_note_position = 0
 
+    # Histogram-field staging — BW writes <Channel> Peak Time and
+    # <Channel> Peak Date on separate lines (and similarly Histogram
+    # Start Time / Date).  We stash the partial value when the time
+    # line arrives and combine it when the matching date line arrives.
+    _hist_start_time: Optional[datetime.time] = None
+    _hist_stop_time:  Optional[datetime.time] = None
+    _pending_peak_time: Dict[str, Optional[datetime.time]] = {}
+    _pvs_time_raw: Optional[str] = None  # last Peak Vector Sum Time value, raw
+
     while i < n:
         raw_line = lines[i]
         i += 1
@@ -427,11 +493,66 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
             elif stat == "Peak Acceleration":   cs.peak_accel_g   = num
             elif stat == "Peak Displacement":   cs.peak_disp_in   = num
 
+        # ── Histogram-specific fields ────────────────────────────────────────
+        # Histograms have Start/Stop time+date pairs + an interval count
+        # and size, plus per-channel absolute Peak Time/Date instead of
+        # the waveform's relative Time of Peak.
+        elif key == "Histogram Start Time":
+            _hist_start_time = _parse_event_time(value)
+        elif key == "Histogram Start Date":
+            _d = _parse_iso_date(value)
+            if _d and _hist_start_time:
+                report.histogram_start = datetime.datetime.combine(_d, _hist_start_time)
+        elif key == "Histogram Stop Time":
+            _hist_stop_time = _parse_event_time(value)
+        elif key == "Histogram Stop Date":
+            _d = _parse_iso_date(value)
+            if _d and _hist_stop_time:
+                report.histogram_stop = datetime.datetime.combine(_d, _hist_stop_time)
+        elif key == "Number of Intervals":
+            try:
+                report.histogram_n_intervals = int(float(value.strip()))
+            except ValueError:
+                pass
+        elif key == "Interval Size":
+            report.histogram_interval_size_str = value.strip()
+            report.histogram_interval_size_s   = _parse_interval_size(value)
+
+        # ── Per-channel histogram Peak Date / Peak Time ──
+        # Lines like "Tran Peak Time : 22:31:38" + "Tran Peak Date : 2026-05-16"
+        elif key in ("Tran Peak Time", "Vert Peak Time", "Long Peak Time", "MicL Time"):
+            ch_name = "MicL" if key == "MicL Time" else key.split(" ", 1)[0]
+            _pending_peak_time[ch_name] = _parse_event_time(value)
+        elif key in ("Tran Peak Date", "Vert Peak Date", "Long Peak Date", "MicL Date"):
+            ch_name = "MicL" if key == "MicL Date" else key.split(" ", 1)[0]
+            _d = _parse_iso_date(value)
+            _t = _pending_peak_time.get(ch_name)
+            if _d and _t:
+                report.channel_peak_when[ch_name] = datetime.datetime.combine(_d, _t)
+
         # ── Vector Sum ───────────────────────────────────────────────────────
         elif key == "Peak Vector Sum":
             report.peak_vector_sum_ips = _parse_number(value)
         elif key == "Peak Vector Sum Time":
             report.peak_vector_sum_time_s = _parse_number(value)
+            _pvs_time_raw = value
+        elif key == "Peak Vector Sum Date":
+            # Histogram-mode PVS gets paired with a date.  We may have
+            # captured 'Peak Vector Sum Time' as either a relative
+            # seconds float (waveform) or an HH:MM:SS string we
+            # interpreted as a number.  For histograms, BW writes
+            # "Peak Vector Sum Time : 22:33:52" which _parse_number
+            # parses as 22.0 (loses information).  When Peak Vector Sum
+            # Date arrives, re-parse the previous PVS time line as a
+            # clock time and combine into an absolute datetime.
+            _d = _parse_iso_date(value)
+            if _d and _pvs_time_raw is not None:
+                _t = _parse_event_time(_pvs_time_raw)
+                if _t:
+                    report.peak_vector_sum_when = datetime.datetime.combine(_d, _t)
+                    # The earlier seconds parse was bogus for histograms;
+                    # clear it so consumers don't think it's a real offset.
+                    report.peak_vector_sum_time_s = None
 
         # ── Microphone block ────────────────────────────────────────────────
         elif key == "Microphone":
diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index e513ad3..b455bc0 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -171,6 +171,10 @@ def _bw_report_to_dict(report: BwAsciiReport) -> dict:
             "vector_sum": {
                 "ips":     report.peak_vector_sum_ips,
                 "time_s":  report.peak_vector_sum_time_s,
+                # Histogram events have an absolute date+time for the PVS
+                # (the interval at which it occurred); waveform events
+                # only have the time_s offset.
+                "when":    report.peak_vector_sum_when.isoformat() if report.peak_vector_sum_when else None,
             },
         },
         "mic": {
@@ -185,6 +189,17 @@ def _bw_report_to_dict(report: BwAsciiReport) -> dict:
             "long": _sc("Long"),
             "mic":  _sc("MicL"),
         },
+        # Histogram-specific fields (None on waveform-mode events).
+        # Per-channel absolute peak time/date for histograms — for
+        # waveforms see channels[ch]["time_of_peak_s"] instead.
+        "histogram": {
+            "start":               report.histogram_start.isoformat() if report.histogram_start else None,
+            "stop":                report.histogram_stop.isoformat()  if report.histogram_stop  else None,
+            "n_intervals":         report.histogram_n_intervals,
+            "interval_size":       report.histogram_interval_size_str,
+            "interval_size_s":     report.histogram_interval_size_s,
+            "channel_peak_when":   {ch: dt.isoformat() for ch, dt in report.channel_peak_when.items()},
+        },
         "monitor_log":   monitor_log,
         "pc_sw_version": report.pc_sw_version,
     }
diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index 1ef883b..9f5fd31 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -656,11 +656,18 @@ function renderWaveform(data) {
     chartsDiv.appendChild(wrap);
 
     // Waveform: per-sample time in ms relative to trigger (negative for pretrig).
-    // Histogram: interval index (1..N); sample_rate-based time math doesn't
-    // apply to per-interval peaks.
-    const times = isHistogram
-      ? values.map((_, i) => i + 1)
-      : values.map((_, i) => t0Ms + i * dtMs);
+    // Histogram: when the server has aggregated to BW-reported intervals AND
+    // provides per-interval timestamps, use those as x-axis labels (HH:MM:SS).
+    // Falls back to interval index.
+    let times;
+    if (isHistogram) {
+      const intervalTimes = ta.interval_times || [];
+      times = (intervalTimes.length === values.length)
+        ? intervalTimes
+        : values.map((_, i) => i + 1);
+    } else {
+      times = values.map((_, i) => t0Ms + i * dtMs);
+    }
 
     // Downsample for rendering
     const MAX_POINTS = 4000;
diff --git a/sfm/server.py b/sfm/server.py
index aee5532..93ee110 100644
--- a/sfm/server.py
+++ b/sfm/server.py
@@ -2237,6 +2237,89 @@ def db_event_report_pdf(event_id: str):
     )
 
 
+def _maybe_aggregate_histogram(plot: dict, store, serial: str, filename: str, row: dict) -> dict:
+    """For histogram events, aggregate the codec's per-block samples into
+    the BW-reported number of intervals.  No-op for waveforms or when
+    we don't have the histogram metadata (interval count + size) in the
+    sidecar's bw_report block.
+
+    Why: the histogram codec emits one value per internal block (~1 per
+    second), but BW's printout shows one bar per configured interval
+    (typically 1-15 minutes).  For a 1-minute-interval event the codec
+    gives ~60 blocks per BW bar.  Aggregating max-per-group makes the
+    SFM chart + PDF visually match BW's display.
+    """
+    record_type = row.get("record_type") or ""
+    if not record_type.lower().startswith("hist"):
+        return plot
+
+    # Read interval count + size from the sidecar's bw_report.histogram block
+    try:
+        import json as _json
+        sidecar_path = store.sidecar_path_for(serial, filename)
+        if not sidecar_path.exists():
+            return plot
+        sc = _json.loads(sidecar_path.read_text())
+        hist = (sc.get("bw_report") or {}).get("histogram") or {}
+        n_intervals = hist.get("n_intervals")
+        interval_size_s = hist.get("interval_size_s")
+        start_iso = hist.get("start")
+    except Exception:
+        return plot
+    if not n_intervals or n_intervals < 1:
+        return plot
+
+    # Aggregate each channel's values into n_intervals groups, max-per-group
+    channels = plot.get("channels") or {}
+    aggregated_channels: dict = {}
+    for ch, chd in channels.items():
+        vals = chd.get("values") or []
+        if not vals:
+            aggregated_channels[ch] = chd
+            continue
+        # Distribute len(vals) samples across n_intervals groups; uneven
+        # remainders get distributed across the first few groups.
+        per_group = len(vals) // n_intervals
+        remainder = len(vals) % n_intervals
+        agg: list = []
+        offset = 0
+        for i in range(n_intervals):
+            grp_size = per_group + (1 if i < remainder else 0)
+            if grp_size > 0:
+                grp = vals[offset:offset + grp_size]
+                # Max of absolute values (peaks are magnitudes).
+                agg.append(max((abs(v) for v in grp if v is not None), default=0))
+                offset += grp_size
+            else:
+                agg.append(0)
+        aggregated_channels[ch] = {**chd, "values": agg}
+
+    # Build per-interval timestamp labels for the x-axis if we have start time
+    interval_times: list = []
+    if start_iso and interval_size_s:
+        try:
+            import datetime as _dt
+            start = _dt.datetime.fromisoformat(start_iso)
+            for i in range(int(n_intervals)):
+                # Show the END of each interval (BW convention — the
+                # peak reported is for samples taken THROUGH that time)
+                end = start + _dt.timedelta(seconds=(i + 1) * interval_size_s)
+                interval_times.append(end.strftime("%H:%M:%S"))
+        except Exception:
+            pass
+
+    # Override the time_axis to reflect intervals (not samples).
+    plot_aggr = {**plot, "channels": aggregated_channels}
+    plot_aggr["time_axis"] = {
+        **(plot.get("time_axis") or {}),
+        "histogram_aggregated": True,
+        "n_intervals":           int(n_intervals),
+        "interval_size_s":       interval_size_s,
+        "interval_times":        interval_times,
+    }
+    return plot_aggr
+
+
 @app.get("/db/events/{event_id}/waveform.json")
 def db_event_waveform_json(event_id: str) -> dict:
     """
@@ -2268,7 +2351,8 @@ def db_event_waveform_json(event_id: str) -> dict:
     h5_path = store.hdf5_path_for(serial, filename)
     if h5_path.exists():
         try:
-            return event_hdf5.plot_json_from_hdf5(h5_path, event_id=event_id)
+            plot = event_hdf5.plot_json_from_hdf5(h5_path, event_id=event_id)
+            return _maybe_aggregate_histogram(plot, store, serial, filename, row)
         except Exception as exc:
             log.warning("HDF5 read failed (%s); falling back to A5 path", exc)
 
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index 188f4d5..9b0f862 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2684,10 +2684,18 @@ function _renderScWaveform(data) {
     chartsDiv.appendChild(wrap);
 
     // Waveform: per-sample time in ms relative to trigger (negative for pretrig).
-    // Histogram: interval index (1..N); time math doesn't apply to per-interval peaks.
-    const times = isHistogram
-      ? values.map((_, i) => i + 1)
-      : values.map((_, i) => t0Ms + i * dtMs);
+    // Histogram: when the server has aggregated to BW-reported intervals AND
+    // provides per-interval timestamps, use those as x-axis labels (HH:MM:SS).
+    // Falls back to interval index.
+    let times;
+    if (isHistogram) {
+      const intervalTimes = ta.interval_times || [];
+      times = (intervalTimes.length === values.length)
+        ? intervalTimes
+        : values.map((_, i) => i + 1);
+    } else {
+      times = values.map((_, i) => t0Ms + i * dtMs);
+    }
 
     // Downsample for rendering when very long.
     const MAX = 3000;
-- 
2.52.0


From 3457ed00724740a3d68c65586365aa02458c4100 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 20:32:56 +0000
Subject: [PATCH 27/42] bw_ascii_report: parse OORANGE saturation marker +
 TimeSum typo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BW writes "OORANGE" (truncation of "Out Of Range") when a channel
exceeds its full-scale range, and uses a typo'd label "Peak Vector Sum
TimeSum" for the PVS time field.  Both were confirmed against real ASCII
files pulled from a Windows watcher PC on 2026-05-27:

  T190LD5Q.LK0W  Vert PPV = OORANGE  (Normal range, 10 in/s exceeded)
  T438L713.RY0W  All three PPVs OORANGE  (Sensitive range, 1.25 in/s)
  K557L3YM.OE0W  Tran+Vert PPV OORANGE + MicL PSPL OORANGE

Previously our _parse_number() returned None for OORANGE → DB columns
ended up NULL → events vanished from filters / sorts / dashboards
despite being legitimate high-amplitude events.

New behavior — substitute a conservative bound + set a saturation flag:
  - Channel PPV       → geo_range_ips + ChannelStats.ppv_saturated
  - Peak Vector Sum   → sqrt(3) * geo_range_ips + peak_vector_sum_saturated
  - MicL PSPL         → 140 dB(L) + MicStats.pspl_saturated

Flags propagate to the sidecar's bw_report block so the SFM UI can
render "> 10 in/s" / "> 140 dBL" rather than treating the substituted
value as exact.

Same commit also accepts "Peak Vector Sum TimeSum" as an alias for
"Peak Vector Sum Time" (BW always writes the typo on OORANGE PVS
lines — every example file confirms it).

Tests: new test_oorange_marker_treated_as_saturation (synthetic) +
test_real_oorange_event_t190_parses (skips if real fixture absent).
177/177 tests pass; 16 pre-existing missing-fixture skips unchanged.

Five events on prod (T190, T438, K557, plus 2 others matching the
same fault pattern) will pick up correct peaks + saturation flags
once watchers re-forward.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                    | 10 +++++
 minimateplus/bw_ascii_report.py | 78 +++++++++++++++++++++++++++++----
 minimateplus/event_file_io.py   | 18 ++++++--
 tests/test_bw_ascii_report.py   | 58 ++++++++++++++++++++++++
 4 files changed, 151 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1e1324c..3d1a575 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,16 @@ All notable changes to seismo-relay are documented here.
 
 ## [Unreleased]
 
+### Fixed
+
+- **bw_ascii_report parser now handles `OORANGE` saturation marker.**  BW writes `"OORANGE"` (truncation of "Out Of Range") in PPV / PVS / MicL PSPL fields when the underlying measurement exceeded the channel's full-scale.  Previously our `_parse_number()` returned None → DB ended up with NULL peaks for legitimate high-amplitude events.  Confirmed on real ASCII files pulled 2026-05-27 from the Windows watcher PC: T190LD5Q.LK0W (Vert saturated at Normal range 10 in/s), T438L713.RY0W (all three channels saturated at Sensitive range 1.25 in/s), K557L3YM.OE0W (Tran+Vert saturated + Mic PSPL OORANGE).  New behavior:
+   - Per-channel PPV: substitute `geo_range_ips` as a conservative lower bound + set `ppv_saturated` flag
+   - Peak Vector Sum: substitute `sqrt(3) * geo_range_ips` (the theoretical max when all 3 channels are simultaneously at full-scale) + `peak_vector_sum_saturated` flag
+   - MicL PSPL: substitute 140 dB(L) (conservative NL-43 max) + `pspl_saturated` flag
+   - Saturation flags are propagated into the sidecar's `bw_report` block for downstream UI rendering (`> 10 in/s` or similar)
+   - Five events on prod (T190 / T438 / K557 + 2 others matching the same fault pattern) will pick up correct DB peaks + saturation flags once re-forwarded
+- **bw_ascii_report parser handles `Peak Vector Sum TimeSum` typo'd label.**  Real BW output uses this misspelled label (Sum appended twice instead of "Peak Vector Sum Time").  Now accepted as an alias.  Confirmed against all three OORANGE example files — every one has the typo.
+
 ### Added
 
 - **Histogram per-interval aggregation in `waveform.json`.**  Histogram events now render with one bar per BW-reported interval (matching the Blastware printout) instead of ~200 bars per event (the raw codec output).  When the sidecar's `bw_report.histogram.n_intervals` is populated (events ingested with the new parser, see next bullet), the `/db/events/{id}/waveform.json` endpoint groups the codec samples into N intervals via max-per-group and returns the aggregated array.  `time_axis` gains `histogram_aggregated: true`, `n_intervals`, `interval_size_s`, and `interval_times` (HH:MM:SS strings).  Both the modal chart and the standalone event browser use those interval timestamps as x-axis labels when present.  Defensive: no-op for events ingested before the parser extension landed (their sidecars lack `histogram.n_intervals`) — those continue to render with raw codec output.
diff --git a/minimateplus/bw_ascii_report.py b/minimateplus/bw_ascii_report.py
index 5ccb10a..2d85b97 100644
--- a/minimateplus/bw_ascii_report.py
+++ b/minimateplus/bw_ascii_report.py
@@ -60,6 +60,13 @@ class ChannelStats:
     time_of_peak_s:    Optional[float] = None      # seconds (relative to trigger; can be negative)
     peak_accel_g:      Optional[float] = None      # g               (geo channels only)
     peak_disp_in:      Optional[float] = None      # in              (geo channels only)
+    # When BW writes "OORANGE" (Out Of Range — truncated) for a PPV
+    # value, the true peak exceeded the channel's full-scale range.
+    # We substitute the range max (e.g. 10.000 in/s for Normal range)
+    # as a lower bound, and flag here so downstream UI / alerts know
+    # to render "> 10 in/s" or "saturated" instead of trusting the
+    # value as an exact measurement.
+    ppv_saturated:     bool = False
 
 
 @dataclass
@@ -69,6 +76,11 @@ class MicStats:
     pspl_dbl:          Optional[float] = None      # dB(L)
     zc_freq_hz:        Optional[float] = None
     time_of_peak_s:    Optional[float] = None
+    # Set when BW writes "OORANGE" for PSPL — mic exceeded its
+    # measurement range.  pspl_dbl gets the conservative upper bound
+    # 140 dBL (typical NL-43 max; some units cap at 148).  Consumers
+    # should render "> 140 dB(L)" or similar when this flag is set.
+    pspl_saturated:    bool = False
 
 
 @dataclass
@@ -92,6 +104,21 @@ class MonitorLogEntry:
     description: Optional[str] = None
 
 
+# BW saturation marker — appears in PPV / Peak Vector Sum / similar
+# numeric fields when the underlying measurement exceeded the
+# channel's full-scale range (e.g., a geophone reading > 10 in/s at
+# Normal range, or a mic exceeding its sensitivity ceiling).  Treated
+# as "≥ range_max" + a saturated flag rather than discarded.
+# Appears as: ``"Tran PPV : OORANGE in/s"``
+_OORANGE_MARKERS = ("OORANGE", "OUT OF RANGE")
+
+
+def _is_oorange(value: str) -> bool:
+    """True when a BW numeric field is an Out-Of-Range saturation marker."""
+    s = value.strip().upper()
+    return any(m in s for m in _OORANGE_MARKERS)
+
+
 @dataclass
 class BwAsciiReport:
     """Structured representation of one BW per-event ASCII export."""
@@ -144,6 +171,12 @@ class BwAsciiReport:
     # ── Vector sum ──────────────────────────────────────────────────────────
     peak_vector_sum_ips:    Optional[float] = None
     peak_vector_sum_time_s: Optional[float] = None
+    # Saturation flag — set when BW writes "OORANGE" for the PVS.  We
+    # then substitute sqrt(3) * geo_range_ips as a conservative upper
+    # bound (the theoretical maximum PVS when all 3 geo channels are
+    # simultaneously at full-scale).  Consumers should display this as
+    # ">{value} in/s" or similar.
+    peak_vector_sum_saturated: bool = False
     # Histograms additionally have an absolute date+time for the PVS
     # (it occurred at a specific interval).  Waveform reports show
     # only the relative-time value above.
@@ -486,12 +519,20 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
         ):
             ch_name, stat = key.split(" ", 1)
             cs = report.channels.setdefault(ch_name, ChannelStats())
-            num = _parse_number(value)
-            if   stat == "PPV":                 cs.ppv_ips        = num
-            elif stat == "ZC Freq":             cs.zc_freq_hz     = num
-            elif stat == "Time of Peak":        cs.time_of_peak_s = num
-            elif stat == "Peak Acceleration":   cs.peak_accel_g   = num
-            elif stat == "Peak Displacement":   cs.peak_disp_in   = num
+            if stat == "PPV":
+                if _is_oorange(value):
+                    # Channel saturated — substitute range max as lower
+                    # bound; flag so downstream UI can render "> 10 in/s".
+                    cs.ppv_ips       = report.geo_range_ips
+                    cs.ppv_saturated = True
+                else:
+                    cs.ppv_ips = _parse_number(value)
+            else:
+                num = _parse_number(value)
+                if   stat == "ZC Freq":             cs.zc_freq_hz     = num
+                elif stat == "Time of Peak":        cs.time_of_peak_s = num
+                elif stat == "Peak Acceleration":   cs.peak_accel_g   = num
+                elif stat == "Peak Displacement":   cs.peak_disp_in   = num
 
         # ── Histogram-specific fields ────────────────────────────────────────
         # Histograms have Start/Stop time+date pairs + an interval count
@@ -532,8 +573,22 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
 
         # ── Vector Sum ───────────────────────────────────────────────────────
         elif key == "Peak Vector Sum":
-            report.peak_vector_sum_ips = _parse_number(value)
-        elif key == "Peak Vector Sum Time":
+            if _is_oorange(value):
+                # PVS saturated — conservative upper bound is
+                # sqrt(3) * geo_range_ips (all 3 channels at full-scale).
+                # Real PVS could be lower (channels rarely peak
+                # simultaneously) but never higher within the range.
+                if report.geo_range_ips is not None:
+                    import math as _math
+                    report.peak_vector_sum_ips = _math.sqrt(3) * report.geo_range_ips
+                report.peak_vector_sum_saturated = True
+            else:
+                report.peak_vector_sum_ips = _parse_number(value)
+        # BW writes the PVS-time label with a typo: "Peak Vector Sum TimeSum"
+        # (looks like Sum got appended twice).  Accept both forms.  Confirmed
+        # against actual BW output on 2026-05-27 — every PVS-time line in
+        # the field examples (T190, T438, K557) uses the typo'd label.
+        elif key in ("Peak Vector Sum Time", "Peak Vector Sum TimeSum"):
             report.peak_vector_sum_time_s = _parse_number(value)
             _pvs_time_raw = value
         elif key == "Peak Vector Sum Date":
@@ -558,7 +613,12 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
         elif key == "Microphone":
             report.mic.weighting = value
         elif key == "MicL PSPL":
-            report.mic.pspl_dbl = _parse_number(value)
+            if _is_oorange(value):
+                # Mic saturated — substitute conservative upper bound 140 dBL.
+                report.mic.pspl_dbl       = 140.0
+                report.mic.pspl_saturated = True
+            else:
+                report.mic.pspl_dbl = _parse_number(value)
             # Mirror onto the "MicL" entry in channels so callers querying
             # `channels["MicL"].ppv_ips` see something — but it's dB(L), not
             # in/s, so we store as-is in the MicStats and mark the channel.
diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index b455bc0..36bf56d 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -120,7 +120,12 @@ def _bw_report_to_dict(report: BwAsciiReport) -> dict:
             "peak_disp_in":    cs.peak_disp_in,
         }
         # Drop all-None entries — keeps the JSON tidy for partial reports.
-        return {k: v for k, v in out.items() if v is not None}
+        out = {k: v for k, v in out.items() if v is not None}
+        # Saturation flag (only present when True) — signals that ppv_ips
+        # is the channel range max (a lower bound), not an exact reading.
+        if getattr(cs, "ppv_saturated", False):
+            out["ppv_saturated"] = True
+        return out
 
     def _sc(ch_name: str) -> dict:
         sc = report.sensor_check.get(ch_name)
@@ -169,17 +174,22 @@ def _bw_report_to_dict(report: BwAsciiReport) -> dict:
             "vert":         _ch("Vert"),
             "long":         _ch("Long"),
             "vector_sum": {
-                "ips":     report.peak_vector_sum_ips,
-                "time_s":  report.peak_vector_sum_time_s,
+                "ips":       report.peak_vector_sum_ips,
+                "time_s":    report.peak_vector_sum_time_s,
                 # Histogram events have an absolute date+time for the PVS
                 # (the interval at which it occurred); waveform events
                 # only have the time_s offset.
-                "when":    report.peak_vector_sum_when.isoformat() if report.peak_vector_sum_when else None,
+                "when":      report.peak_vector_sum_when.isoformat() if report.peak_vector_sum_when else None,
+                # Set when BW reported the PVS as OORANGE — value is the
+                # conservative upper bound sqrt(3) * geo_range_ips, not
+                # an exact peak.
+                "saturated": bool(getattr(report, "peak_vector_sum_saturated", False)),
             },
         },
         "mic": {
             "weighting":        report.mic.weighting,
             "pspl_dbl":         report.mic.pspl_dbl,
+            "pspl_saturated":   bool(getattr(report.mic, "pspl_saturated", False)),
             "zc_freq_hz":       report.mic.zc_freq_hz,
             "time_of_peak_s":   report.mic.time_of_peak_s,
         },
diff --git a/tests/test_bw_ascii_report.py b/tests/test_bw_ascii_report.py
index 024a9a4..ddfae70 100644
--- a/tests/test_bw_ascii_report.py
+++ b/tests/test_bw_ascii_report.py
@@ -385,6 +385,64 @@ def test_user_notes_extra_lines_beyond_four_are_dropped():
     assert "L5" not in r.user_note_labels.values()
 
 
+def test_oorange_marker_treated_as_saturation():
+    """BW writes 'OORANGE' (Out Of Range — truncated) when a channel
+    exceeds its full-scale.  Verify ppv_ips falls back to geo_range_ips
+    + saturated flag is set, mirroring the real T190LD5Q.LK0W,
+    T438L713.RY0W, and K557L3YM.OE0W events from prod 2026-05-27.
+    """
+    txt = """\
+"Event Type : Full Waveform"
+"Serial Number : BE18190"
+"Geo Range : 10.000 in/s"
+"Tran PPV : 2.140 in/s"
+"Vert PPV : OORANGE in/s"
+"Long PPV : 2.830 in/s"
+"Peak Vector Sum : OORANGE in/s"
+"Peak Vector Sum TimeSum : 0.007 s"
+"MicL PSPL : OORANGE "
+"""
+    r = parse_report(txt)
+    # Tran/Long parse normally
+    assert r.channels["Tran"].ppv_ips == 2.14
+    assert r.channels["Tran"].ppv_saturated is False
+    assert r.channels["Long"].ppv_ips == 2.83
+    # Vert saturated → range max + flag
+    assert r.channels["Vert"].ppv_ips == 10.0
+    assert r.channels["Vert"].ppv_saturated is True
+    # PVS saturated → sqrt(3) * range_max as upper bound + flag
+    import math
+    assert r.peak_vector_sum_ips == pytest.approx(math.sqrt(3) * 10.0)
+    assert r.peak_vector_sum_saturated is True
+    # Mic saturated → 140 dBL conservative upper bound + flag
+    assert r.mic.pspl_dbl == 140.0
+    assert r.mic.pspl_saturated is True
+    # PVS time still parses despite the BW typo'd label "TimeSum"
+    assert r.peak_vector_sum_time_s == pytest.approx(0.007)
+
+
+def test_real_oorange_event_t190_parses():
+    """End-to-end against the real T190LD5Q.LK0W ASCII file pulled from
+    a Windows watcher PC on 2026-05-27.  This is the canonical example
+    of the parser-PPV-miss bug we fixed in this iteration."""
+    fixture_path = (
+        Path(__file__).parent.parent / "example-events" /
+        "ascii-5-27-26" / "T190LD5Q_LK0W_ASCII.TXT"
+    )
+    if not fixture_path.exists():
+        pytest.skip("real ASCII fixture not present (local-only)")
+    r = parse_report_file(fixture_path)
+    assert r.serial == "BE18190"
+    assert r.geo_range_ips == 10.0
+    # Tran reads cleanly, Vert was OORANGE
+    assert r.channels["Tran"].ppv_ips == pytest.approx(2.14)
+    assert r.channels["Vert"].ppv_ips == 10.0
+    assert r.channels["Vert"].ppv_saturated is True
+    assert r.channels["Long"].ppv_ips == pytest.approx(2.83)
+    assert r.peak_vector_sum_saturated is True
+    assert r.peak_vector_sum_time_s == pytest.approx(0.007)
+
+
 def test_real_histogram_fixture_populates_sensor_location():
     """End-to-end: the histogram fixture uses 'Seis. Location:' — must
     successfully populate sensor_location via position-based parsing."""
-- 
2.52.0


From 8cbda09917a622743c76b20ba9a91ce55beb83d7 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 22:30:43 +0000
Subject: [PATCH 28/42] viewers: render timestamps in browser-local time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Spotted on the SFM webapp event modal — "Received by server at" was
showing the raw ISO string "2026-05-27T21:59:57.213043Z" because we
were assigning ev.timestamp / src.captured_at directly to the
textContent of the modal fields, bypassing the existing _fmtTs()
helper that wraps them in toLocaleString().

Net effect for operators: confusing "21:59 vs it's 6 PM" mismatch
when the displayed UTC timestamp didn't match wall-clock time.  The
values were always correct; the display was just ambiguous.

After this fix:
  - "Recorded at" (naive ISO from BW = unit local time) renders
    cleanly as the unit wrote it: "5/27/2026, 6:00:13 AM"
  - "Received by server at" (UTC with Z suffix) converts to browser
    local: "5/27/2026, 5:59:57 PM"
  - Timestamp column in the history table already used _fmtTs —
    unchanged
  - Same fix applied to the standalone /events page (sidebar event
    list + meta header) via a new _fmtTsLocal helper

Note: did NOT add file-mtime-on-watcher-PC tracking as a separate
"Called in at" column — discussed and decided created_at is close
enough for schedule-compliance monitoring (worst case lag = watcher
poll interval ~60s, indistinguishable from BW write time at the
operationally-relevant resolution).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 14 ++++++++++++--
 sfm/sfm_webapp.html    |  9 +++++++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index 9f5fd31..30542af 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -356,6 +356,16 @@ function _psiToDbl(psi) {
   return 20 * Math.log10(psi / DBL_REF);
 }
 
+// Format an ISO timestamp in the browser's local timezone — UTC values
+// (with 'Z' suffix) convert; naive values are interpreted as local clock.
+// Returns '—' for null/empty/unparseable.
+function _fmtTsLocal(iso) {
+  if (!iso) return '—';
+  const d = new Date(iso);
+  if (isNaN(d)) return iso;
+  return d.toLocaleString();
+}
+
 // Adaptive decimal formatter — scientific notation only for truly extreme
 // values.  Normal-range peaks render as plain decimals with sensible
 // precision (was previously forcing toExponential(3) which produced ugly
@@ -458,7 +468,7 @@ function renderEventList() {
     const row = document.createElement('div');
     row.className = 'event-row' + (ev.false_trigger ? ' false_trigger' : '');
     if (ev.id === currentEventId) row.className += ' active';
-    const ts = (ev.timestamp || '').replace('T', ' ').replace('Z', '');
+    const ts = _fmtTsLocal(ev.timestamp);
     const pvs = ev.peak_vector_sum != null ? `${ev.peak_vector_sum.toFixed(3)} in/s` : '—';
     row.innerHTML = `
       <div class="er-top">
@@ -510,7 +520,7 @@ function renderMeta(data, ev) {
   const metaDiv = document.getElementById('event-meta');
   const fields = [
     ['Serial',      data.serial || ev?.serial || '—'],
-    ['Timestamp',   (data.timestamp || ev?.timestamp || '—').replace('T', ' ').replace('Z', '')],
+    ['Timestamp',   _fmtTsLocal(data.timestamp || ev?.timestamp)],
     ['Record',      data.record_type || ev?.record_type || '—'],
     ['Sample rate', data.sample_rate ? `${data.sample_rate} sps` : '—'],
     ['Geo range',   data.geo_range ? `${data.geo_range} (${data.geo_full_scale_ips} in/s FS)` : '—'],
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index 9b0f862..856c8f5 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2864,7 +2864,9 @@ function _renderSidecar(data) {
   };
 
   document.getElementById('sc-f-serial').textContent   = ev.serial          || '—';
-  document.getElementById('sc-f-ts').textContent       = ev.timestamp       || '—';
+  // Route through _fmtTs so the unit-local naive timestamp shows as
+  // "5/27/2026, 6:00:13 AM" instead of "2026-05-27T06:00:13".
+  document.getElementById('sc-f-ts').textContent       = _fmtTs(ev.timestamp);
   document.getElementById('sc-f-rt').textContent       = ev.record_type     || '—';
   document.getElementById('sc-f-sr').textContent       = (ev.sample_rate ?? '—') + (ev.sample_rate ? ' sps' : '');
   document.getElementById('sc-f-key').textContent      = ev.waveform_key    || '—';
@@ -2884,7 +2886,10 @@ function _renderSidecar(data) {
   document.getElementById('sc-f-bwsize').textContent   = bw.filesize != null ? `${bw.filesize} bytes` : '—';
   document.getElementById('sc-f-sha').textContent      = bw.sha256          || '—';
   document.getElementById('sc-f-src').textContent      = src.kind           || '—';
-  document.getElementById('sc-f-cap').textContent      = src.captured_at    || '—';
+  // captured_at has a "Z" suffix (UTC); _fmtTs converts to browser local
+  // — matches the BW-reported recorded-at, no more "21:59:57 vs it's 6 PM"
+  // confusion from operators reading the raw UTC value.
+  document.getElementById('sc-f-cap').textContent      = _fmtTs(src.captured_at);
 
   document.getElementById('sc-edit-ft').checked        = !!rev.false_trigger;
   document.getElementById('sc-edit-reviewer').value    = rev.reviewer || '';
-- 
2.52.0


From ace542cba58ecb065ace57100ba4a45d453cc7d9 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 22:47:53 +0000
Subject: [PATCH 29/42] report_pdf: wire histogram peak date/time + PVS-when +
 Finish field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Spotted comparing our PDF to BW's reference for T003LLUB.CE0H:
  - Finish blank
  - Per-channel Date / Time rows all dashes
  - MicL PSPL line missing "on May 27, 2026 at 06:19:14"
  - Peak Vector Sum missing "on May 27, 2026 At 06:06:14"

Root cause: I'd added these fields to the projection (write side) in
_bw_report_to_dict but never wired them into gather_report_data
(read side).  Plus the projection used keys "start"/"stop" while
gather was reading "start_str"/"stop_str" — a key-name mismatch, so
those lookups always came back empty.

Fixes:
  - gather_report_data now reads bw_report.histogram.start /
    .stop / .channel_peak_when (correct keys, matching the projection)
  - Per-channel "peak_date" / "peak_time" populated from
    channel_peak_when[<channel>] for the histogram stats table
  - MicL PSPL line formats as "PSPL  125.7 dB(L) on May 27, 2026
    at 06:19:14" (BW style) when channel_peak_when["MicL"] is present;
    falls back to the waveform-relative "at 0.012 sec" otherwise
  - PVS line formats as "Peak Vector Sum  0.091 in/s on May 27, 2026
    At 06:06:14" (BW style) when bw_report.peaks.vector_sum.when is
    populated; falls back to the relative time_s for waveforms
  - New _split_iso_to_date_time() helper splits ISO timestamps into
    BW-formatted ("May 27 /26", "06:06:14") date+time pairs for the
    stats table's separate Date and Time rows

Events ingested BEFORE the parser extension landed (most of the
existing prod corpus) still show dashes — their sidecars lack the
histogram block.  Re-forwarding repopulates.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/report_pdf.py | 87 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 17 deletions(-)

diff --git a/sfm/report_pdf.py b/sfm/report_pdf.py
index 9a256a0..1df8699 100644
--- a/sfm/report_pdf.py
+++ b/sfm/report_pdf.py
@@ -97,6 +97,7 @@ class ReportData:
     mic_pspl_dbl:           Optional[float] = None
     mic_pspl_psi:           Optional[float] = None
     mic_pspl_time_s:        Optional[float] = None
+    mic_pspl_when_str:      Optional[str] = None    # histogram absolute date+time, BW-formatted
     mic_zc_freq_hz:         Optional[float] = None
     mic_channel_test_result: Optional[str] = None
     mic_channel_test_freq_hz: Optional[float] = None
@@ -220,12 +221,19 @@ def gather_report_data(
         rd.mic_channel_test_freq_hz  = sc_mic.get("freq_hz")
         rd.mic_channel_test_amp_mv   = sc_mic.get("amplitude_mv")
 
-        # Per-channel stats (Tran / Vert / Long)
+        # Per-channel stats (Tran / Vert / Long).  Per-channel peak
+        # date+time for histograms comes from bw_report.histogram.channel_peak_when
+        # (populated when the parser captured it; see the bw_ascii_report
+        # parser's histogram-fields handler).
         peaks = bw.get("peaks") or {}
         sc_block = bw.get("sensor_check") or {}
+        hist_block = bw.get("histogram") or {}
+        peak_when = hist_block.get("channel_peak_when") or {}
         for ch_lc, ch_label in (("tran", "Tran"), ("vert", "Vert"), ("long", "Long")):
             ch = peaks.get(ch_lc) or {}
             sc_ch = sc_block.get(ch_lc) or {}
+            ch_when_iso = peak_when.get(ch_label)
+            peak_date, peak_time = _split_iso_to_date_time(ch_when_iso)
             rd.channel_stats.append({
                 "name":          ch_label,
                 "ppv_ips":       ch.get("ppv_ips"),
@@ -234,25 +242,30 @@ def gather_report_data(
                 "peak_accel_g":  ch.get("peak_accel_g"),
                 "peak_disp_in":  ch.get("peak_disp_in"),
                 "sensor_check":  sc_ch.get("result"),
+                "peak_date":     peak_date,
+                "peak_time":     peak_time,
             })
 
+        # MicL peak time (used in the mic block — "PSPL ... on DATE at TIME")
+        mic_when_iso = peak_when.get("MicL")
+        rd.mic_pspl_when_str = _fmt_iso_to_bw(mic_when_iso) if mic_when_iso else None
+
         # Peak Vector Sum
         vs = peaks.get("vector_sum") or {}
         rd.peak_vector_sum_ips    = vs.get("ips")
         rd.peak_vector_sum_time_s = vs.get("time_s")
+        # PVS absolute date+time (histograms).  Same formatting as Mic.
+        pvs_when_iso = vs.get("when")
+        rd.peak_vector_sum_when_str = _fmt_iso_to_bw(pvs_when_iso) if pvs_when_iso else None
 
-        # Histogram-specific header fields.  These come from the BW XML
-        # at ingest time (when present); the parsed bw_report dict
-        # carries them under the 'histogram' sub-block (added by the
-        # BW XML parser once that lands).  For now, derive from the
-        # event timestamp + recording config as a best-effort.
+        # Histogram-specific header fields — keys match the projection in
+        # _bw_report_to_dict ("start" / "stop", not "_str" suffixed).
         if rd.is_histogram:
-            hist = bw.get("histogram") or {}
-            rd.histogram_start_str = hist.get("start_str") or rd.event_datetime_str
-            rd.histogram_stop_str  = hist.get("stop_str")
-            rd.histogram_n_intervals = hist.get("n_intervals")
-            rd.histogram_interval_size = hist.get("interval_size")
-            rd.histogram_interval_times = hist.get("interval_times") or []
+            rd.histogram_start_str   = hist_block.get("start") or rd.event_datetime_str
+            rd.histogram_stop_str    = hist_block.get("stop")
+            rd.histogram_n_intervals = hist_block.get("n_intervals")
+            rd.histogram_interval_size = hist_block.get("interval_size")
+            rd.histogram_interval_times = hist_block.get("interval_times") or []
 
     # ── Waveform samples — from the .h5 via the existing helper ──
     from sfm import event_hdf5
@@ -376,6 +389,24 @@ def _fmt_iso_to_bw(iso: Optional[str]) -> Optional[str]:
         return iso
 
 
+def _split_iso_to_date_time(iso: Optional[str]) -> tuple[Optional[str], Optional[str]]:
+    """Split an ISO timestamp into BW-formatted ("May 27 /26", "06:06:14")
+    date+time strings.  Used for the histogram stats table where the
+    Date and Time rows are presented separately.  Returns (None, None)
+    if the input isn't a valid ISO datetime."""
+    if not iso:
+        return (None, None)
+    try:
+        import datetime as _dt
+        dt = _dt.datetime.fromisoformat(iso.replace("Z", "+00:00"))
+        # BW format: "May 27 /26" (3-letter month + 2-digit year)
+        date_str = dt.strftime("%b %d /%y").replace(" 0", " ")
+        time_str = dt.strftime("%H:%M:%S")
+        return (date_str, time_str)
+    except Exception:
+        return (None, None)
+
+
 def _kv(ax, x, y, label, value, *, label_w=0.18):
     """Render a 'Label  Value' row at axes-coordinates (x, y)."""
     ax.text(x, y, label, fontsize=8, color="#555", ha="left", va="top",
@@ -489,11 +520,28 @@ def _draw_mic_and_usbm(ax, rd: ReportData) -> None:
 
 
 def _mic_rows(rd: ReportData) -> list[tuple[str, Optional[str]]]:
-    """Build the mic-section value rows (shared by both layouts)."""
+    """Build the mic-section value rows (shared by both layouts).
+
+    For histograms, BW formats the PSPL line as
+        "125.7 dB(L) on May 27, 2026 at 06:19:14"
+    (absolute date+time of peak).  Waveform events show the relative
+    "at 0.012 sec." instead.  Both formats covered here based on which
+    field is populated.
+    """
     rows: list[tuple[str, Optional[str]]] = []
     if rd.mic_pspl_dbl is not None:
         line = f"{rd.mic_pspl_dbl:.1f} dB(L)"
-        if rd.mic_pspl_time_s is not None:
+        if rd.mic_pspl_when_str:
+            # Histogram-style: "PSPL  125.7 dB(L) on May 27, 2026 at 06:19:14"
+            # mic_pspl_when_str is already "HH:MM:SS Month DD, YYYY";
+            # reformat to "on Month DD, YYYY at HH:MM:SS" for BW match.
+            parts = rd.mic_pspl_when_str.split(" ", 1)
+            if len(parts) == 2:
+                line += f" on {parts[1]} at {parts[0]}"
+            else:
+                line += f" on {rd.mic_pspl_when_str}"
+        elif rd.mic_pspl_time_s is not None:
+            # Waveform-style: relative-to-trigger seconds.
             line += f" at {rd.mic_pspl_time_s:.3f} sec."
         rows.append(("PSPL", line))
     if rd.mic_zc_freq_hz is not None:
@@ -545,10 +593,15 @@ def _draw_channel_stats_histogram(ax, rd: ReportData) -> None:
     ]
     _draw_stats_table(ax, rd, rows_spec)
     if rd.peak_vector_sum_ips is not None:
-        when = rd.peak_vector_sum_when_str or ""
         line = f"Peak Vector Sum   {rd.peak_vector_sum_ips:.3f} in/s"
-        if when:
-            line += f" on {when}"
+        # Histograms: "0.091 in/s on May 27, 2026 At 06:06:14"
+        # The when_str is "HH:MM:SS Month DD, YYYY" — reformat for BW match.
+        if rd.peak_vector_sum_when_str:
+            parts = rd.peak_vector_sum_when_str.split(" ", 1)
+            if len(parts) == 2:
+                line += f" on {parts[1]} At {parts[0]}"
+            else:
+                line += f" on {rd.peak_vector_sum_when_str}"
         ax.text(0.0, -0.08, line, fontsize=9, weight="bold",
                 ha="left", va="top", transform=ax.transAxes)
         ax.text(0.0, -0.18, "NA: Not Applicable", fontsize=7, color="#888",
-- 
2.52.0


From 87aec3f4d1755fe57527b36a2fe75a5d6a6fa031 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Wed, 27 May 2026 23:08:21 +0000
Subject: [PATCH 30/42] viewers: smoother mic dBL chart + restore binary/TXT
 download links
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues spotted in the modal:

1. Mic dBL chart looked spikey/discontinuous — isolated bars at 80-95
   with gaps in between.  Cause: _psiToDbl() returns null for zero or
   negative samples, and most mic samples on a quiet event sit at the
   digitization noise floor where they're effectively zero.  Result:
   the chart only renders the moments when instantaneous SPL exceeded
   the Y-axis bottom — looks like a sound trigger gate.

   Fix: new _psiToDblForChart() rectifies the AC waveform (abs), then
   converts to dBL, then floors at MIC_DBL_FLOOR=60 dBL.  Chart now
   has a continuous 60 dBL baseline with peaks above it — matches how
   acoustic engineers expect SPL-vs-time.  Y-axis bottom pinned to
   MIC_DBL_FLOOR, top to peak + 5 dB headroom.  Peak label still uses
   the unrectified _psiToDbl so the displayed peak value is exact.

2. Filename in Source/Files block was unlinked.  Endpoint exists
   (/db/events/{id}/blastware_file) — just wasn't wired to the modal.
   Made it a clickable download link.  Same treatment for the
   preserved .TXT — added "(download .TXT)" link next to source kind
   when source.txt_filename is populated (events ingested after the
   .TXT preservation feature landed; older events show no link).

Applied to both the inline modal in sfm_webapp.html and the
standalone /events page in event_browser.html.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 24 ++++++++++++++-
 sfm/sfm_webapp.html    | 70 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 88 insertions(+), 6 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index 30542af..9be5431 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -356,6 +356,18 @@ function _psiToDbl(psi) {
   return 20 * Math.log10(psi / DBL_REF);
 }
 
+// Per-sample mic chart conversion — rectify the AC waveform, dBL,
+// floor below the noise-floor minimum.  Gives a continuous baseline
+// instead of the spikey/discontinuous look you get from raw _psiToDbl.
+const MIC_DBL_FLOOR = 60;
+function _psiToDblForChart(psi) {
+  if (psi == null) return MIC_DBL_FLOOR;
+  const a = Math.abs(psi);
+  if (a === 0) return MIC_DBL_FLOOR;
+  const dbl = 20 * Math.log10(a / DBL_REF);
+  return dbl > MIC_DBL_FLOOR ? dbl : MIC_DBL_FLOOR;
+}
+
 // Format an ISO timestamp in the browser's local timezone — UTC values
 // (with 'Z' suffix) convert; naive values are interpreted as local clock.
 // Returns '—' for null/empty/unparseable.
@@ -638,7 +650,10 @@ function renderWaveform(data) {
     let peak  = chData.peak;
     const peakT = chData.peak_t_ms;
     if (ch === 'MicL' && unit === 'psi' && micUnit === 'dBL') {
-      values = values.map(_psiToDbl);
+      // Per-sample chart uses rectified-and-floored conversion so the
+      // baseline is continuous; the peak label uses the unrectified
+      // converter to preserve the true measurement.
+      values = values.map(_psiToDblForChart);
       peak   = _psiToDbl(peak);
       unit   = 'dB(L)';
     }
@@ -711,6 +726,13 @@ function renderWaveform(data) {
       }
       const padded = (absMax || 1) * 1.10;
       yBounds = { min: -padded, max: padded };
+    } else if (ch === 'MicL' && micUnit === 'dBL') {
+      // Baseline at noise-floor minimum (matches what we floored
+      // null/quiet samples to), top at peak + 5 dB headroom.
+      const peakDbl = (typeof peak === 'number' && isFinite(peak))
+        ? peak + 5
+        : 100;
+      yBounds = { min: MIC_DBL_FLOOR, max: Math.max(peakDbl, MIC_DBL_FLOOR + 20) };
     }
 
     const chart = new Chart(canvas, {
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index 856c8f5..19a3e05 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2589,6 +2589,23 @@ function _psiToDbl(psi) {
   return 20 * Math.log10(psi / DBL_REF);
 }
 
+// Per-sample mic display floor.  Sound pressure AC samples spend most
+// of their time at the digitization noise floor (1-2 ADC counts ≈ ~20-40
+// dBL).  Rendering each one as null/-inf produces a spikey discontinuous
+// chart of "moments when sound briefly exceeded 80 dBL" — confusing.
+// Instead we rectify (abs the AC waveform), convert to dBL, and floor
+// anything below MIC_DBL_FLOOR so the chart has a continuous baseline
+// with peaks rising above it.  Matches how acoustic engineers expect to
+// see SPL-vs-time.
+const MIC_DBL_FLOOR = 60;
+function _psiToDblForChart(psi) {
+  if (psi == null) return MIC_DBL_FLOOR;
+  const a = Math.abs(psi);
+  if (a === 0) return MIC_DBL_FLOOR;
+  const dbl = 20 * Math.log10(a / DBL_REF);
+  return dbl > MIC_DBL_FLOOR ? dbl : MIC_DBL_FLOOR;
+}
+
 // Adaptive decimal formatter — scientific notation is reserved for truly
 // extreme values (10000+ or sub-0.0001).  Normal-range values (most peaks
 // fall here) render as decimals with sensible precision.  Replaces the
@@ -2649,10 +2666,14 @@ function _renderScWaveform(data) {
     let chPeak = chData.peak;
 
     // Mic channel: convert from raw psi to dB(L) when user prefers dBL
-    // (default).  Mic samples that are zero/negative become null (Chart.js
-    // renders them as gaps in line mode, zero-height bars in histogram mode).
+    // (default).  Per-sample values use _psiToDblForChart which rectifies
+    // (abs) the AC waveform and floors at MIC_DBL_FLOOR so the chart is
+    // continuous with a baseline + peaks above it, instead of a sparse
+    // pattern of isolated spikes for "moments when sound briefly exceeded
+    // the Y-axis bottom".  The peak label uses _psiToDbl with the
+    // unrectified peak (preserves the true measurement).
     if (ch === 'MicL' && chUnit === 'psi' && micUnit === 'dBL') {
-      values = values.map(_psiToDbl);
+      values = values.map(_psiToDblForChart);
       chPeak = _psiToDbl(chPeak);
       chUnit = 'dB(L)';
     }
@@ -2736,6 +2757,13 @@ function _renderScWaveform(data) {
       }
       const padded = (absMax || 1) * 1.10;
       yBounds = { min: -padded, max: padded };
+    } else if (ch === 'MicL' && micUnit === 'dBL') {
+      // Pin baseline at the chart floor (which matches what we flooded
+      // null/quiet samples to), top at the actual peak + a few dB headroom.
+      const peakDbl = (typeof chPeak === 'number' && isFinite(chPeak))
+        ? chPeak + 5
+        : 100;
+      yBounds = { min: MIC_DBL_FLOOR, max: Math.max(peakDbl, MIC_DBL_FLOOR + 20) };
     }
 
     _scCharts[ch] = new Chart(canvas, {
@@ -2882,10 +2910,42 @@ function _renderSidecar(data) {
   document.getElementById('sc-f-operator').textContent = pi.operator        || '—';
   document.getElementById('sc-f-loc').textContent      = pi.sensor_location || '—';
 
-  document.getElementById('sc-f-bw').textContent       = bw.filename        || '—';
+  // Filename rendered as a clickable download link for the original BW
+  // binary.  Same endpoint the live-device viewer uses for stored events
+  // (/db/events/{id}/blastware_file).
+  const bwCell = document.getElementById('sc-f-bw');
+  bwCell.innerHTML = '';
+  if (bw.filename && _scCurrentEventId) {
+    const a = document.createElement('a');
+    a.href = `${api()}/db/events/${_scCurrentEventId}/blastware_file`;
+    a.textContent = bw.filename;
+    a.download = bw.filename;
+    a.title = 'Download original BW event binary';
+    a.style.color = 'var(--accent, #58a6ff)';
+    a.style.textDecoration = 'underline';
+    bwCell.appendChild(a);
+  } else {
+    bwCell.textContent = '—';
+  }
   document.getElementById('sc-f-bwsize').textContent   = bw.filesize != null ? `${bw.filesize} bytes` : '—';
   document.getElementById('sc-f-sha').textContent      = bw.sha256          || '—';
-  document.getElementById('sc-f-src').textContent      = src.kind           || '—';
+  // Source kind + a download link for the preserved BW ASCII report
+  // (.TXT), when available.  Only events ingested after 2026-05-27
+  // have the .TXT preserved; older events show "—".
+  const srcCell = document.getElementById('sc-f-src');
+  srcCell.innerHTML = '';
+  srcCell.appendChild(document.createTextNode(src.kind || '—'));
+  if (src.txt_filename && _scCurrentEventId) {
+    const a = document.createElement('a');
+    a.href = `${api()}/db/events/${_scCurrentEventId}/ascii_report.txt`;
+    a.textContent = ' (download .TXT)';
+    a.download = src.txt_filename;
+    a.title = 'Download preserved BW ASCII report';
+    a.style.color = 'var(--accent, #58a6ff)';
+    a.style.marginLeft = '8px';
+    a.style.fontSize = '11px';
+    srcCell.appendChild(a);
+  }
   // captured_at has a "Z" suffix (UTC); _fmtTs converts to browser local
   // — matches the BW-reported recorded-at, no more "21:59:57 vs it's 6 PM"
   // confusion from operators reading the raw UTC value.
-- 
2.52.0


From b59f886cb7d14be706612ff803431e8cab62ce7c Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 04:17:50 +0000
Subject: [PATCH 31/42] docs: roadmap entry for sensor-check waveform
 extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BW's Event Report PDFs include a per-channel sensor-check response
waveform on the right side of the bottom plot (damped sinusoid for
geo channels, sawtooth-at-test-freq for mic).  It looks like real
per-sample data extracted from the binary, not a synthesized curve.

Our parser captures the test results (freq, ratio, amplitude,
pass/fail) but not the waveform samples — so the report shows text
only for sensor check.  Pinning a roadmap entry to investigate the
binary for the sample data (path a) or fall back to synthesized
visualization (path b).

Current text-only display is operationally sufficient.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index d62c1e5..bff24db 100644
--- a/README.md
+++ b/README.md
@@ -567,3 +567,4 @@ Implementation steps (concrete):
 - [ ] RV55 DCD/DTR — newer RV55 firmware doesn't assert DCD by default; units don't resume monitoring after call-home disconnect (`--restart-monitoring` flag deferred).
 - [ ] **NULL-timestamp duplicate-row dedup.**  A small handful of events (2 known on prod as of 2026-05-22) have `events.timestamp IS NULL` because the codec couldn't extract a timestamp from the binary footer.  The `UNIQUE(serial, timestamp)` constraint doesn't fire on `NULL` (SQL semantics: `NULL ≠ NULL`), so every `--force` backfill INSERTs a new row instead of UPSERTing the existing one.  Cleanup: a one-shot SQL query that keeps only the newest row per `(serial, blastware_filename)` and deletes the rest.  Longer-term: extend the unique key to `(serial, COALESCE(timestamp, blastware_filename))` or reject inserts with NULL timestamp.
 - [ ] **Histogram body sub-format with `byte[5] != 0`.**  ~3 events on prod (`T190LD5Q.LD0H`, `O121L4L1.GU0H`) use a histogram body my walker doesn't recognize — the first block has `byte[5] = 0x01` or `0x07` instead of `0x00`, and the entire body lacks the `1e 0a 00 00` tail signature.  Codec returns 0 valid blocks; their DB PVS comes from the bw_report ASCII overlay (which BW computed from the same binary, so the DB columns are correct).  Only the `.h5` waveform plot is empty.  Cracking the sub-format would unlock the plot.  Needs binary+ASCII pairs from a few `byte[5]!=0` events; same RE approach as the K558 case.
+- [ ] **Sensor-check waveform extraction from the BW binary.**  BW's Event Report PDFs include a narrow panel on the right side of the waveform plot showing each channel's response to the sensor self-check signal (a damped sinusoid for geo, sawtooth-at-test-freq for mic).  Our parser captures the test RESULTS (`test_freq_hz`, `test_ratio`, `test_amplitude_mv`, `test_results` pass/fail) and the PDF + modal display them as text — but BW's per-sample sensor-check waveform isn't accessible to us today.  Two paths to add it:  (a) RE the binary to find where the sensor-check samples are stored — could be a section before STRT, after the footer, or in a separate sub-record; protocol reference doesn't currently mention it.  (b) If samples aren't in the binary, synthesize a representative waveform from the test parameters (damped sinusoid at `test_freq_hz` with damping from `test_ratio`).  Path (a) is the honest answer; path (b) is decorative.  Until either lands, the text-only sensor-check display in the report is fine.
-- 
2.52.0


From b9f8bbb220cebe8bd9d4dd76a4dd59fa32dd3077 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 04:23:01 +0000
Subject: [PATCH 32/42] viewers: enforce minimum Y-range on histogram channels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Quiet histogram events were filling the chart panel even though the
peak was tiny (0.005 in/s rendered as 90% of chart height because
Chart.js auto-scaled to peak * 1.1).  This made every event look
uniformly loud regardless of its actual amplitude.

BW's solution: a near-fixed scale per channel ("Geo: 0.002 in/s/div"
from the footer).  Quiet events render small, loud events render
proportionally tall.

Match the intent without copying BW's "no Y-axis labels at all"
convention.  For histogram channels:

  Geo (in/s):       min Y range 0.05 in/s
  Mic in psi:       min Y range 0.001 psi
  Mic in dBL:       unchanged (the 60 dBL floor + peak+5 top already
                    gives quiet events a sensible baseline)

So a 0.005 in/s geo event renders as ~10% of chart height; a 0.05
event fills ~90% of it; a 5.0 event still fills ~90% of it
(max(peak*1.1, 0.05) == peak*1.1 for any peak above 0.05/1.1 ≈ 0.0455).

Waveform charts unchanged — they should zoom for shape detail.
Applied to both the modal in sfm_webapp.html and the standalone
/events page in event_browser.html.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 21 +++++++++++++++++----
 sfm/sfm_webapp.html    | 27 +++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index 9be5431..bbd960f 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -717,8 +717,9 @@ function renderWaveform(data) {
     // up AND down).  Mic + histograms keep default auto-scale (always
     // positive values; zero at the bottom).
     let yBounds = {};
-    const isGeoWaveform = !isHistogram && ch !== 'MicL';
-    if (isGeoWaveform) {
+    const isGeo = ch !== 'MicL';
+    if (isGeo && !isHistogram) {
+      // Waveform geo: symmetric around zero for full shape detail.
       let absMax = 0;
       for (const v of values) {
         const a = Math.abs(v);
@@ -726,13 +727,25 @@ function renderWaveform(data) {
       }
       const padded = (absMax || 1) * 1.10;
       yBounds = { min: -padded, max: padded };
+    } else if (isGeo && isHistogram) {
+      // Histogram geo: enforce minimum chart range so quiet events
+      // look quiet (matches BW's near-fixed-scale convention).
+      const HIST_GEO_MIN_INS = 0.05;
+      let p = 0;
+      for (const v of values) { const a = Math.abs(v); if (a > p) p = a; }
+      yBounds = { min: 0, max: Math.max(p * 1.10, HIST_GEO_MIN_INS) };
     } else if (ch === 'MicL' && micUnit === 'dBL') {
-      // Baseline at noise-floor minimum (matches what we floored
-      // null/quiet samples to), top at peak + 5 dB headroom.
+      // Mic dBL: baseline at noise-floor minimum, top at peak + 5 dB.
       const peakDbl = (typeof peak === 'number' && isFinite(peak))
         ? peak + 5
         : 100;
       yBounds = { min: MIC_DBL_FLOOR, max: Math.max(peakDbl, MIC_DBL_FLOOR + 20) };
+    } else if (ch === 'MicL' && isHistogram && micUnit === 'psi') {
+      // Mic histogram in psi: same minimum-range treatment as geo.
+      const HIST_MIC_MIN_PSI = 0.001;
+      let p = 0;
+      for (const v of values) { const a = Math.abs(v); if (a > p) p = a; }
+      yBounds = { min: 0, max: Math.max(p * 1.10, HIST_MIC_MIN_PSI) };
     }
 
     const chart = new Chart(canvas, {
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index 19a3e05..5021c79 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2748,8 +2748,9 @@ function _renderScWaveform(data) {
     //   - Mic (always positive sound pressure) + histograms (per-interval
     //     peaks, always positive): default auto-scale, zero at the bottom.
     let yBounds = {};
-    const isGeoWaveform = !isHistogram && ch !== 'MicL';
-    if (isGeoWaveform) {
+    const isGeo = ch !== 'MicL';
+    if (isGeo && !isHistogram) {
+      // Waveform geo: symmetric around zero, full zoom to shape detail.
       let absMax = 0;
       for (const v of values) {
         const a = Math.abs(v);
@@ -2757,13 +2758,31 @@ function _renderScWaveform(data) {
       }
       const padded = (absMax || 1) * 1.10;
       yBounds = { min: -padded, max: padded };
+    } else if (isGeo && isHistogram) {
+      // Histogram geo: enforce a minimum chart range so a quiet
+      // 0.005 in/s event renders as ~10% of chart height instead of
+      // filling the panel.  Matches BW's near-fixed-scale convention
+      // (their footer is "Geo: 0.002 in/s/div" — a chart-relative scale,
+      // not auto-zoom).
+      const HIST_GEO_MIN_INS = 0.05;
+      let peak = 0;
+      for (const v of values) { const a = Math.abs(v); if (a > peak) peak = a; }
+      yBounds = { min: 0, max: Math.max(peak * 1.10, HIST_GEO_MIN_INS) };
     } else if (ch === 'MicL' && micUnit === 'dBL') {
-      // Pin baseline at the chart floor (which matches what we flooded
-      // null/quiet samples to), top at the actual peak + a few dB headroom.
+      // Mic in dBL — pin baseline at noise-floor minimum (where we floored
+      // quiet samples), top at actual peak + a few dB headroom.
       const peakDbl = (typeof chPeak === 'number' && isFinite(chPeak))
         ? chPeak + 5
         : 100;
       yBounds = { min: MIC_DBL_FLOOR, max: Math.max(peakDbl, MIC_DBL_FLOOR + 20) };
+    } else if (ch === 'MicL' && isHistogram && micUnit === 'psi') {
+      // Mic histogram in psi — same minimum-range treatment as geo.
+      // 0.001 psi ≈ 110 dBL — typical "loud" mic peak.  Quiet events
+      // sit near the bottom.
+      const HIST_MIC_MIN_PSI = 0.001;
+      let peak = 0;
+      for (const v of values) { const a = Math.abs(v); if (a > peak) peak = a; }
+      yBounds = { min: 0, max: Math.max(peak * 1.10, HIST_MIC_MIN_PSI) };
     }
 
     _scCharts[ch] = new Chart(canvas, {
-- 
2.52.0


From a5888e1b5c0551410cf7f53672f7d2e68d288554 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 04:33:53 +0000
Subject: [PATCH 33/42] report_pdf: PDF histogram aggregation + fix
 footer/x-axis overlap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues spotted on a histogram event PDF:

1. Footer scale ("Time — /div  Amplitude Geo: X in/s/div  Mic: Y
   psi(L)/div") was colliding with the x-axis tick labels (0, 20,
   40, 60...) because both were drawn on the same Y row.
   Fix: bumped gridspec bottom margin from 0.06 → 0.12, moved the
   footer text from (0.07, 0.045) → (0.11, 0.030) so it sits below
   the tick labels and starts at the gridspec's left margin (0.11),
   and moved the page-bottom Created/Event line from y=0.015 →
   y=0.005 (font size 7 → 6).  Trigger legend on waveforms moved
   from (0.07, 0.030) → (0.11, 0.018).  Everything stacks cleanly
   now without collision.

2. PDF was showing the raw codec output (~150+ bars per histogram)
   instead of BW's per-interval aggregation.  Why: the aggregation
   I'd added to /db/events/{id}/waveform.json wasn't replicated in
   the PDF gather path.  Now: gather_report_data does the same
   max-per-group aggregation when bw_report.histogram.n_intervals is
   populated, AND derives per-interval HH:MM:SS labels from the
   start time + interval_size_s.  Result: histogram PDFs now match
   BW's display (one bar per BW interval, x-axis labeled with actual
   times) — same fix as the modal chart, applied to the PDF.

For events ingested BEFORE the parser extension (no histogram block
in their sidecar), aggregation is a no-op — they still render with
per-block bars + interval-index x-axis (but the overlap fix applies
to them too).  Re-forwarding repopulates the histogram block.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/report_pdf.py | 66 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 55 insertions(+), 11 deletions(-)

diff --git a/sfm/report_pdf.py b/sfm/report_pdf.py
index 1df8699..6635f06 100644
--- a/sfm/report_pdf.py
+++ b/sfm/report_pdf.py
@@ -285,6 +285,43 @@ def gather_report_data(
         except Exception as exc:
             log.warning("gather_report_data: hdf5 read failed: %s", exc)
 
+    # ── Histogram aggregation ──
+    # Codec emits ~N per-block samples (typically 1/sec); BW reports
+    # one bar per configured interval (1 min / 5 min / etc.).  When
+    # bw_report.histogram.n_intervals is populated (events ingested
+    # with the parser extension), group max-per-group to match.  Also
+    # derives per-interval timestamps for the x-axis.  No-op for
+    # waveform events or when n_intervals is missing.
+    if rd.is_histogram and rd.histogram_n_intervals and rd.histogram_n_intervals >= 1:
+        n = int(rd.histogram_n_intervals)
+        for ch, vals in list(rd.channels.items()):
+            if not vals:
+                continue
+            per_group = len(vals) // n
+            remainder = len(vals) % n
+            agg: list = []
+            offset = 0
+            for i in range(n):
+                grp_size = per_group + (1 if i < remainder else 0)
+                if grp_size > 0:
+                    grp = vals[offset:offset + grp_size]
+                    agg.append(max((abs(v) for v in grp if v is not None), default=0))
+                    offset += grp_size
+                else:
+                    agg.append(0)
+            rd.channels[ch] = agg
+        # Derive per-interval HH:MM:SS labels if we have the start time + size
+        if rd.histogram_start_str and rd.histogram_interval_size_s and not rd.histogram_interval_times:
+            try:
+                import datetime as _dt
+                start = _dt.datetime.fromisoformat(rd.histogram_start_str)
+                rd.histogram_interval_times = [
+                    (start + _dt.timedelta(seconds=(i + 1) * rd.histogram_interval_size_s)).strftime("%H:%M:%S")
+                    for i in range(n)
+                ]
+            except Exception:
+                pass
+
     return rd
 
 
@@ -308,16 +345,18 @@ def render_event_report_pdf(rd: ReportData) -> bytes:
     else:
         _render_waveform_layout(fig, rd)
 
-    # Footer (common to both layouts) — Created date + Xmark-like attribution.
+    # Page footer (common to both layouts) — Created date + event id.
+    # Pushed to the very page bottom so it doesn't collide with the
+    # waveform footer scale / trigger legend lines just above.
     fig.text(
-        0.07, 0.015,
+        0.07, 0.005,
         f"Created: {rd.server_received_at or '—'}  •  seismo-relay",
-        fontsize=7, color="#888", ha="left",
+        fontsize=6, color="#888", ha="left",
     )
     fig.text(
-        0.93, 0.015,
+        0.93, 0.005,
         f"Event {rd.event_id[:8] if rd.event_id else '—'}",
-        fontsize=7, color="#888", ha="right",
+        fontsize=6, color="#888", ha="right",
     )
 
     buf = io.BytesIO()
@@ -331,10 +370,13 @@ def _render_waveform_layout(fig, rd: ReportData) -> None:
 
     Stats table includes Time (Rel. to Trig), Peak Accel, Peak Disp.
     Left margin sized to fit the channel labels (MicL/Long/Vert/Tran).
+    Extra bottom margin reserves space for x-axis tick labels +
+    "Amplitude Geo: X in/s/div Mic: Y psi(L)/div" footer + trigger
+    legend without overlap.
     """
     gs = fig.add_gridspec(
         nrows=4, ncols=1,
-        left=0.11, right=0.94, top=0.97, bottom=0.06,
+        left=0.11, right=0.94, top=0.97, bottom=0.12,
         height_ratios=[1.7, 2.0, 1.8, 5.5],
         hspace=0.35,
     )
@@ -355,11 +397,13 @@ def _render_histogram_layout(fig, rd: ReportData) -> None:
 
     No USBM compliance chart (it's a waveform-only concept).  Stats table
     uses Date + Time-of-peak instead of relative-time + accel + disp.
-    Left margin sized to fit the channel labels.
+    Left margin sized to fit the channel labels.  Extra bottom margin
+    leaves room for the x-axis time labels + footer scale legend
+    without overlap.
     """
     gs = fig.add_gridspec(
         nrows=4, ncols=1,
-        left=0.11, right=0.94, top=0.97, bottom=0.06,
+        left=0.11, right=0.94, top=0.97, bottom=0.12,
         height_ratios=[1.8, 0.9, 1.7, 5.6],
         hspace=0.35,
     )
@@ -718,12 +762,12 @@ def _draw_waveform_subplot(fig, gridspec_cell, rd: ReportData) -> None:
             geo_amp_div = f"{(amax * 1.1 * 2) / 10:.3f}"
             break
     fig.text(
-        0.07, 0.045,
+        0.11, 0.030,
         f"Time(Seconds) {div_s:.2f} sec/div   Amplitude Geo: {geo_amp_div} in/s/div   Mic: 0.001 psi(L)/div",
         fontsize=7, color="#444", ha="left",
     )
     fig.text(
-        0.07, 0.030,
+        0.11, 0.018,
         "Trigger = ▶━━━━━ ━━━━━━◀",
         fontsize=7, color="#444", ha="left",
     )
@@ -789,7 +833,7 @@ def _draw_histogram_subplot(fig, gridspec_cell, rd: ReportData) -> None:
             geo_amp_div = f"{amax / 5:.3f}"
             break
     fig.text(
-        0.07, 0.045,
+        0.11, 0.030,
         f"Time {interval_str} /div   Amplitude Geo: {geo_amp_div} in/s/div   Mic: 0.001 psi(L)/div",
         fontsize=7, color="#444", ha="left",
     )
-- 
2.52.0


From 53c05d93e2414f9dcbf7dc3760fa4ee953a5b1ef Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 05:31:08 +0000
Subject: [PATCH 34/42] delete: also clean up preserved _ASCII.TXT file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_cleanup_event_files() removes the on-disk artifacts when an event is
hard-deleted (binary, a5_pickle, sidecar, h5).  Today's .TXT
preservation feature added a new on-disk file (_ASCII.TXT next to the
binary) but the cleanup didn't know about it — so any event deleted
via /db/events/{id} (single) or /db/events/delete_bulk (or the
Terra-View "SFM Event DB Manager" UI which proxies through to those
endpoints) was leaving orphan .TXT files in the store.

Added "txt" to the cleanup list using the new
WaveformStore.txt_path_for().  Safe for old events without a .TXT —
the exists() check skips the unlink.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/server.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/sfm/server.py b/sfm/server.py
index 93ee110..ed42775 100644
--- a/sfm/server.py
+++ b/sfm/server.py
@@ -1987,10 +1987,15 @@ def _cleanup_event_files(row: dict) -> dict:
     base_name = bw_name or a5_name or sc_name
     if base_name:
         bw_path, a5_path = store.paths_for(serial, base_name)
-        sc_path = store.sidecar_path_for(serial, base_name)
-        h5_path = store.hdf5_path_for(serial, base_name)
+        sc_path  = store.sidecar_path_for(serial, base_name)
+        h5_path  = store.hdf5_path_for(serial, base_name)
+        # Preserved BW ASCII report (added 2026-05-27 with the .TXT
+        # preservation feature) — needs to be cleaned up too, otherwise
+        # deletes leave orphan _ASCII.TXT files behind.
+        txt_path = store.txt_path_for(serial, base_name)
         for kind, p in [("blastware", bw_path), ("a5_pickle", a5_path),
-                        ("sidecar", sc_path), ("hdf5", h5_path)]:
+                        ("sidecar", sc_path), ("hdf5", h5_path),
+                        ("txt", txt_path)]:
             try:
                 if p.exists():
                     p.unlink()
-- 
2.52.0


From 6381dcb3127b35c3012a89421de0228ccd0c0d95 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 05:41:10 +0000
Subject: [PATCH 35/42] tz: server-wide display timezone via TZ env var
 (default EST/EDT)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

User-reported issue: server logs were timestamped in UTC ("05:36:20"
when local was ~01:36 EDT), and the PDF report's "Created" footer
similarly showed raw UTC.  Inconsistent with the modal which already
converts to browser local via toLocaleString.

Solution: standard Linux TZ env var.  Set once in the container, and:
  - Python's datetime.now() uses local
  - Logging module's timestamps use local
  - matplotlib renderers + report_pdf formatters use local
  - astimezone() conversions resolve to the configured TZ

DB columns stay UTC (created_at uses SQLite's strftime('%Y-...Z', 'now')
which is always UTC, regardless of TZ env var — proper "store UTC,
display local" pattern).

Changes:
  - Dockerfile: install tzdata (python:3.11-slim omits the timezone
    database), set default TZ=America/New_York
  - sfm/report_pdf.py: _fmt_iso_to_bw and _split_iso_to_date_time now
    convert tz-aware inputs (Z-suffixed UTC or an explicit ±HH:MM
    offset) to local via astimezone(); naïve inputs (BW recorded-at,
    already unit-local) are returned as-is.
    New _to_display_local helper centralizes the logic.
  - "Created" line in the PDF page footer now uses the converted
    timestamp.

Override per-deployment via the TZ env var in docker-compose
(separate commit on terra-view side).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 Dockerfile        | 13 ++++++++++++-
 sfm/report_pdf.py | 47 ++++++++++++++++++++++++++++++++++-------------
 2 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index a9526a9..af55af5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,10 +2,21 @@ FROM python:3.11-slim
 
 WORKDIR /app
 
+# tzdata is required for the TZ env var to take effect (python:slim
+# omits the timezone database).  Without it, datetime.now() / logging
+# / matplotlib all stay in UTC regardless of TZ.  Default zone gets
+# set further down via ENV; users override per-deployment via the
+# `TZ` env var in docker-compose.
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends curl && \
+    apt-get install -y --no-install-recommends curl tzdata && \
     rm -rf /var/lib/apt/lists/*
 
+# Default display timezone — applied to server logs, datetime.now(),
+# matplotlib rendered timestamps, and any naïve-vs-aware datetime
+# conversions in the PDF renderer.  Override via TZ env var in
+# docker-compose; storage in the DB is always UTC regardless.
+ENV TZ=America/New_York
+
 COPY pyproject.toml requirements.txt ./
 COPY minimateplus ./minimateplus
 COPY micromate    ./micromate
diff --git a/sfm/report_pdf.py b/sfm/report_pdf.py
index 6635f06..2f57f63 100644
--- a/sfm/report_pdf.py
+++ b/sfm/report_pdf.py
@@ -348,9 +348,11 @@ def render_event_report_pdf(rd: ReportData) -> bytes:
     # Page footer (common to both layouts) — Created date + event id.
     # Pushed to the very page bottom so it doesn't collide with the
     # waveform footer scale / trigger legend lines just above.
+    # Convert UTC server_received_at to local for display.
+    created_local = _fmt_iso_to_bw(rd.server_received_at) if rd.server_received_at else "—"
     fig.text(
         0.07, 0.005,
-        f"Created: {rd.server_received_at or '—'}  •  seismo-relay",
+        f"Created: {created_local}  •  seismo-relay",
         fontsize=6, color="#888", ha="left",
     )
     fig.text(
@@ -419,31 +421,50 @@ def _render_histogram_layout(fig, rd: ReportData) -> None:
     _draw_histogram_subplot(fig, gs[3], rd)
 
 
+def _to_display_local(iso: str):
+    """Parse an ISO timestamp and return a datetime in the system's local
+    timezone (set by the TZ env var, default America/New_York via the
+    Dockerfile).
+
+    Behaviour:
+      - "...Z" or "...+HH:MM" suffix → tz-aware UTC → converted to local
+      - Naïve "YYYY-MM-DDTHH:MM:SS" (no tz) → returned as-is.  This
+        matches the convention used elsewhere in seismo-relay: BW's
+        recorded-at timestamps are naïve and ALREADY in the unit's
+        local clock; we don't second-guess them.
+    """
+    import datetime as _dt
+    dt = _dt.datetime.fromisoformat(iso.replace("Z", "+00:00"))
+    if dt.tzinfo is not None:
+        # Convert from UTC (or other tz) → local per the TZ env var.
+        # astimezone() without arg uses the system timezone.
+        dt = dt.astimezone()
+    return dt
+
+
 def _fmt_iso_to_bw(iso: Optional[str]) -> Optional[str]:
-    """Convert a ISO-8601 timestamp like '2026-05-16T22:30:37' to BW's
-    display format '22:30:37 May 16, 2026'.  Returns input unchanged if
-    it doesn't look like ISO."""
+    """Convert an ISO-8601 timestamp to BW's display format
+    '22:30:37 May 16, 2026'.  UTC inputs (with Z suffix) are
+    converted to the system's local timezone first; naïve inputs
+    are formatted as-is.  Returns input unchanged on parse failure."""
     if not iso or "T" not in iso:
         return iso
     try:
-        import datetime as _dt
-        dt = _dt.datetime.fromisoformat(iso.replace("Z", "+00:00"))
-        return dt.strftime("%H:%M:%S %B %d, %Y").replace(" 0", " ")
+        return _to_display_local(iso).strftime("%H:%M:%S %B %d, %Y").replace(" 0", " ")
     except Exception:
         return iso
 
 
 def _split_iso_to_date_time(iso: Optional[str]) -> tuple[Optional[str], Optional[str]]:
-    """Split an ISO timestamp into BW-formatted ("May 27 /26", "06:06:14")
+    """Split an ISO timestamp into BW-formatted ('May 27 /26', '06:06:14')
     date+time strings.  Used for the histogram stats table where the
-    Date and Time rows are presented separately.  Returns (None, None)
-    if the input isn't a valid ISO datetime."""
+    Date and Time rows are presented separately.  UTC inputs are
+    converted to local time first.  Returns (None, None) on parse failure."""
     if not iso:
         return (None, None)
     try:
-        import datetime as _dt
-        dt = _dt.datetime.fromisoformat(iso.replace("Z", "+00:00"))
-        # BW format: "May 27 /26" (3-letter month + 2-digit year)
+        dt = _to_display_local(iso)
+        # BW format: 'May 27 /26' (3-letter month + 2-digit year)
         date_str = dt.strftime("%b %d /%y").replace(" 0", " ")
         time_str = dt.strftime("%H:%M:%S")
         return (date_str, time_str)
-- 
2.52.0


From 86325b9bab97b8dc17589690195af86a08563004 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 05:42:18 +0000
Subject: [PATCH 36/42] docs: roadmap entry for a SECOND undecoded histogram
 sub-format (S353)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Observed in fresh ingest logs on 2026-05-28: BE17353 events
(S353L4H2.FZ0H, S353L4H2.P00H, etc.) cause "body codec failed to
decode" warnings.  Different from the byte[5]!=0 case already tracked
(T190 / O121) — these have byte[5]==0x00 with what looks like a
valid block header, but the walker finds zero data blocks anyway.

Operational impact identical to the existing case: ingestion
succeeds, DB peaks come from bw_report overlay, only the chart is
empty.  No data loss.

Pinning so it doesn't get lost — needs a hex dump of one body to
work out what's different about these.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index bff24db..114a943 100644
--- a/README.md
+++ b/README.md
@@ -567,4 +567,5 @@ Implementation steps (concrete):
 - [ ] RV55 DCD/DTR — newer RV55 firmware doesn't assert DCD by default; units don't resume monitoring after call-home disconnect (`--restart-monitoring` flag deferred).
 - [ ] **NULL-timestamp duplicate-row dedup.**  A small handful of events (2 known on prod as of 2026-05-22) have `events.timestamp IS NULL` because the codec couldn't extract a timestamp from the binary footer.  The `UNIQUE(serial, timestamp)` constraint doesn't fire on `NULL` (SQL semantics: `NULL ≠ NULL`), so every `--force` backfill INSERTs a new row instead of UPSERTing the existing one.  Cleanup: a one-shot SQL query that keeps only the newest row per `(serial, blastware_filename)` and deletes the rest.  Longer-term: extend the unique key to `(serial, COALESCE(timestamp, blastware_filename))` or reject inserts with NULL timestamp.
 - [ ] **Histogram body sub-format with `byte[5] != 0`.**  ~3 events on prod (`T190LD5Q.LD0H`, `O121L4L1.GU0H`) use a histogram body my walker doesn't recognize — the first block has `byte[5] = 0x01` or `0x07` instead of `0x00`, and the entire body lacks the `1e 0a 00 00` tail signature.  Codec returns 0 valid blocks; their DB PVS comes from the bw_report ASCII overlay (which BW computed from the same binary, so the DB columns are correct).  Only the `.h5` waveform plot is empty.  Cracking the sub-format would unlock the plot.  Needs binary+ASCII pairs from a few `byte[5]!=0` events; same RE approach as the K558 case.
+- [ ] **Histogram body sub-format with `byte[5] == 0x00` but undecodable.**  Observed 2026-05-28 on BE17353 (S353) events: `S353L4H2.FZ0H`, `S353L4H2.P00H`, `S353L4H3.7O0H`, `S353L4H3.E10H`.  Body starts `00 00 00 01 0a 00 XX 00 ...` which LOOKS like a valid histogram block header (marker 0x000a at byte[4:6] ✓, byte[5]=0x00 normal-format ✓), but the walker finds zero data blocks across the whole body.  Likely an extra header before the block stream OR a different tail signature than `1e 0a 00 00`.  Smaller body lengths (1900-2100 bytes) suggest these may be short-recording histogram variants.  Same operational impact as the byte[5]!=0 case: event ingests cleanly, DB peaks correct via bw_report overlay, only the chart is empty.  Worth dumping a hex view of one body to diagnose.
 - [ ] **Sensor-check waveform extraction from the BW binary.**  BW's Event Report PDFs include a narrow panel on the right side of the waveform plot showing each channel's response to the sensor self-check signal (a damped sinusoid for geo, sawtooth-at-test-freq for mic).  Our parser captures the test RESULTS (`test_freq_hz`, `test_ratio`, `test_amplitude_mv`, `test_results` pass/fail) and the PDF + modal display them as text — but BW's per-sample sensor-check waveform isn't accessible to us today.  Two paths to add it:  (a) RE the binary to find where the sensor-check samples are stored — could be a section before STRT, after the footer, or in a separate sub-record; protocol reference doesn't currently mention it.  (b) If samples aren't in the binary, synthesize a representative waveform from the test parameters (damped sinusoid at `test_freq_hz` with damping from `test_ratio`).  Path (a) is the honest answer; path (b) is decorative.  Until either lands, the text-only sensor-check display in the report is fine.
-- 
2.52.0


From ad2702d4bf73f9f9a7dd1d21623a63782297e415 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 18:07:41 +0000
Subject: [PATCH 37/42] fix(report_pdf): add missing histogram_interval_size_s
 field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The histogram-interval-times derivation block at line 314 references
rd.histogram_interval_size_s, but the field wasn't declared on the
ReportData dataclass — only the string form histogram_interval_size
was.  Result: every PDF render of a histogram event raised
AttributeError → 500 from /db/events/{id}/report.pdf.

Cause: when the histogram aggregation block was inlined into
gather_report_data, the seconds-numeric counterpart that the
projection already carries (bw_report.histogram.interval_size_s) was
never wired into the dataclass.  Waveform PDFs weren't affected
because the offending line is gated on is_histogram.

Fix: add the field, read it from the projection alongside the other
histogram keys.  No-op for waveform events (the field stays None and
the gate skips it).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/report_pdf.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sfm/report_pdf.py b/sfm/report_pdf.py
index 2f57f63..2a30939 100644
--- a/sfm/report_pdf.py
+++ b/sfm/report_pdf.py
@@ -129,6 +129,7 @@ class ReportData:
     histogram_stop_str:     Optional[str] = None
     histogram_n_intervals:  Optional[float] = None     # 4.00
     histogram_interval_size: Optional[str] = None      # "1 minute"
+    histogram_interval_size_s: Optional[float] = None  # 60.0 — numeric seconds, used to derive interval_times
     histogram_interval_times: list[str] = field(default_factory=list)  # per-interval timestamps for x-axis
 
     # Peak Vector Sum metadata (histograms show absolute date+time)
@@ -265,6 +266,7 @@ def gather_report_data(
             rd.histogram_stop_str    = hist_block.get("stop")
             rd.histogram_n_intervals = hist_block.get("n_intervals")
             rd.histogram_interval_size = hist_block.get("interval_size")
+            rd.histogram_interval_size_s = hist_block.get("interval_size_s")
             rd.histogram_interval_times = hist_block.get("interval_times") or []
 
     # ── Waveform samples — from the .h5 via the existing helper ──
-- 
2.52.0


From f6abe3caa00ab456556d4c2bf1243952973baab0 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 18:22:20 +0000
Subject: [PATCH 38/42] fix(report_pdf): histogram geo channels share
 nice-quantized y-axis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two related visual bugs on histogram PDFs:

1. Per-channel auto-scale meant Tran/Vert/Long had different y-axes
   (e.g. 0-0.015, 0-0.025, 0-0.020) — bars looked taller on the
   channel that happened to be quietest.  Not directly comparable.

2. Footer "Amplitude Geo: X in/s/div" was just amax/5 of the FIRST
   geo channel with data, with no LSB quantization — producing
   nonsense like 0.003 in/s/div when the geophone LSB is 0.005.

Fix: compute a single shared geo y-axis range from max(Tran,Vert,Long),
quantize the per-division step to a BW-style 1-2.5-5 ladder that starts
at the 0.005 LSB (0.005, 0.01, 0.025, 0.05, 0.1, 0.25, ...), apply the same
ylim + ticks to all three geo subplots, and use that same step for the
footer label.  MicL stays on its own auto-scale (different units).

Verified across edge cases including the reported event
(geo max 0.025 → 0.005/div, top 0.025), small PVS events, and large
blast amplitudes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/report_pdf.py | 52 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/sfm/report_pdf.py b/sfm/report_pdf.py
index 2a30939..a051393 100644
--- a/sfm/report_pdf.py
+++ b/sfm/report_pdf.py
@@ -796,11 +796,31 @@ def _draw_waveform_subplot(fig, gridspec_cell, rd: ReportData) -> None:
     )
 
 
+def _nice_geo_step(amax: float) -> float:
+    """Pick a "nice" per-division step for the geo y-axis.
+
+    Geo LSB is 0.005 in/s — sub-LSB steps like 0.003/div are nonsense.
+    Quantize to the BW-style 1-2.5-5 sequence (0.005, 0.01, 0.025, 0.05,
+    …) and return the smallest step where 5 divisions >= amax, so the
+    top of the chart lands on a tick.
+    """
+    if amax <= 0:
+        return 0.005
+    for step in (0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0):
+        if step * 5 >= amax:
+            return step
+    return 10.0
+
+
 def _draw_histogram_subplot(fig, gridspec_cell, rd: ReportData) -> None:
     """4-channel stacked histogram bar chart — per-interval peaks.
 
     X-axis labeled with the actual times from rd.histogram_interval_times
     when available; otherwise interval index.
+
+    The three geo channels share a single y-axis scale (a BW-style nice
+    multiple of the 0.005 in/s LSB) so bar heights are directly
+    comparable across channels.  MicL has its own auto-scale.
     """
     inner = gridspec_cell.subgridspec(4, 1, hspace=0.0)
     order = ["MicL", "Long", "Vert", "Tran"]
@@ -809,6 +829,16 @@ def _draw_histogram_subplot(fig, gridspec_cell, rd: ReportData) -> None:
     # X-axis: use absolute time labels if we have them, else interval index
     have_times = bool(rd.histogram_interval_times)
 
+    # Shared geo scale: max across Tran/Vert/Long, quantized to a nice
+    # tick step.  Used for ylim + the footer "Amplitude Geo: X in/s/div".
+    geo_amax = 0.0
+    for gch in ("Tran", "Vert", "Long"):
+        gv = rd.channels.get(gch) or []
+        if gv:
+            geo_amax = max(geo_amax, max(abs(x) for x in gv if x is not None))
+    geo_step = _nice_geo_step(geo_amax)
+    geo_top  = geo_step * 5  # 5 divisions — top tick lands at this value
+
     for i, ch in enumerate(order):
         ax = fig.add_subplot(inner[i])
         values = rd.channels.get(ch) or []
@@ -821,9 +851,13 @@ def _draw_histogram_subplot(fig, gridspec_cell, rd: ReportData) -> None:
             xs = np.arange(len(abs_vals))
             color = _channel_axis_color(ch)
             ax.bar(xs, abs_vals, color=color, width=0.85, linewidth=0)
-            amax = max(abs_vals, default=0)
-            if amax > 0:
-                ax.set_ylim(0, amax * 1.10)
+            if ch in ("Tran", "Vert", "Long"):
+                ax.set_ylim(0, geo_top)
+                ax.set_yticks([j * geo_step for j in range(6)])
+            else:
+                amax = max(abs_vals, default=0)
+                if amax > 0:
+                    ax.set_ylim(0, amax * 1.10)
         ax.set_ylabel(ch, fontsize=8, rotation=0, ha="right", va="center",
                       color=_channel_axis_color(ch), weight="bold", labelpad=14)
         ax.text(1.005, 0.02, "0.0", transform=ax.transAxes,
@@ -846,15 +880,11 @@ def _draw_histogram_subplot(fig, gridspec_cell, rd: ReportData) -> None:
             ax.tick_params(axis="x", labelsize=7)
         ax.tick_params(axis="y", labelsize=6)
 
-    # Footer scale info — histograms use minute/div
+    # Footer scale info — histograms use minute/div.  Reuses the shared
+    # geo_step computed above so the label matches the actual y-axis
+    # tick spacing on every subplot.
     interval_str = rd.histogram_interval_size or "—"
-    geo_amp_div = "—"
-    for ch in ("Tran", "Vert", "Long"):
-        v = rd.channels.get(ch) or []
-        if v:
-            amax = max(abs(x) for x in v)
-            geo_amp_div = f"{amax / 5:.3f}"
-            break
+    geo_amp_div = f"{geo_step:.3f}"
     fig.text(
         0.11, 0.030,
         f"Time {interval_str} /div   Amplitude Geo: {geo_amp_div} in/s/div   Mic: 0.001 psi(L)/div",
-- 
2.52.0


From 780b45a37124711d020d57f4767d5c8b01ecd524 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 18:38:49 +0000
Subject: [PATCH 39/42] =?UTF-8?q?feat:=20render=20">100"=20for=20above-ran?=
 =?UTF-8?q?ge=20ZC=20Freq=20instead=20of=20"=E2=80=94"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BW writes ">100 Hz" for ZC Freq when the zero-crossing algorithm sees a
peak too fast to count — the device's reporting ceiling is 100 Hz on
V10.72.  Our parser fell back to None via _parse_number (which requires
a leading digit), so the PDF rendered "—" where BW shows ">100".

Mirrors the OORANGE/saturated pattern already used for PPV and PSPL:
parser stores the threshold (100.0) on zc_freq_hz + sets a new
zc_freq_above_range flag.  Projection carries the flag through to the
sidecar; PDF renderer prepends ">" when set.

Affects both per-channel stats tables (waveform + histogram variants)
and the mic block's ZC Freq row.

Verified on the real T190LD5Q.LK0W fixture: Tran zc_freq_hz=100.0
above_range=True; Vert/Long (normal values) above_range=False; "N/A"
still produces zc_freq_hz=None which renders as "—" (unchanged).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 minimateplus/bw_ascii_report.py | 43 ++++++++++++++++++++++++++++++---
 minimateplus/event_file_io.py   | 15 ++++++++----
 sfm/report_pdf.py               | 36 ++++++++++++++++-----------
 tests/test_bw_ascii_report.py   | 34 ++++++++++++++++++++++++++
 4 files changed, 105 insertions(+), 23 deletions(-)

diff --git a/minimateplus/bw_ascii_report.py b/minimateplus/bw_ascii_report.py
index 2d85b97..2f919c4 100644
--- a/minimateplus/bw_ascii_report.py
+++ b/minimateplus/bw_ascii_report.py
@@ -67,6 +67,11 @@ class ChannelStats:
     # to render "> 10 in/s" or "saturated" instead of trusting the
     # value as an exact measurement.
     ppv_saturated:     bool = False
+    # Set when BW writes ">100 Hz" for ZC Freq — the zero-crossing
+    # algorithm's peak frequency exceeded the device's reporting
+    # ceiling (typically 100 Hz on V10.72).  zc_freq_hz gets the
+    # threshold (100.0) as a lower bound; downstream UI renders ">100".
+    zc_freq_above_range: bool = False
 
 
 @dataclass
@@ -81,6 +86,9 @@ class MicStats:
     # 140 dBL (typical NL-43 max; some units cap at 148).  Consumers
     # should render "> 140 dB(L)" or similar when this flag is set.
     pspl_saturated:    bool = False
+    # Same semantics as ChannelStats.zc_freq_above_range — mic ZC
+    # peak exceeded device reporting ceiling.
+    zc_freq_above_range: bool = False
 
 
 @dataclass
@@ -119,6 +127,20 @@ def _is_oorange(value: str) -> bool:
     return any(m in s for m in _OORANGE_MARKERS)
 
 
+def _parse_above_range(value: str) -> Optional[float]:
+    """For BW "above-range" markers like ">100 Hz", return the threshold.
+
+    BW writes ZC Freq as ">100 Hz" when the zero-crossing algorithm sees
+    a peak too fast to count (device cuts off at 100 Hz).  Returns the
+    numeric portion after the '>' (e.g. 100.0), or None if `value` is
+    not an above-range marker.
+    """
+    s = value.strip()
+    if not s.startswith(">"):
+        return None
+    return _parse_number(s[1:])
+
+
 @dataclass
 class BwAsciiReport:
     """Structured representation of one BW per-event ASCII export."""
@@ -527,10 +549,17 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
                     cs.ppv_saturated = True
                 else:
                     cs.ppv_ips = _parse_number(value)
+            elif stat == "ZC Freq":
+                # ">100 Hz" → store threshold + flag; numeric → parse normally
+                threshold = _parse_above_range(value)
+                if threshold is not None:
+                    cs.zc_freq_hz = threshold
+                    cs.zc_freq_above_range = True
+                else:
+                    cs.zc_freq_hz = _parse_number(value)
             else:
                 num = _parse_number(value)
-                if   stat == "ZC Freq":             cs.zc_freq_hz     = num
-                elif stat == "Time of Peak":        cs.time_of_peak_s = num
+                if   stat == "Time of Peak":        cs.time_of_peak_s = num
                 elif stat == "Peak Acceleration":   cs.peak_accel_g   = num
                 elif stat == "Peak Displacement":   cs.peak_disp_in   = num
 
@@ -627,9 +656,15 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
             cs = report.channels.setdefault("MicL", ChannelStats())
             cs.time_of_peak_s = report.mic.time_of_peak_s
         elif key == "MicL ZC Freq":
-            report.mic.zc_freq_hz = _parse_number(value)
+            threshold = _parse_above_range(value)
+            if threshold is not None:
+                report.mic.zc_freq_hz         = threshold
+                report.mic.zc_freq_above_range = True
+            else:
+                report.mic.zc_freq_hz = _parse_number(value)
             cs = report.channels.setdefault("MicL", ChannelStats())
-            cs.zc_freq_hz = report.mic.zc_freq_hz
+            cs.zc_freq_hz          = report.mic.zc_freq_hz
+            cs.zc_freq_above_range = report.mic.zc_freq_above_range
 
         # ── Sensor self-check ────────────────────────────────────────────────
         elif key in (
diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py
index 36bf56d..7dc74c1 100644
--- a/minimateplus/event_file_io.py
+++ b/minimateplus/event_file_io.py
@@ -125,6 +125,10 @@ def _bw_report_to_dict(report: BwAsciiReport) -> dict:
         # is the channel range max (a lower bound), not an exact reading.
         if getattr(cs, "ppv_saturated", False):
             out["ppv_saturated"] = True
+        # ZC Freq above device reporting ceiling (BW ">100 Hz") — value
+        # in zc_freq_hz is the threshold, not an exact measurement.
+        if getattr(cs, "zc_freq_above_range", False):
+            out["zc_freq_above_range"] = True
         return out
 
     def _sc(ch_name: str) -> dict:
@@ -187,11 +191,12 @@ def _bw_report_to_dict(report: BwAsciiReport) -> dict:
             },
         },
         "mic": {
-            "weighting":        report.mic.weighting,
-            "pspl_dbl":         report.mic.pspl_dbl,
-            "pspl_saturated":   bool(getattr(report.mic, "pspl_saturated", False)),
-            "zc_freq_hz":       report.mic.zc_freq_hz,
-            "time_of_peak_s":   report.mic.time_of_peak_s,
+            "weighting":             report.mic.weighting,
+            "pspl_dbl":              report.mic.pspl_dbl,
+            "pspl_saturated":        bool(getattr(report.mic, "pspl_saturated", False)),
+            "zc_freq_hz":            report.mic.zc_freq_hz,
+            "zc_freq_above_range":   bool(getattr(report.mic, "zc_freq_above_range", False)),
+            "time_of_peak_s":        report.mic.time_of_peak_s,
         },
         "sensor_check": {
             "tran": _sc("Tran"),
diff --git a/sfm/report_pdf.py b/sfm/report_pdf.py
index a051393..6618d9a 100644
--- a/sfm/report_pdf.py
+++ b/sfm/report_pdf.py
@@ -99,6 +99,7 @@ class ReportData:
     mic_pspl_time_s:        Optional[float] = None
     mic_pspl_when_str:      Optional[str] = None    # histogram absolute date+time, BW-formatted
     mic_zc_freq_hz:         Optional[float] = None
+    mic_zc_freq_above_range: bool           = False
     mic_channel_test_result: Optional[str] = None
     mic_channel_test_freq_hz: Optional[float] = None
     mic_channel_test_amp_mv: Optional[float] = None
@@ -216,7 +217,8 @@ def gather_report_data(
             # Inverse of the dBL formula → psi.  Mirrors waveform_codec convention.
             rd.mic_pspl_psi = DBL_REF_PSI * (10 ** (rd.mic_pspl_dbl / 20))
         rd.mic_pspl_time_s = mic.get("time_of_peak_s")
-        rd.mic_zc_freq_hz  = mic.get("zc_freq_hz")
+        rd.mic_zc_freq_hz             = mic.get("zc_freq_hz")
+        rd.mic_zc_freq_above_range    = bool(mic.get("zc_freq_above_range"))
         sc_mic = (bw.get("sensor_check") or {}).get("mic") or {}
         rd.mic_channel_test_result   = sc_mic.get("result")
         rd.mic_channel_test_freq_hz  = sc_mic.get("freq_hz")
@@ -236,15 +238,16 @@ def gather_report_data(
             ch_when_iso = peak_when.get(ch_label)
             peak_date, peak_time = _split_iso_to_date_time(ch_when_iso)
             rd.channel_stats.append({
-                "name":          ch_label,
-                "ppv_ips":       ch.get("ppv_ips"),
-                "zc_freq_hz":    ch.get("zc_freq_hz"),
-                "time_of_peak_s": ch.get("time_of_peak_s"),
-                "peak_accel_g":  ch.get("peak_accel_g"),
-                "peak_disp_in":  ch.get("peak_disp_in"),
-                "sensor_check":  sc_ch.get("result"),
-                "peak_date":     peak_date,
-                "peak_time":     peak_time,
+                "name":               ch_label,
+                "ppv_ips":            ch.get("ppv_ips"),
+                "zc_freq_hz":         ch.get("zc_freq_hz"),
+                "zc_freq_above_range": bool(ch.get("zc_freq_above_range")),
+                "time_of_peak_s":     ch.get("time_of_peak_s"),
+                "peak_accel_g":       ch.get("peak_accel_g"),
+                "peak_disp_in":       ch.get("peak_disp_in"),
+                "sensor_check":       sc_ch.get("result"),
+                "peak_date":          peak_date,
+                "peak_time":          peak_time,
             })
 
         # MicL peak time (used in the mic block — "PSPL ... on DATE at TIME")
@@ -612,7 +615,8 @@ def _mic_rows(rd: ReportData) -> list[tuple[str, Optional[str]]]:
             line += f" at {rd.mic_pspl_time_s:.3f} sec."
         rows.append(("PSPL", line))
     if rd.mic_zc_freq_hz is not None:
-        rows.append(("ZC Freq", f"{rd.mic_zc_freq_hz:.0f} Hz"))
+        prefix = ">" if rd.mic_zc_freq_above_range else ""
+        rows.append(("ZC Freq", f"{prefix}{rd.mic_zc_freq_hz:.0f} Hz"))
     if rd.mic_channel_test_result:
         line = rd.mic_channel_test_result
         if rd.mic_channel_test_freq_hz is not None and rd.mic_channel_test_amp_mv is not None:
@@ -684,13 +688,17 @@ def _draw_stats_table(ax, rd: ReportData, rows_spec: list[tuple[str, str, str]])
     ch_lookup = {c["name"]: c for c in rd.channel_stats}
 
     def _cell(field, ch_name):
-        val = ch_lookup.get(ch_name, {}).get(field)
+        ch_rec = ch_lookup.get(ch_name, {})
+        val = ch_rec.get(field)
         if val is None:
             return "—"
         if isinstance(val, float):
-            # ZC Freq is integer-formatted in BW; everything else with 3 decimals
+            # ZC Freq is integer-formatted in BW; ">100 Hz" sentinel
+            # rendered as ">N" (val carries the threshold).  Everything
+            # else gets 3 decimals.
             if field == "zc_freq_hz":
-                return f"{val:.0f}"
+                prefix = ">" if ch_rec.get("zc_freq_above_range") else ""
+                return f"{prefix}{val:.0f}"
             return f"{val:.3f}"
         return str(val)
 
diff --git a/tests/test_bw_ascii_report.py b/tests/test_bw_ascii_report.py
index ddfae70..5756fb2 100644
--- a/tests/test_bw_ascii_report.py
+++ b/tests/test_bw_ascii_report.py
@@ -441,6 +441,40 @@ def test_real_oorange_event_t190_parses():
     assert r.channels["Long"].ppv_ips == pytest.approx(2.83)
     assert r.peak_vector_sum_saturated is True
     assert r.peak_vector_sum_time_s == pytest.approx(0.007)
+    # Same fixture: Tran ZC Freq is ">100 Hz" — must parse as 100 +
+    # above_range flag, not None (which would render as "—" on the PDF).
+    assert r.channels["Tran"].zc_freq_hz == 100.0
+    assert r.channels["Tran"].zc_freq_above_range is True
+    # Vert/Long are normal numeric values; flag stays False.
+    assert r.channels["Vert"].zc_freq_above_range is False
+    assert r.channels["Long"].zc_freq_above_range is False
+
+
+def test_above_range_marker_treated_as_zc_threshold():
+    """BW writes '>100 Hz' for ZC Freq when the zero-crossing algorithm
+    sees a peak too fast to count (cuts off at the device's 100 Hz
+    reporting ceiling).  Parser must store the threshold + flag, not
+    fall back to None.
+    """
+    txt = """\
+"Event Type : Full Waveform"
+"Serial Number : BE18190"
+"Tran ZC Freq : >100 Hz"
+"Vert ZC Freq : 73 Hz"
+"Long ZC Freq : N/A Hz"
+"MicL  ZC Freq : >100 Hz"
+"""
+    r = parse_report(txt)
+    assert r.channels["Tran"].zc_freq_hz == 100.0
+    assert r.channels["Tran"].zc_freq_above_range is True
+    assert r.channels["Vert"].zc_freq_hz == 73.0
+    assert r.channels["Vert"].zc_freq_above_range is False
+    # N/A → None, flag stays False
+    assert r.channels["Long"].zc_freq_hz is None
+    assert r.channels["Long"].zc_freq_above_range is False
+    # Mic above-range
+    assert r.mic.zc_freq_hz == 100.0
+    assert r.mic.zc_freq_above_range is True
 
 
 def test_real_histogram_fixture_populates_sensor_location():
-- 
2.52.0


From 6a73523e4d2d245cfa3967e8ed258c9beee6c117 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 18:47:37 +0000
Subject: [PATCH 40/42] ui: surface per-channel ZC Freq (and ">100") in event
 modals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PDF report shows per-channel ZC Freq alongside PPV in the stats
block, but neither modal exposed it.  Now that the sidecar projection
carries zc_freq_hz + zc_freq_above_range, plumb them through:

- sfm_webapp.html: inline suffix on existing Peaks cells, e.g.
  "Tran  0.04500 in/s · >100 Hz".  Empty suffix when no ZC is
  available (legacy events without a preserved .TXT).

- event_browser.html: new ZC Freq column on the per-channel stats
  table.  Required adding a parallel sidecar fetch in loadEvent()
  (waveform.json alone doesn't carry bw_report).  Fetch failure is
  non-fatal — falls back to "—" in the new column.

Above-range ZC peaks (BW ">100 Hz") render with a literal ">"
prefix mirroring the PDF, so operators don't have to generate the
PDF to see when a channel hit the zero-crossing ceiling.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sfm/event_browser.html | 37 ++++++++++++++++++++++++++-----------
 sfm/sfm_webapp.html    | 22 ++++++++++++++++++----
 2 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/sfm/event_browser.html b/sfm/event_browser.html
index bbd960f..ca19794 100644
--- a/sfm/event_browser.html
+++ b/sfm/event_browser.html
@@ -499,6 +499,14 @@ async function loadEvent(eventId) {
   renderEventList();
   setStatus('Loading waveform…');
   try {
+    // Sidecar fetch runs in parallel — its bw_report block carries ZC
+    // Freq + above-range flags + sensor-check results that the per-
+    // channel stats table surfaces.  Failures are non-fatal (legacy
+    // events without a preserved .TXT have no sidecar bw_report).
+    const sidecarP = fetch(`${apiBase}/db/events/${eventId}/sidecar`)
+      .then(r => r.ok ? r.json() : null)
+      .catch(() => null);
+
     const r = await fetch(`${apiBase}/db/events/${eventId}/waveform.json`);
     if (!r.ok) {
       if (r.status === 404) {
@@ -511,7 +519,8 @@ async function loadEvent(eventId) {
     renderWaveform(data);
     // Also fetch metadata from the events list for richer header
     const ev = allEvents.find(e => e.id === eventId);
-    renderMeta(data, ev);
+    const sidecar = await sidecarP;
+    renderMeta(data, ev, sidecar);
     setStatus(`Event loaded.`, 'ok');
   } catch (e) {
     setStatus(`Failed to load event: ${e.message}`, 'error');
@@ -528,7 +537,7 @@ function showEmpty(msg) {
   charts = {};
 }
 
-function renderMeta(data, ev) {
+function renderMeta(data, ev, sidecar) {
   const metaDiv = document.getElementById('event-meta');
   const fields = [
     ['Serial',      data.serial || ev?.serial || '—'],
@@ -543,14 +552,20 @@ function renderMeta(data, ev) {
   ];
 
   // Per-channel stats table mirroring the printout's middle block.
-  // Pulls per-channel PPV from the events row (DB columns) and additional
-  // details (peak time, peak accel, peak displacement, sensor check) from
-  // bw_report when present.
+  // PPV from the events DB row; ZC Freq + saturation flags from the
+  // sidecar's bw_report block (when a .TXT was preserved on ingest).
+  const bwrPeaks = (sidecar?.bw_report || {}).peaks || {};
+  const bwrMic   = (sidecar?.bw_report || {}).mic   || {};
   const fmt = v => (v == null ? '—' : (typeof v === 'number' ? v.toFixed(3) : v));
+  const fmtZc = bwr => {
+    if (!bwr || bwr.zc_freq_hz == null) return '—';
+    const prefix = bwr.zc_freq_above_range ? '>' : '';
+    return `${prefix}${Math.round(bwr.zc_freq_hz)} Hz`;
+  };
   const rows = [
-    ['Tran', ev?.tran_ppv],
-    ['Vert', ev?.vert_ppv],
-    ['Long', ev?.long_ppv],
+    ['Tran', ev?.tran_ppv, fmtZc(bwrPeaks.tran)],
+    ['Vert', ev?.vert_ppv, fmtZc(bwrPeaks.vert)],
+    ['Long', ev?.long_ppv, fmtZc(bwrPeaks.long)],
   ];
   // Mic display honors the current user preference (dBL default).
   // mic_ppv is stored as raw psi on series3 events; convert when needed.
@@ -568,11 +583,11 @@ function renderMeta(data, ev) {
   const statsHtml = `
     <table class="stats-table">
       <thead>
-        <tr><th>Channel</th><th>PPV (in/s)</th></tr>
+        <tr><th>Channel</th><th>PPV (in/s)</th><th>ZC Freq</th></tr>
       </thead>
       <tbody>
-        ${rows.map(([ch, ppv]) => `<tr><td>${ch}</td><td>${fmt(ppv)}</td></tr>`).join('')}
-        <tr><td>MicL</td><td>${micStr}</td></tr>
+        ${rows.map(([ch, ppv, zc]) => `<tr><td>${ch}</td><td>${fmt(ppv)}</td><td>${zc}</td></tr>`).join('')}
+        <tr><td>MicL</td><td>${micStr}</td><td>${fmtZc(bwrMic)}</td></tr>
       </tbody>
     </table>
   `;
diff --git a/sfm/sfm_webapp.html b/sfm/sfm_webapp.html
index 5021c79..7f283a4 100644
--- a/sfm/sfm_webapp.html
+++ b/sfm/sfm_webapp.html
@@ -2886,6 +2886,12 @@ function _renderSidecar(data) {
   const bw   = data.blastware    || {};
   const src  = data.source       || {};
   const rev  = data.review       || {};
+  // bw_report carries the per-channel ASCII-derived stats (ZC Freq,
+  // saturation flags, peak time, etc.).  Only present on events
+  // ingested with a preserved .TXT (post-2026-05-27); falls back to
+  // empty for legacy events.
+  const bwrPeaks = (data.bw_report || {}).peaks || {};
+  const bwrMic   = (data.bw_report || {}).mic   || {};
 
   document.getElementById('sc-title').textContent = `Event — ${bw.filename || ev.waveform_key || 'unknown'}`;
 
@@ -2918,11 +2924,19 @@ function _renderSidecar(data) {
   document.getElementById('sc-f-sr').textContent       = (ev.sample_rate ?? '—') + (ev.sample_rate ? ' sps' : '');
   document.getElementById('sc-f-key').textContent      = ev.waveform_key    || '—';
 
-  document.getElementById('sc-f-tran').textContent     = fmtPpv(pv.transverse);
-  document.getElementById('sc-f-vert').textContent     = fmtPpv(pv.vertical);
-  document.getElementById('sc-f-long').textContent     = fmtPpv(pv.longitudinal);
+  // Suffix with " · {prefix}{N} Hz" when bw_report has a ZC Freq.
+  // Above-range ZC peaks (BW ">100 Hz") get a literal ">" prefix so
+  // operators see the same indicator the PDF shows.
+  const fmtZc = bwr => {
+    if (!bwr || bwr.zc_freq_hz == null) return '';
+    const prefix = bwr.zc_freq_above_range ? '>' : '';
+    return ` · ${prefix}${Math.round(bwr.zc_freq_hz)} Hz`;
+  };
+  document.getElementById('sc-f-tran').textContent     = fmtPpv(pv.transverse)   + fmtZc(bwrPeaks.tran);
+  document.getElementById('sc-f-vert').textContent     = fmtPpv(pv.vertical)     + fmtZc(bwrPeaks.vert);
+  document.getElementById('sc-f-long').textContent     = fmtPpv(pv.longitudinal) + fmtZc(bwrPeaks.long);
   document.getElementById('sc-f-pvs').textContent      = fmtPpv(pv.vector_sum);
-  document.getElementById('sc-f-mic').textContent      = fmtMic(pv.mic_psi);
+  document.getElementById('sc-f-mic').textContent      = fmtMic(pv.mic_psi)      + fmtZc(bwrMic);
 
   document.getElementById('sc-f-project').textContent  = pi.project         || '—';
   document.getElementById('sc-f-client').textContent   = pi.client          || '—';
-- 
2.52.0


From a3cc44d30a90b7c093e3b54a934d90b3d9e101e5 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 18:56:23 +0000
Subject: [PATCH 41/42] feat(backfill): --reparse-txt flag to refresh bw_report
 from preserved .TXT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing backfill_sidecars.py PRESERVES the bw_report block across
regenerations — it's treated as the source of truth from the original
ingest pass (the .TXT isn't reachable from the script's normal data
path, so it can't be re-derived).

That means parser-side fixes (like the 2026-05-28 ">100 Hz" ZC Freq
addition) won't reach old events even with --force.  The new
--reparse-txt flag fixes that: when the sidecar's source.txt_filename
points at a preserved <serial>/<filename>_ASCII.TXT, the script re-runs
the current parser against it and overwrites the bw_report block.

Implies sidecar regeneration on every event (bypasses the
sha-up-to-date / version-up-to-date skip), so that the .h5 cascade-
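regenerates alongside.  For events without a preserved .TXT (legacy
ingests pre-2026-05-27) only the bw_report refresh is a no-op: they are
still regenerated, keeping their existing bw_report block.  Idempotent —
re-running it produces the same sidecar bytes when the parser hasn't
changed.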

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/backfill_sidecars.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/scripts/backfill_sidecars.py b/scripts/backfill_sidecars.py
index 04583f7..04789a5 100644
--- a/scripts/backfill_sidecars.py
+++ b/scripts/backfill_sidecars.py
@@ -103,6 +103,17 @@ def main(argv=None) -> int:
             "STRT-rectime byte-offset fix in v0.15.x)."
         ),
     )
+    p.add_argument(
+        "--reparse-txt", action="store_true",
+        help=(
+            "Re-parse the preserved <serial>/<filename>_ASCII.TXT with the "
+            "current bw_ascii_report parser and overwrite the sidecar's "
+            "bw_report block.  Use this after upgrading the ASCII parser to "
+            "pull in new fields (e.g. zc_freq_above_range for BW '>100 Hz' "
+            "ZC peaks).  No-op for events without a preserved .TXT; safely "
+            "idempotent when the parser hasn't changed."
+        ),
+    )
     p.add_argument("-v", "--verbose", action="store_true")
     args = p.parse_args(argv)
 
@@ -153,7 +164,7 @@ def main(argv=None) -> int:
             # of the sidecar implies staleness of the derived .h5 (both
             # come out of the same decoder).
             sidecar_stale = True
-            if sidecar_path.exists() and not args.force:
+            if sidecar_path.exists() and not args.force and not args.reparse_txt:
                 try:
                     existing = event_file_io.read_sidecar(sidecar_path)
                     sha_ok = existing.get("blastware", {}).get("sha256") == bw_sha
@@ -314,6 +325,24 @@ def main(argv=None) -> int:
                     except Exception:
                         pass
 
+                # --reparse-txt: if a .TXT is preserved on disk, run the
+                # current parser against it and overwrite the bw_report
+                # block.  Picks up post-ingest parser fixes (e.g. the
+                # 2026-05-28 zc_freq_above_range / ">100 Hz" addition).
+                if args.reparse_txt and preserved_txt_fn:
+                    try:
+                        from minimateplus import bw_ascii_report
+                        txt_path = store.txt_path_for(serial, path.name)
+                        if txt_path.exists():
+                            refreshed = bw_ascii_report.parse_report_file(txt_path)
+                            preserved_bw_report = event_file_io._bw_report_to_dict(refreshed)
+                            log.debug("reparsed bw_report from %s", txt_path.name)
+                        else:
+                            log.debug("--reparse-txt: no .TXT at %s (sidecar says %r)",
+                                      txt_path, preserved_txt_fn)
+                    except Exception as exc:
+                        log.warning("--reparse-txt failed for %s: %s", path.name, exc)
+
                 # Overlay BW ASCII report fields onto the rebuilt Event
                 # BEFORE the sidecar + DB write.  Mirrors what the ingest
                 # path does — BW's reported peaks (and sample_rate /
-- 
2.52.0


From 1bccc44b8844134a8c8ffc1c856b9dc701b9086e Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Thu, 28 May 2026 21:17:53 +0000
Subject: [PATCH 42/42] =?UTF-8?q?release:=20v0.20.0=20=E2=80=94=20PDF=20+?=
 =?UTF-8?q?=20parser=20polish?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes out the Event-Report PDF iteration started in v0.17.x and ships
the parser fixes the real-world events were tripping over.

Today's additions on top of the pre-v0.20 unreleased body:

- Server-wide display TZ via the TZ env var (default America/New_York
  on prod).  Affects server logs, the PDF report's "Created" footer,
  matplotlib datetime axes.  DB columns stay UTC.  Dockerfile now
  installs tzdata.
- ZC Freq "above-range" handling — parser stores 100.0 +
  zc_freq_above_range flag for BW's ">100 Hz" marker.  Renders as
  >100 in the PDF stats table and in both modals (inline on the webapp
  Peaks section, new column on the event-browser table).
- scripts/backfill_sidecars.py --reparse-txt — re-runs the current
  parser against the preserved _ASCII.TXT and overwrites the
  sidecar's bw_report block.  Lets parser fixes reach old events
  without re-forwarding.  Validated end-to-end against ~10k prod
  events.

Fixes shipped today:
- histogram_interval_size_s missing from ReportData → every
  histogram PDF render 500'd.
- Histogram PDF geo channels now share a nice-quantized y-axis
  (0.005-LSB-aware 1-2-5 step sequence) instead of auto-scaling
  per channel + inventing sub-LSB "0.003 in/s/div" footer labels.

Roadmap delta: closes the BW ASCII parser "PPV-miss on some TXT
formats", "histogram-specific structural fields", and ">100 Hz value
parsing" items.  Adds a new entry for the byte[5]==0 histogram body
sub-format observed on S353 events.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md   | 47 +++++++++++++++++++++++++++++++++++++++++++----
 CLAUDE.md      |  2 +-
 README.md      | 18 ++++++++++++++----
 pyproject.toml |  2 +-
 4 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d1a575..1b92776 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,46 @@ All notable changes to seismo-relay are documented here.
 
 ## [Unreleased]
 
-### Fixed
+---
+
+## v0.20.0 — 2026-05-28
+
+The "PDF + parser polish" release.  Closes out the Event-Report PDF iteration started in v0.17.x: histogram layouts now render correctly against BW reference PDFs, the ASCII parser handles the real-world edge cases production events were tripping over (OORANGE, `>100 Hz`, histogram timestamps), and the `.TXT` preservation rollout lets parser fixes be applied retroactively to ingested events.  Adds server-wide timezone support so operator-visible timestamps no longer drift into UTC.  Rolls up the substantial "pre-v0.20" body of work that had accumulated under `[Unreleased]` (PDF generation, histogram codec fix, histogram parser fields, `.TXT` preservation, backfill safety) — see the trailing "pre-v0.20.0 work" section below for the full list.
+
+### Added (2026-05-28)
+
+- **Server-wide display timezone via `TZ` env var.**  Both seismo-relay and terra-view now respect a `TZ` environment variable (default `America/New_York` on prod).  Affects server log timestamps, the PDF report renderer's UTC→local conversions on the "Created" footer line, matplotlib's datetime axes, and any other naïve-vs-aware datetime rendering.  DB columns (`created_at`, etc.) stay UTC regardless — this is a display-side fix, not a storage-side one.  Dockerfile now installs `tzdata` (required for the env var to take effect under `python:slim`).  Override per-deployment via the `TZ` line in `docker-compose.yml`.
+- **ZC Freq "above-range" handling — render `>100 Hz` instead of `—`.**  BW writes `">100 Hz"` literally when the zero-crossing algorithm sees a peak too fast to count (device cuts off at 100 Hz on V10.72).  Previously `_parse_number(">100")` returned None and the PDF stats table rendered `—`.  Now the parser mirrors the OORANGE pattern: stores 100.0 on `zc_freq_hz` and sets a new `zc_freq_above_range` flag.  Flag rides through the sidecar's `bw_report` block.  Renders as `>100` in the PDF (per-channel + mic block), as `· >100 Hz` inline on the event modal's Peaks section, and as a dedicated column on the event-browser stats table.  Verified against the real T190LD5Q.LK0W fixture from 2026-05-27 plus a synthetic test case.
+- **Per-channel ZC Freq surfaced in event modals.**  Neither the main webapp modal (`sfm_webapp.html`) nor the standalone event browser (`event_browser.html`) previously exposed ZC Freq.  Now both do — webapp shows it inline alongside PPV (`0.04500 in/s · 47 Hz`); event-browser gets a dedicated column on its per-channel stats table.  Required wiring a parallel sidecar fetch into the event-browser's `loadEvent()` (it was only fetching `waveform.json`).  Falls back to `—` for events without a preserved `.TXT` (pre-2026-05-27 ingests).
+- **`scripts/backfill_sidecars.py --reparse-txt` flag.**  Before this, the backfill script preserved the `bw_report` block from existing sidecars verbatim — so parser-side fixes (like the `>100 Hz` addition above) couldn't reach old events.  The new flag re-runs the current parser against the preserved `<serial>/<filename>_ASCII.TXT`, overwrites the `bw_report` block, and regenerates the sidecar.  Implies sidecar regeneration on every event (bypasses the sha/version skip).  The `bw_report` refresh itself is a no-op for events without a preserved `.TXT` (legacy ingests from before the 2026-05-27 `.TXT`-preservation rollout); their existing block is kept as-is.  Idempotent.  Run with `--skip-hdf5` to skip waveform regen — recommended when only the bw_report needs refreshing.  Validated end-to-end on prod: 9,999 events refreshed cleanly, ZC Freq + OORANGE flags now populated where the original .TXT had them.
+
+### Fixed (2026-05-28)
+
+- **Histogram PDFs no longer 500 on the missing `histogram_interval_size_s` attribute.**  The histogram-interval-times derivation block in `gather_report_data` referenced `rd.histogram_interval_size_s`, but the field was never declared on the `ReportData` dataclass nor read from the sidecar projection (it was inlined into `gather_report_data` without the seconds-numeric counterpart making it onto the dataclass).  Every histogram PDF render raised `AttributeError → 500`.  Waveform PDFs were unaffected.  Fix: add the field, read it from the projection's existing `bw_report.histogram.interval_size_s` key.
+- **Histogram PDF geo channels now share a single nice-quantized y-axis.**  Previously each geo subplot auto-scaled independently — Tran, Vert, and Long all showed different per-channel maxes, so bar heights weren't directly comparable across channels.  The footer "Amplitude Geo: X in/s/div" label was also computed as `max(first_geo_channel) / 5` with no LSB quantization, producing nonsense values like `0.003 in/s/div` when the geophone LSB is 0.005.  Fix: compute a single shared geo y-axis range from `max(Tran, Vert, Long)`, quantize the per-division step to BW's 1-2-5 sequence rounded to the 0.005 in/s LSB (0.005, 0.01, 0.025, 0.05, 0.1, 0.25, ...), apply the same `ylim` + ticks to all three subplots, and use that step for the footer label.  MicL stays on its own auto-scale (different units).  Matches BW's chart styling.
+
+### Docs (2026-05-28)
+
+- **Roadmap entry for a second undecoded histogram body sub-format.**  BE17353 (S353) events observed on 2026-05-28 use a histogram body where `byte[5] = 0x00` (looks like a valid block header by every prior signal) but the walker finds zero data blocks.  Different from the existing `byte[5] != 0` roadmap entry (T190 / O121).  Operationally identical impact — ingestion succeeds, DB peaks come from the bw_report overlay, only the chart is empty.  Sample events captured in the roadmap entry for future RE work.
+
+### Migration / Operations
+
+- **Re-parse existing events to pick up the new parser fields.**  Run on whichever box hosts the live waveform store:
+  ```bash
+  docker exec terra-view-sfm-1 python /app/scripts/backfill_sidecars.py \
+      --reparse-txt --skip-hdf5 --dry-run -v | tail
+  # Looks reasonable?  Run for real:
+  docker exec terra-view-sfm-1 python /app/scripts/backfill_sidecars.py \
+      --reparse-txt --skip-hdf5 -v | tee /tmp/reparse.log | tail -30
+  ```
+  Idempotent; safe to re-run.  Only touches sidecars on disk — no DB writes.
+- **terra-view docker-compose.yml**: add `TZ=America/New_York` (or your deployment's zone) to both the `terra-view` and `sfm` service `environment:` blocks.  Without this, server-rendered timestamps stay in UTC even on the rebuilt SFM image.
+
+### Pre-v0.20.0 work (rolled into this release)
+
+The bullets below accumulated under `[Unreleased]` between v0.19.0 and v0.20.0; kept here so the historical narrative isn't lost.
+
+#### Fixed
 
 - **bw_ascii_report parser now handles `OORANGE` saturation marker.**  BW writes `"OORANGE"` (truncation of "Out Of Range") in PPV / PVS / MicL PSPL fields when the underlying measurement exceeded the channel's full-scale.  Previously our `_parse_number()` returned None → DB ended up with NULL peaks for legitimate high-amplitude events.  Confirmed on real ASCII files pulled 2026-05-27 from the Windows watcher PC: T190LD5Q.LK0W (Vert saturated at Normal range 10 in/s), T438L713.RY0W (all three channels saturated at Sensitive range 1.25 in/s), K557L3YM.OE0W (Tran+Vert saturated + Mic PSPL OORANGE).  New behavior:
    - Per-channel PPV: substitute `geo_range_ips` as a conservative lower bound + set `ppv_saturated` flag
@@ -16,7 +55,7 @@ All notable changes to seismo-relay are documented here.
    - Five events on prod (T190 / T438 / K557 + 2 others matching the same fault pattern) will pick up correct DB peaks + saturation flags once re-forwarded
 - **bw_ascii_report parser handles `Peak Vector Sum TimeSum` typo'd label.**  Real BW output uses this misspelled label (Sum appended twice instead of "Peak Vector Sum Time").  Now accepted as an alias.  Confirmed against all three OORANGE example files — every one has the typo.
 
-### Added
+#### Added
 
 - **Histogram per-interval aggregation in `waveform.json`.**  Histogram events now render with one bar per BW-reported interval (matching the Blastware printout) instead of ~200 bars per event (the raw codec output).  When the sidecar's `bw_report.histogram.n_intervals` is populated (events ingested with the new parser, see next bullet), the `/db/events/{id}/waveform.json` endpoint groups the codec samples into N intervals via max-per-group and returns the aggregated array.  `time_axis` gains `histogram_aggregated: true`, `n_intervals`, `interval_size_s`, and `interval_times` (HH:MM:SS strings).  Both the modal chart and the standalone event browser use those interval timestamps as x-axis labels when present.  Defensive: no-op for events ingested before the parser extension landed (their sidecars lack `histogram.n_intervals`) — those continue to render with raw codec output.
 - **`bw_ascii_report` parser now captures histogram-specific fields.**  Previously the parser dropped these fields silently (Roadmap item closed):
@@ -43,13 +82,13 @@ All notable changes to seismo-relay are documented here.
 - **`apply_bw_report_dict_to_event` helper** in `minimateplus.event_file_io`.  Mirror of `apply_report_to_event` for the projected sidecar dict shape — used by the backfill path, which has the preserved `bw_report` block but not the original `.TXT` file.  BW's reported peaks (and `sample_rate` / `record_time`) now win over codec output during `--force` backfill, matching ingest-path behavior.
 - **`scripts/check_bw_report_preservation.py`** — two-step snapshot/diff tool to verify that `backfill_sidecars.py` doesn't wipe the `bw_report` block from existing sidecars.  Classifies every sidecar as PRESERVED / CHANGED / WIPED / STILL_MISSING / NEW / ADDED / REMOVED.  Exit code 1 if any WIPED or CHANGED entries are found, so it can gate a CI step or deploy script.
 
-### Fixed
+#### Fixed
 
 - **`scripts/backfill_sidecars.py` no longer wipes `bw_report`.**  Before this fix, `event_to_sidecar_dict` silently dropped the preserved `bw_report` block during every backfill, since the function only emits a `bw_report` when called with a live `BwAsciiReport` dataclass (which the backfill doesn't have — only the projected sidecar dict).  Now we read the existing sidecar's `bw_report` and overlay it onto the regenerated sidecar, alongside the existing `review` and `extensions` preservation.
 - **`scripts/backfill_sidecars.py --force` no longer overwrites BW-overlaid DB peaks with codec output.**  The backfill path now calls `apply_bw_report_dict_to_event` before the DB upsert, mirroring what the ingest path does (`/db/import/blastware_file` parses the `.TXT` into a `BwAsciiReport`, calls `apply_report_to_event`, then upserts).  Without this, events where the codec doesn't fully decode (waveform walker edge cases on SP0/SS0/SV0-style events, histogram `byte[5]!=0` sub-format) ended up with PVS=0 in the DB after a `--force` backfill; bit on prod 2026-05-22, rolled back the same day.
 - **Thor IDF files no longer attempted as BW events in backfill.**  `scripts/backfill_sidecars.py` now filters out `.IDFW` / `.IDFH` files in `_looks_like_event_file()`; they share the `.X0W` / `.X0H` suffix shape but use a separate ingest path (`WaveformStore.save_imported_idf`) and aren't decodable by `event_file_io.read_blastware_file`.
 
-### Docs
+#### Docs
 
 - **CLAUDE.md** — added a three-tier conceptual architecture model (SFM / SDM / shared codec library) near the top of the file, with a placement rule for where new code goes.  Documents that what is conceptually SDM (database, waveform store, ingest, `/db/*` endpoints) still lives under `sfm/` for historical reasons; rename deferred until the codebase is quiet enough for a clean refactor.
 - **README.md** — added a "Strategic direction" lead-in to the Roadmap that frames seismo-relay as a suite of cooperating components (not a single app), and an explicit "Terra-View ↔ SFM device control" roadmap section with a concrete implementation checklist (auth as hard prerequisite, embedded live-monitor view, action history, Series IV live-device support).
diff --git a/CLAUDE.md b/CLAUDE.md
index e46b30b..c2892d6 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -2,7 +2,7 @@
 
 Ground-up Python replacement for **Blastware**, Instantel's Windows-only software for
 managing MiniMate Plus seismographs. Connects over direct RS-232 or cellular modem
-(Sierra Wireless RV50 / RV55). Current version: **v0.17.0**.
+(Sierra Wireless RV50 / RV55). Current version: **v0.20.0**.
 
 When new information about the protocol is discovered, please update the instantel_protocol_reference.md with the findings in addition to this document
 
diff --git a/README.md b/README.md
index 114a943..7522bb1 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# seismo-relay  `v0.19.0`
+# seismo-relay  `v0.20.0`
 
 A ground-up replacement for **Blastware** — Instantel's aging Windows-only
 software for managing seismographs.  Supports both the **MiniMate Plus
@@ -35,6 +35,16 @@ over direct RS-232 or cellular modem (Sierra Wireless RV50 / RV55).
 > and storage layer dispatch deterministically instead of sniffing
 > filenames.  Self-applying migration backfills existing rows from the
 > binary filename extension.
+> **v0.20.0 (2026-05-28)** closes out the Event-Report PDF iteration
+> started in v0.17.x: histogram layouts render correctly against BW
+> reference PDFs, the ASCII parser handles real-world edge cases
+> (`OORANGE`, `>100 Hz`, histogram timestamps), and per-channel ZC
+> Freq is surfaced in both modals (event browser + main webapp).
+> Adds a server-wide `TZ` env var so operator-visible timestamps
+> render in local time instead of UTC.  New
+> `scripts/backfill_sidecars.py --reparse-txt` lets parser fixes be
+> applied retroactively to existing events without re-forwarding,
+> using the `.TXT` files preserved at ingest time.
 > See [CHANGELOG.md](CHANGELOG.md) for full version history.
 
 ---
@@ -536,10 +546,10 @@ Implementation steps (concrete):
 
 ### BW ASCII report parser enhancements (built in v0.16.0)
 
-- [ ] **PPV field misses on certain TXT formats.**  Discovered 2026-05-22 during the histogram-codec backfill validation: a handful of events (5 in prod) have a `bw_report` block where `peaks.{tran,vert,long}.ppv_ips` and `peaks.vector_sum.ips` are all `None`, despite the parser correctly extracting every OTHER field for the same channels (zc_freq_hz, time_of_peak_s, peak_accel_g, peak_disp_in).  Symptom on the DB side: `peak_vector_sum=0` after a `--force` backfill that overlays from the parsed bw_report dict.  Affected events on prod include `T190LD5Q.LK0W`, `T438L713.RY0W`, `K557L3YM.OE0W`.  Root cause likely a regex or format mismatch for the "PPV" header line in those specific firmware/event-type outputs.  Once fixed, re-forwarding the events from series3-watcher will re-populate the `bw_report` blocks correctly.
-- [ ] **Histogram-specific structural fields.**  Current parser handles the shared fields (PPV, ZC Freq, sensor self-check, project) but silently drops histogram-only fields: `Histogram Start/Stop Time`, `Histogram Start/Stop Date`, `Number of Intervals`, `Interval Size`, per-channel `Peak Time` + `Peak Date` (absolute timestamps rather than the waveform's `Time of Peak` relative seconds).
+- [x] **PPV field misses on certain TXT formats.** ✅ v0.20.0 — root cause was the `OORANGE` (Out Of Range) saturation marker that BW writes when a channel exceeds its full-scale; `_parse_number()` returned None for the non-numeric value.  Parser now substitutes `geo_range_ips` as a lower bound + sets `ppv_saturated` flag.  All 5 prod events (T190LD5Q.LK0W, T438L713.RY0W, K557L3YM.OE0W, + 2 others) now parse cleanly.
+- [x] **Histogram-specific structural fields.** ✅ v0.20.0 — `Histogram Start/Stop Time+Date`, `Number of Intervals`, `Interval Size`, per-channel `Peak Time` + `Peak Date`, and `Peak Vector Sum Date` all parse now.  Land in the sidecar's `bw_report.histogram` block.
 - [ ] **Histogram interval bin-table parsing.**  Trailing 792-row table (per-interval Peak/Freq per channel + MicL) in histogram TXTs is unparsed.  Probably too big for the sidecar JSON; may want a separate `.histogram.h5` companion file.
-- [ ] **`>100 Hz` value parsing.**  Histogram TXTs use `>100 Hz` for out-of-range ZC freq; current `_parse_number()` returns `None` for these (loses information).
+- [x] **`>100 Hz` value parsing.** ✅ v0.20.0 — parser now mirrors the OORANGE pattern: stores 100.0 on `zc_freq_hz` + sets `zc_freq_above_range` flag.  PDF + both modals render `>100 Hz` instead of `—`.
 
 ### Ingestion gaps
 
diff --git a/pyproject.toml b/pyproject.toml
index d5db51f..5151f55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "seismo-relay"
-version = "0.19.0"
+version = "0.20.0"
 description = "Python client and REST server for MiniMate Plus seismographs"
 requires-python = ">=3.10"
 dependencies = [
-- 
2.52.0