From 6b2a44ff02f8850d892c545e8a65e52b0d0e7a42 Mon Sep 17 00:00:00 2001 From: serversdown Date: Mon, 11 May 2026 05:51:39 +0000 Subject: [PATCH] fix(import): overlay BW report onto Event + upsert DB row on re-import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two compounding bugs caused forwarded events to land in the DB with broken-codec peak values (~10 in/s saturation on every channel) and no project info, even when the watcher correctly paired a BW ASCII report with the binary. Bug 1: save_imported_bw built the sidecar JSON with the report's authoritative peak / project values via event_to_sidecar_dict( bw_report=...), but never overlaid those onto the in-memory Event that flows to db.insert_events(). So the DB row got peak_values from read_blastware_file()._peaks_from_samples() — which runs the still-undecoded waveform body codec assuming raw int16 LE and produces ±32K-shaped noise (= ±10 in/s at Normal range) regardless of the actual signal. The sidecar JSON had the truth but the DB columns (which the webapp queries for fast filter/sort) lied. Bug 2: insert_events' IntegrityError handler only refreshed the filename/filesize/a5_pickle/sidecar columns when a duplicate (serial, timestamp) was seen. Peak values, project info, sample_rate, record_type stayed locked in at whatever the FIRST insert wrote. So even after Bug 1 was fixed, the historical events in the DB (already inserted with broken-codec peaks) would never get their values corrected, because a re-forward would just hit IntegrityError and skip the field refresh. Fix 1 (minimateplus/event_file_io.py + sfm/waveform_store.py): - New apply_report_to_event(event, report) helper folds the BW report's device-authoritative fields onto the Event in-place: per-channel PPV, peak vector sum, mic PSPL→psi, project / client / operator / sensor_location, sample_rate, record_time. - save_imported_bw() calls the helper right after parsing the report. 
The Event that flows to insert_events() now carries correct values. Fix 2 (sfm/database.py): - insert_events()'s IntegrityError UPDATE now refreshes every device-authoritative column from the new data: tran_ppv, vert_ppv, long_ppv, peak_vector_sum, mic_ppv, project, client, operator, sensor_location, sample_rate, record_type, plus the existing filename/filesize/a5_pickle/sidecar fields. - Preserves: id, waveform_key, session_id, created_at (immutable / FK fields), and false_trigger (operator review state). End-to-end simulation verified: - Step 1: import without report → DB has ±10 in/s peaks, no project - Step 2: re-import WITH report → upsert path fires, DB now has device-authoritative 0.005 in/s peaks + sensor_location - Step 3: operator sets false_trigger=1, re-import again → flag preserved, peaks remain correct For the user's situation: deleting the watcher state file forces a re-forward of all events. Each re-forward now pairs with its _ASCII.TXT, applies the report onto the Event, and the upsert refreshes the DB row. No DB nuke needed. Full SFM suite: 62 passed, 44 skipped. --- minimateplus/event_file_io.py | 57 +++++++++++++++++++++++++ sfm/database.py | 80 +++++++++++++++++++++++++---------- sfm/waveform_store.py | 17 ++++++++ 3 files changed, 132 insertions(+), 22 deletions(-) diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py index e5f180b..ae76064 100644 --- a/minimateplus/event_file_io.py +++ b/minimateplus/event_file_io.py @@ -195,6 +195,63 @@ def _dbl_to_psi(pspl_dbl: float) -> float: return _DBL_REF_PSI * (10.0 ** (pspl_dbl / 20.0)) +def apply_report_to_event(event: Event, report: BwAsciiReport) -> None: + """Overlay device-authoritative fields from a parsed BW ASCII report + onto an in-memory Event, IN-PLACE. 
+ + Why this exists + ─────────────── + `read_blastware_file()` parses the BW binary and fills `Event.peak_values` + via `_peaks_from_samples()` — which runs the (still-undecoded) BW body + codec assuming raw int16 LE and produces ±32K-shaped noise on every + channel. Result: peak values land in the SeismoDb event row as + ~10 in/s on every event regardless of the actual signal. + + When a paired BW ASCII report is available, the report carries the + device's own authoritative peak / project / sample-rate / record-time + values. This helper folds those onto the Event before it flows to + `SeismoDb.insert_events()`, so the DB columns reflect the report + rather than the broken-codec output. + + Fields overlaid (each only when the report value passes its guard: channel present with non-None ppv_ips, non-None peak vector sum / record time, non-empty project strings, non-zero sample rate, mic pspl_dbl > 0): + - peak_values.tran / .vert / .long (from report.channels) + - peak_values.peak_vector_sum (from report.peak_vector_sum_ips) + - peak_values.micl (psi) (from report.mic.pspl_dbl → psi) + - project_info.project / .client / .operator / .sensor_location + - sample_rate (from report.sample_rate_sps) + - rectime_seconds (from report.record_time_s) + + Fields NOT touched (operator-edit / parser-output preserved): + - timestamp, raw_samples, record_type, total_samples, + pretrig_samples, _waveform_key, _a5_frames, _raw_record + - false_trigger and review state (those live on the sidecar, not on Event) + """ + if event.peak_values is None: + event.peak_values = PeakValues() + pv = event.peak_values + ch = report.channels + if (t := ch.get("Tran")) and t.ppv_ips is not None: pv.tran = t.ppv_ips + if (v := ch.get("Vert")) and v.ppv_ips is not None: pv.vert = v.ppv_ips + if (l := ch.get("Long")) and l.ppv_ips is not None: pv.long = l.ppv_ips + if report.peak_vector_sum_ips is not None: + pv.peak_vector_sum = report.peak_vector_sum_ips + if report.mic.pspl_dbl is not None and report.mic.pspl_dbl > 0: + pv.micl = _dbl_to_psi(report.mic.pspl_dbl) + + if event.project_info is None: + event.project_info = ProjectInfo() + pi = 
event.project_info + if report.project: pi.project = report.project + if report.client: pi.client = report.client + if report.operator: pi.operator = report.operator + if report.sensor_location: pi.sensor_location = report.sensor_location + + if report.sample_rate_sps: + event.sample_rate = report.sample_rate_sps + if report.record_time_s is not None: + event.rectime_seconds = report.record_time_s + + def _project_info_to_dict(pi: Optional[ProjectInfo]) -> dict: if pi is None: return { diff --git a/sfm/database.py b/sfm/database.py index 156d31d..88497d8 100644 --- a/sfm/database.py +++ b/sfm/database.py @@ -374,28 +374,64 @@ class SeismoDb: inserted += 1 except sqlite3.IntegrityError: skipped += 1 - # Upsert waveform fields onto the existing dedup row so a - # re-download via the live endpoint refreshes filename / - # size / sidecar without churning the rest of the row. - if rec and ts: - conn.execute( - """ - UPDATE events - SET blastware_filename = ?, - blastware_filesize = ?, - a5_pickle_filename = ?, - sidecar_filename = ? - WHERE serial = ? AND timestamp = ? - """, - ( - rec.get("filename"), - rec.get("filesize"), - rec.get("a5_pickle_filename"), - rec.get("sidecar_filename"), - serial, - ts, - ), - ) + # UPSERT path: a row for this (serial, timestamp) already + # exists. Refresh every device-authoritative field from + # the new data so that a re-import with better data (e.g. + # a watcher re-forward where the previous attempt missed + # the paired BW ASCII report) replaces stale peaks / + # project info / sample_rate. + # + # Preserved (not in this UPDATE): + # id, waveform_key, session_id, created_at — immutable / FK + # false_trigger — operator review state + # + # Behaviour change vs prior versions: this UPDATE used + # to only refresh filename / filesize / a5_pickle / + # sidecar fields. As a result, the first insert's + # broken-codec peak values were locked in forever even + # if subsequent re-forwards arrived with correct + # report-derived values. 
Now every re-import lifts the + # DB row up to whatever the latest Event carries. + conn.execute( + """ + UPDATE events + SET tran_ppv = ?, + vert_ppv = ?, + long_ppv = ?, + peak_vector_sum = ?, + mic_ppv = ?, + project = ?, + client = ?, + operator = ?, + sensor_location = ?, + sample_rate = ?, + record_type = ?, + blastware_filename = ?, + blastware_filesize = ?, + a5_pickle_filename = ?, + sidecar_filename = ? + WHERE serial = ? AND timestamp = ? + """, + ( + pv.tran if pv else None, + pv.vert if pv else None, + pv.long if pv else None, + pv.peak_vector_sum if pv else None, + pv.micl if pv else None, + pi.project if pi else None, + pi.client if pi else None, + pi.operator if pi else None, + pi.sensor_location if pi else None, + ev.sample_rate, + ev.record_type, + rec.get("filename") if rec else None, + rec.get("filesize") if rec else None, + rec.get("a5_pickle_filename") if rec else None, + rec.get("sidecar_filename") if rec else None, + serial, + ts, + ), + ) log.debug("insert_events serial=%s inserted=%d skipped=%d", serial, inserted, skipped) diff --git a/sfm/waveform_store.py b/sfm/waveform_store.py index 10b3d23..93cd970 100644 --- a/sfm/waveform_store.py +++ b/sfm/waveform_store.py @@ -314,6 +314,23 @@ class WaveformStore: exc, ) + # If we have a report, overlay its device-authoritative fields + # (peaks, project, sample_rate, record_time) onto the Event + # BEFORE handing it to db.insert_events(). Without this overlay + # the DB row gets `peak_values` from _peaks_from_samples(), which + # runs the still-undecoded waveform codec on the BW body and + # produces ±10 in/s saturation values on every channel for every + # event. The sidecar JSON had the correct values via + # event_to_sidecar_dict(bw_report=...) but the DB columns didn't. + if bw_report is not None: + try: + event_file_io.apply_report_to_event(ev, bw_report) + except Exception as exc: + log.warning( + "save_imported_bw: failed to overlay report onto event: %s", + exc, + ) + # Resolve serial. 
blastware_filename derives a 4-char prefix from # the numeric serial (e.g. BE11529 → M529); we go the other way # via the source filename if a hint wasn't given.