merge: update to 0.17.0 #21

Merged
serversdown merged 5 commits from ach-report-ingestion into main 2026-05-17 19:13:57 -04:00
Showing only changes of commit b6911009ff - Show all commits
+150
View File
@@ -0,0 +1,150 @@
"""
scripts/backfill_record_type.py — fix `record_type` on legacy event
rows whose value was hardcoded to "Waveform" regardless of actual type.
Why this is needed
──────────────────
Pre-v0.16.1 the BW file importer (`event_file_io.read_blastware_file`)
hardcoded `ev.record_type = "Waveform"` for every imported event. Fixed
in commit aac1c8e — new ingests now derive the type from the Blastware
filename's extension last character (H=Histogram, W=Waveform, M=Manual,
E=Event, C=Combo) per the V10.72+ MiniMate Plus AB0T filename scheme.
Effect on a server that imported events under the old code: every
events row has `record_type = "Waveform"`, even for histograms,
manuals, etc. Visible in terra-view's event-detail modal under the
"Record Type" field. Terra-view also has a client-side workaround
that derives the type from the filename for display purposes, so
operators see the correct type in the UI even before this backfill.
This script makes the DB column match what the UI is already showing,
which matters for reporting and any downstream consumer that reads
events.record_type directly.
This script
───────────
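Walks the `events` table (rows with a NULL or empty `blastware_filename`
are skipped) and updates each row's `record_type` to the value derived
from its `blastware_filename`. Old S338 firmware files (3-char extensions
ending in `0`) and any unrecognized suffix resolve to the `--default`
fallback ("Waveform" unless overridden), so rows written by the pre-fix
importer keep their existing "Waveform" value.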
Idempotent: re-running after a successful backfill finds zero rows
needing updates and exits cleanly (it always re-derives but only
writes when the value would change).
Usage
─────
# Dry-run (default): print what would change, don't touch the DB
python -m scripts.backfill_record_type --db bridges/captures/seismo_relay.db
# Apply the backfill
python -m scripts.backfill_record_type --db bridges/captures/seismo_relay.db --apply
"""
from __future__ import annotations
import argparse
import sqlite3
import sys
from collections import Counter
from pathlib import Path
# Maps the upper-cased last character of a Blastware filename extension to
# the record type stored in `events.record_type`.
# Must stay in sync with minimateplus.event_file_io._RECORD_TYPE_BY_EXT_SUFFIX.
_TYPE_FROM_SUFFIX = {
    "H": "Histogram",
    "W": "Waveform",
    "M": "Manual",
    "E": "Event",
    "C": "Combo",
}
def derive_record_type(filename: str | None, default: str = "Waveform") -> str:
    """Return the record type encoded in a Blastware filename's extension.

    Mirror of minimateplus.event_file_io.derive_record_type_from_filename,
    vendored so this script can run without the seismo-relay package on the
    Python path (e.g. via `docker exec` against a container's DB volume).

    Args:
        filename: Blastware filename or path; may be None or empty.
        default: Value returned when no record type can be derived.

    Returns:
        The record type mapped from the upper-cased last character of the
        extension, or `default` when the filename is missing, has no
        extension, or ends in an unrecognized character.
    """
    if not filename:
        return default

    # Only the final path component matters; split on its last dot.
    _stem, dot, extension = Path(filename).name.rpartition(".")
    if not (dot and extension):
        # No dot at all, or a trailing dot with nothing after it.
        return default

    suffix_key = extension[-1].upper()
    return _TYPE_FROM_SUFFIX.get(suffix_key, default)
def main() -> int:
    """Run the record_type backfill CLI and return a process exit code.

    Reads every `events` row that has a non-empty `blastware_filename`,
    re-derives its record type with `derive_record_type`, and collects the
    rows whose stored `record_type` differs from the derived value. Without
    `--apply` the pending changes are only summarized (dry-run); with
    `--apply` they are written with one UPDATE per row and committed.

    Returns:
        0 on success (dry-run, applied, or nothing to update);
        1 when the `--db` path is not an existing file.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's headings and usage examples laid out
        # as written; the default formatter re-wraps them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--db", required=True, help="Path to seismo_relay.db")
    ap.add_argument("--apply", action="store_true",
                    help="Actually write changes (default is dry-run).")
    ap.add_argument("--default", default="Waveform",
                    help="Fallback record_type when filename doesn't encode one. "
                         "Default: Waveform (matches the pre-fix bug's behavior).")
    args = ap.parse_args()

    db_path = Path(args.db)
    # sqlite3.connect() would create a missing file, so refuse anything that
    # is not already a regular file on disk (this also rejects directories).
    if not db_path.is_file():
        print(f"ERROR: database not found at {db_path}", file=sys.stderr)
        return 1

    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        cur.execute("""
            SELECT id, blastware_filename, record_type
            FROM events
            WHERE blastware_filename IS NOT NULL
            AND blastware_filename != ''
        """)
        rows = cur.fetchall()
        print(f"Scanning {len(rows):,} event rows…")
        print()

        # Tally proposed changes. `updates` is kept in the bind-parameter
        # order of the UPDATE statement below: (new record_type, row id).
        transitions: Counter[tuple[str, str]] = Counter()
        updates: list[tuple[str, object]] = []
        for row in rows:
            derived = derive_record_type(row["blastware_filename"], default=args.default)
            # A NULL record_type is compared (and reported) as "".
            current = row["record_type"] or ""
            if derived == current:
                continue
            transitions[(current, derived)] += 1
            updates.append((derived, row["id"]))

        if not updates:
            print("Nothing to update — all rows already match.")
            return 0

        print(f"{len(updates):,} row(s) need updating:")
        # Most frequent transition first.
        for (old, new), count in transitions.most_common():
            print(f" {count:>6,} {old!r:14s}{new!r}")
        print()

        if not args.apply:
            print("(dry-run — re-run with --apply to write changes)")
            return 0

        print("Applying changes…")
        cur.executemany(
            "UPDATE events SET record_type = ? WHERE id = ?",
            updates,
        )
        conn.commit()
        print(f"Done. Updated {cur.rowcount:,} row(s).")
        return 0
    finally:
        # Always release the database handle, including when a query or the
        # commit raises; nothing is committed unless commit() was reached.
        conn.close()
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())