merge: update to 0.17.0 #21
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
scripts/backfill_record_type.py — fix `record_type` on legacy event
|
||||
rows whose value was hardcoded to "Waveform" regardless of actual type.
|
||||
|
||||
Why this is needed
|
||||
──────────────────
|
||||
Pre-v0.16.1 the BW file importer (`event_file_io.read_blastware_file`)
|
||||
hardcoded `ev.record_type = "Waveform"` for every imported event. Fixed
|
||||
in commit aac1c8e — new ingests now derive the type from the Blastware
|
||||
filename's extension last character (H=Histogram, W=Waveform, M=Manual,
|
||||
E=Event, C=Combo) per the V10.72+ MiniMate Plus AB0T filename scheme.
|
||||
|
||||
Effect on a server that imported events under the old code: every
|
||||
events row has `record_type = "Waveform"`, even for histograms,
|
||||
manuals, etc. Visible in terra-view's event-detail modal under the
|
||||
"Record Type" field. Terra-view also has a client-side workaround
|
||||
that derives the type from the filename for display purposes, so
|
||||
operators see the correct type in the UI even before this backfill.
|
||||
This script makes the DB column match what the UI is already showing,
|
||||
which matters for reporting and any downstream consumer that reads
|
||||
events.record_type directly.
|
||||
|
||||
This script
|
||||
───────────
|
||||
Walks the `events` table and updates each row's `record_type` to the
|
||||
derived value from its `blastware_filename`. Old S338 firmware files
|
||||
(3-char extensions ending in `0`) and any unrecognized suffix get
|
||||
left at the existing value (defaults to "Waveform").
|
||||
|
||||
Idempotent: re-running after a successful backfill finds zero rows
|
||||
needing updates and exits cleanly (it always re-derives but only
|
||||
writes when the value would change).
|
||||
|
||||
Usage
|
||||
─────
|
||||
# Dry-run (default): print what would change, don't touch the DB
|
||||
python -m scripts.backfill_record_type --db bridges/captures/seismo_relay.db
|
||||
|
||||
# Apply the backfill
|
||||
python -m scripts.backfill_record_type --db bridges/captures/seismo_relay.db --apply
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sqlite3
|
||||
import sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Must stay in sync with minimateplus.event_file_io._RECORD_TYPE_BY_EXT_SUFFIX.
|
||||
_TYPE_FROM_SUFFIX = {
|
||||
"H": "Histogram",
|
||||
"W": "Waveform",
|
||||
"M": "Manual",
|
||||
"E": "Event",
|
||||
"C": "Combo",
|
||||
}
|
||||
|
||||
|
||||
def derive_record_type(filename: str | None, default: str = "Waveform") -> str:
|
||||
"""Mirror of minimateplus.event_file_io.derive_record_type_from_filename.
|
||||
|
||||
Vendored here so this script runs without needing the seismo-relay
|
||||
package on the Python path (useful on prod where you might be
|
||||
running it via `docker exec` against a container's DB volume).
|
||||
"""
|
||||
if not filename:
|
||||
return default
|
||||
name = Path(filename).name
|
||||
if "." not in name:
|
||||
return default
|
||||
ext = name.rsplit(".", 1)[1]
|
||||
if not ext:
|
||||
return default
|
||||
return _TYPE_FROM_SUFFIX.get(ext[-1].upper(), default)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
ap = argparse.ArgumentParser(description=__doc__)
|
||||
ap.add_argument("--db", required=True, help="Path to seismo_relay.db")
|
||||
ap.add_argument("--apply", action="store_true",
|
||||
help="Actually write changes (default is dry-run).")
|
||||
ap.add_argument("--default", default="Waveform",
|
||||
help="Fallback record_type when filename doesn't encode one. "
|
||||
"Default: Waveform (matches the pre-fix bug's behavior).")
|
||||
args = ap.parse_args()
|
||||
|
||||
db_path = Path(args.db)
|
||||
if not db_path.exists():
|
||||
print(f"ERROR: database not found at {db_path}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
conn = sqlite3.connect(str(db_path))
|
||||
conn.row_factory = sqlite3.Row
|
||||
cur = conn.cursor()
|
||||
|
||||
cur.execute("""
|
||||
SELECT id, blastware_filename, record_type
|
||||
FROM events
|
||||
WHERE blastware_filename IS NOT NULL
|
||||
AND blastware_filename != ''
|
||||
""")
|
||||
rows = cur.fetchall()
|
||||
total = len(rows)
|
||||
print(f"Scanning {total:,} event rows…")
|
||||
print()
|
||||
|
||||
# Tally proposed changes.
|
||||
transitions: Counter[tuple[str, str]] = Counter()
|
||||
update_ids: list[tuple[str, str]] = []
|
||||
unrecognized = 0
|
||||
|
||||
for row in rows:
|
||||
derived = derive_record_type(row["blastware_filename"], default=args.default)
|
||||
current = row["record_type"] or ""
|
||||
if derived == current:
|
||||
continue
|
||||
transitions[(current, derived)] += 1
|
||||
update_ids.append((row["id"], derived))
|
||||
|
||||
if not update_ids:
|
||||
print("Nothing to update — all rows already match.")
|
||||
conn.close()
|
||||
return 0
|
||||
|
||||
print(f"{len(update_ids):,} row(s) need updating:")
|
||||
for (old, new), count in sorted(transitions.items(), key=lambda x: -x[1]):
|
||||
print(f" {count:>6,} {old!r:14s} → {new!r}")
|
||||
print()
|
||||
|
||||
if not args.apply:
|
||||
print("(dry-run — re-run with --apply to write changes)")
|
||||
conn.close()
|
||||
return 0
|
||||
|
||||
print("Applying changes…")
|
||||
cur.executemany(
|
||||
"UPDATE events SET record_type = ? WHERE id = ?",
|
||||
[(new, eid) for eid, new in update_ids],
|
||||
)
|
||||
conn.commit()
|
||||
print(f"Done. Updated {cur.rowcount:,} row(s).")
|
||||
conn.close()
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user