"""
scripts/backfill_record_type.py — fix `record_type` on legacy event
rows whose value was hardcoded to "Waveform" regardless of actual type.

Why this is needed
──────────────────
Pre-v0.16.1 the BW file importer (`event_file_io.read_blastware_file`)
hardcoded `ev.record_type = "Waveform"` for every imported event.  Fixed
in commit aac1c8e — new ingests now derive the type from the Blastware
filename's extension last character (H=Histogram, W=Waveform, M=Manual,
E=Event, C=Combo) per the V10.72+ MiniMate Plus AB0T filename scheme.

Effect on a server that imported events under the old code: every
events row has `record_type = "Waveform"`, even for histograms,
manuals, etc.  Visible in terra-view's event-detail modal under the
"Record Type" field.  Terra-view also has a client-side workaround
that derives the type from the filename for display purposes, so
operators see the correct type in the UI even before this backfill.
This script makes the DB column match what the UI is already showing,
which matters for reporting and any downstream consumer that reads
events.record_type directly.

This script
───────────
Walks the `events` table (rows with a non-empty `blastware_filename`)
and updates each row's `record_type` to the value derived from that
filename.  Old S338 firmware files (3-char extensions ending in `0`)
and any unrecognized suffix resolve to the `--default` value
("Waveform" unless overridden), so legacy rows that already hold
"Waveform" are not rewritten.  Note that a row whose suffix is
unrecognized and whose current value differs from `--default` WILL be
rewritten to `--default`.

Idempotent: re-running after a successful backfill finds zero rows
needing updates and exits cleanly (it always re-derives but only
writes when the value would change).

Exit status is 0 on success (including dry-run and "nothing to do")
and 1 when the database is missing or SQLite reports an error.

Usage
─────
    # Dry-run (default): print what would change, don't touch the DB
    python -m scripts.backfill_record_type --db bridges/captures/seismo_relay.db

    # Apply the backfill
    python -m scripts.backfill_record_type --db bridges/captures/seismo_relay.db --apply
"""

from __future__ import annotations

import argparse
import contextlib
import sqlite3
import sys
from collections import Counter
from collections.abc import Iterable, Sequence
from pathlib import Path


# Must stay in sync with minimateplus.event_file_io._RECORD_TYPE_BY_EXT_SUFFIX.
_TYPE_FROM_SUFFIX = {
    "H": "Histogram",
    "W": "Waveform",
    "M": "Manual",
    "E": "Event",
    "C": "Combo",
}

# Only rows that actually carry a filename can be re-derived.
_SELECT_CANDIDATES = """
    SELECT id, blastware_filename, record_type
      FROM events
     WHERE blastware_filename IS NOT NULL
       AND blastware_filename != ''
"""

_UPDATE_RECORD_TYPE = "UPDATE events SET record_type = ? WHERE id = ?"


def derive_record_type(filename: str | None, default: str = "Waveform") -> str:
    """Mirror of minimateplus.event_file_io.derive_record_type_from_filename.

    Vendored here so this script runs without needing the seismo-relay
    package on the Python path (useful on prod where you might be
    running it via `docker exec` against a container's DB volume).

    Args:
        filename: Blastware filename, optionally with leading directories.
        default: Value returned when the filename is empty, has no
            extension, or its extension's last character is not a key of
            ``_TYPE_FROM_SUFFIX``.

    Returns:
        The record type mapped from the last character of the extension
        (case-insensitive), or ``default``.
    """
    if not filename:
        return default
    # Only the final path component may contribute the extension.
    _, dot, ext = Path(filename).name.rpartition(".")
    if not dot or not ext:
        return default
    return _TYPE_FROM_SUFFIX.get(ext[-1].upper(), default)


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the backfill script."""
    ap = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring is pre-formatted (headings, usage block);
        # the default formatter would re-wrap it into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--db", required=True, help="Path to seismo_relay.db")
    ap.add_argument("--apply", action="store_true",
                    help="Actually write changes (default is dry-run).")
    ap.add_argument("--default", default="Waveform",
                    help="Fallback record_type when filename doesn't encode one. "
                         "Default: Waveform (matches the pre-fix bug's behavior).")
    return ap


def _plan_updates(
    rows: Iterable[sqlite3.Row], default: str
) -> tuple[Counter[tuple[str, str]], list[tuple[str, object]]]:
    """Work out which rows need a new record_type.

    Returns a tally of ``(current, derived)`` transitions and the
    ``(derived, id)`` parameter tuples for the UPDATE statement.
    """
    transitions: Counter[tuple[str, str]] = Counter()
    updates: list[tuple[str, object]] = []
    for row in rows:
        derived = derive_record_type(row["blastware_filename"], default=default)
        current = row["record_type"] or ""  # NULL is reported as ''
        if derived == current:
            continue
        transitions[(current, derived)] += 1
        updates.append((derived, row["id"]))
    return transitions, updates


def _run_backfill(conn: sqlite3.Connection, *, default: str, apply: bool) -> int:
    """Scan, report, and (when ``apply`` is true) write the backfill."""
    rows = conn.execute(_SELECT_CANDIDATES).fetchall()
    print(f"Scanning {len(rows):,} event rows…")
    print()

    transitions, updates = _plan_updates(rows, default)
    if not updates:
        print("Nothing to update — all rows already match.")
        return 0

    print(f"{len(updates):,} row(s) need updating:")
    # most_common() orders by descending count, ties in first-seen order.
    for (old, new), count in transitions.most_common():
        print(f" {count:>6,} {old!r:14s} → {new!r}")
    print()

    if not apply:
        print("(dry-run — re-run with --apply to write changes)")
        return 0

    print("Applying changes…")
    # `with conn` commits on success and rolls back if any UPDATE fails,
    # so a partial backfill is never left behind.
    with conn:
        cur = conn.executemany(_UPDATE_RECORD_TYPE, updates)
    # For executemany() the cursor's rowcount is the sum over all statements.
    print(f"Done. Updated {cur.rowcount:,} row(s).")
    return 0


def main(argv: Sequence[str] | None = None) -> int:
    """Entry point: parse arguments and run the backfill.

    Args:
        argv: Argument list without the program name.  ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 if the database file does
        not exist or SQLite raises an error.
    """
    args = _build_parser().parse_args(argv)

    db_path = Path(args.db)
    # sqlite3.connect() would silently create a missing file, so check first.
    if not db_path.is_file():
        print(f"ERROR: database not found at {db_path}", file=sys.stderr)
        return 1

    try:
        # closing() guarantees the connection is released on every path,
        # including exceptions raised while scanning or writing.
        with contextlib.closing(sqlite3.connect(str(db_path))) as conn:
            conn.row_factory = sqlite3.Row
            return _run_backfill(conn, default=args.default, apply=args.apply)
    except sqlite3.Error as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())