feat: v0.15.0
### Added
- **Layered event storage architecture.** Each event now lands as four
files in the per-serial waveform store, each with a clear role:
- `<filename>` — the Blastware-readable binary (BW file). Untouched.
- `<filename>.a5.pkl` — the raw 5A frames (regenerative source).
- `<filename>.h5` — clean per-channel waveform arrays in physical
units (in/s for geo, psi for mic) plus event metadata (HDF5 with
gzip compression). This is the canonical format for downstream
analysis tools.
- `<filename>.sfm.json` — the modern review/metadata sidecar (peaks,
project, source provenance, review state, extensions).
SQLite (`seismo_relay.db`) is the searchable index over all four.
- **Plot-ready waveform JSON (`sfm.plot.v1`).** The `/device/event/{idx}/waveform`
and `/db/events/{id}/waveform.json` endpoints now return samples in
physical units with explicit time-axis metadata, peak markers, and
per-channel unit hints — no more guessing the ADC-to-velocity scale
client-side. The webapp waveform viewer was rewritten to consume
this shape.
- **In-app waveform viewer accuracy fix.** The standalone SFM webapp
viewer was scaling geophone amplitudes by `geoAdcScale / 32767`
(≈ 6.206 / 32767), where `geoAdcScale = 6.206053` is the device's
*in/s per V* hardware constant — not the ADC-counts-to-velocity
factor. This silently scaled every plot ~38% too low for Normal-range
geophones (the correct full-scale is 10.0 in/s, or 1.25 in/s for
Sensitive). Conversion is now done server-side using the geo_range
from compliance config; the client just plots.
- New `sfm/event_hdf5.py` module: `write_event_hdf5()`,
`read_event_hdf5()`, plus a plot-JSON helper.
- Backfill script extended to also emit `.h5` for existing events.
### Dependencies
- Added `h5py>=3.10` and `numpy>=1.24` for the HDF5 storage layer.
- Added `python-multipart>=0.0.7` (required by FastAPI for the
`/db/import/blastware_file` endpoint introduced in this release).
This commit is contained in:
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
sfm/import_bw.py — CLI for ingesting Blastware-format event files.
|
||||
|
||||
Walks a path (file or directory), parses each recognised event-file
|
||||
binary, copies it into the canonical waveform store, writes the
|
||||
.sfm.json sidecar, and upserts a row in seismo_relay.db.
|
||||
|
||||
Use cases:
|
||||
- Migrating a Blastware ACH inbox into SFM
|
||||
- One-off imports of files emailed in by field crews
|
||||
- Bulk-loading historical archives
|
||||
|
||||
Usage:
|
||||
python -m sfm.import_bw <path-or-dir> [--serial BE11529]
|
||||
[--db-path bridges/captures/seismo_relay.db]
|
||||
[--store-root bridges/captures/waveforms]
|
||||
[--dry-run]
|
||||
[-v]
|
||||
|
||||
Examples:
|
||||
python -m sfm.import_bw ~/Downloads/M529LKIQ.7M0W
|
||||
python -m sfm.import_bw /path/to/blastware_archive --serial BE11529
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
# Allow running from the repo root without installation: put the directory
# two levels above this file (the parent of the `sfm` package directory) at
# the front of sys.path. This must execute before the `sfm.*` imports below.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
from sfm.database import SeismoDb
|
||||
from sfm.waveform_store import WaveformStore
|
||||
|
||||
# Module logger. main() configures the root handler via logging.basicConfig
# and emits failure tracebacks here at DEBUG level (visible with -v).
log = logging.getLogger("sfm.import_bw")
|
||||
|
||||
|
||||
# Blastware event-file extensions: 4-char `AB0T` (T = W or H) for ACH
|
||||
# downloads, 3-char `AB0` for direct downloads. We discover candidates
|
||||
# by length + last-char rather than enumerating every (A, B) pair.
|
||||
def _looks_like_bw_event(path: Path) -> bool:
    """Return True when `path` plausibly names a Blastware event file.

    This is a cheap filename/size heuristic; the file contents are not read.
    A path qualifies when all of the following hold:

    - it is an existing regular file;
    - its extension (without the dot) is 3 or 4 characters long;
    - the extension's last character is ``W`` or ``H`` (any case) or ``0``;
    - the file is at least 70 bytes (header + STRT + footer minimum).

    Any OSError while reading the file size is treated as "not a candidate".
    """
    if not path.is_file():
        return False

    suffix = path.suffix.lstrip(".")
    if len(suffix) not in (3, 4):
        return False

    # Length is 3 or 4 here, so indexing the last character is safe.
    final = suffix[-1]
    if final != "0" and final.upper() not in ("W", "H"):
        return False

    try:
        size = path.stat().st_size
    except OSError:
        return False
    return size >= 70
|
||||
|
||||
|
||||
def _walk(path: Path) -> Iterator[Path]:
    """Yield candidate BW event-file paths found at or beneath `path`.

    A regular file is tested on its own. A directory is searched
    recursively and its entries are visited in sorted order. Anything
    else (for example a path that does not exist) yields nothing.
    """
    if path.is_file():
        entries = [path]
    elif path.is_dir():
        entries = sorted(path.rglob("*"))
    else:
        entries = []

    for entry in entries:
        if _looks_like_bw_event(entry):
            yield entry
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
p = argparse.ArgumentParser(
|
||||
description="Import Blastware-format event files into the SFM store + DB.",
|
||||
)
|
||||
p.add_argument("path", help="File or directory to import.")
|
||||
p.add_argument(
|
||||
"--serial", default=None, metavar="SERIAL",
|
||||
help="Override the serial-number hint (e.g. BE11529). Defaults to "
|
||||
"the value decoded from each BW filename's prefix.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--db-path",
|
||||
default=str(Path(__file__).resolve().parent.parent / "bridges" / "captures" / "seismo_relay.db"),
|
||||
help="Path to seismo_relay.db (default: bridges/captures/seismo_relay.db).",
|
||||
)
|
||||
p.add_argument(
|
||||
"--store-root",
|
||||
default=None,
|
||||
help="Root of the waveform store (default: <db_dir>/waveforms).",
|
||||
)
|
||||
p.add_argument(
|
||||
"--dry-run", action="store_true",
|
||||
help="Parse and report per-file outcomes; don't write anything.",
|
||||
)
|
||||
p.add_argument("-v", "--verbose", action="store_true", help="Debug logging.")
|
||||
args = p.parse_args(argv)
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG if args.verbose else logging.INFO,
|
||||
format="%(asctime)s %(levelname)-7s %(name)s %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
)
|
||||
|
||||
src = Path(args.path).expanduser().resolve()
|
||||
if not src.exists():
|
||||
print(f"error: {src} does not exist", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
db_path = Path(args.db_path).expanduser().resolve()
|
||||
store_root = (
|
||||
Path(args.store_root).expanduser().resolve()
|
||||
if args.store_root else db_path.parent / "waveforms"
|
||||
)
|
||||
|
||||
db = None if args.dry_run else SeismoDb(db_path)
|
||||
store = None if args.dry_run else WaveformStore(store_root)
|
||||
|
||||
candidates = list(_walk(src))
|
||||
if not candidates:
|
||||
print(f"No BW event-file candidates found under {src}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
print(f"Importing {len(candidates)} file(s) from {src}...")
|
||||
if args.dry_run:
|
||||
print("(dry-run — no writes will occur)")
|
||||
|
||||
ok = err = skipped = 0
|
||||
for path in candidates:
|
||||
try:
|
||||
bw_bytes = path.read_bytes()
|
||||
except Exception as exc:
|
||||
print(f" [ERR ] {path}: read failed: {exc}")
|
||||
err += 1
|
||||
continue
|
||||
|
||||
if args.dry_run:
|
||||
# Just parse to verify integrity; don't touch DB or store.
|
||||
from minimateplus import event_file_io
|
||||
try:
|
||||
ev = event_file_io.read_blastware_file(path)
|
||||
ts = ev.timestamp and (
|
||||
f"{ev.timestamp.year}-{ev.timestamp.month:02d}-{ev.timestamp.day:02d} "
|
||||
f"{ev.timestamp.hour:02d}:{ev.timestamp.minute:02d}:{ev.timestamp.second:02d}"
|
||||
) or "?"
|
||||
pv = ev.peak_values
|
||||
pvs = pv.peak_vector_sum if pv and pv.peak_vector_sum is not None else 0.0
|
||||
print(f" [OK ] {path.name} ts={ts} PVS={pvs:.4f}")
|
||||
ok += 1
|
||||
except Exception as exc:
|
||||
print(f" [ERR ] {path}: parse failed: {exc}")
|
||||
err += 1
|
||||
continue
|
||||
|
||||
try:
|
||||
ev, rec = store.save_imported_bw(
|
||||
bw_bytes, source_path=path, serial_hint=args.serial,
|
||||
)
|
||||
# Resolve serial for the DB row. Prefer the hint, then the
|
||||
# one decoded from the filename (already done by the store).
|
||||
serial_used = args.serial or _infer_serial(path.name) or "UNKNOWN"
|
||||
ins, sk = db.insert_events(
|
||||
[ev], serial=serial_used,
|
||||
waveform_records=(
|
||||
{ev._waveform_key.hex(): rec}
|
||||
if ev._waveform_key else None
|
||||
),
|
||||
)
|
||||
tag = "OK " if ins else ("SKIP" if sk else "OK ")
|
||||
print(f" [{tag}] {path.name} → {rec['filename']} "
|
||||
f"({rec['filesize']} B, sha256={rec['sha256'][:12]}…) "
|
||||
f"serial={serial_used} ins={ins} skip={sk}")
|
||||
if ins:
|
||||
ok += 1
|
||||
else:
|
||||
skipped += 1
|
||||
except Exception as exc:
|
||||
print(f" [ERR ] {path}: import failed: {exc}")
|
||||
log.debug("traceback", exc_info=True)
|
||||
err += 1
|
||||
|
||||
print(f"\nDone. ok={ok} skipped={skipped} errors={err}")
|
||||
return 0 if err == 0 else 1
|
||||
|
||||
|
||||
def _infer_serial(filename: str):
    """Decode a serial number from a BW filename.

    Thin wrapper that returns whatever
    ``sfm.waveform_store._serial_from_bw_filename`` produces for
    `filename`. main() uses the result, when truthy, as the serial for an
    imported event if no ``--serial`` override was given.
    """
    # Imported at call time, as in the rest of this CLI's lazy imports.
    from sfm.waveform_store import _serial_from_bw_filename as decode_serial

    return decode_serial(filename)
|
||||
|
||||
|
||||
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user