# seismo-relay/sfm/waveform_store.py
"""
sfm/waveform_store.py — On-disk store for Blastware-format waveform files.
Layout (flat per-serial):
<root>/<serial>/<filename> ← .G10 / .W / .H / etc. (Blastware-readable)
<root>/<serial>/<filename>.a5.pkl ← pickled list of A5 S3Frame dicts
`<filename>` is whatever `minimateplus.blastware_file.blastware_filename`
produces for the event (encodes serial + timestamp + record type). Filenames
never collide for the same physical event.
The `.a5.pkl` sidecar lets the .G10 be regenerated later if the encoder
changes — captures the raw 5A frame stream as serializable dicts so the
schema isn't tied to the `S3Frame` dataclass layout.
"""
from __future__ import annotations
import logging
import pickle
from pathlib import Path
from typing import Optional
from minimateplus.blastware_file import blastware_filename, write_blastware_file
from minimateplus.framing import S3Frame
from minimateplus.models import Event
# Module-level logger; the name is a fixed string rather than __name__.
log = logging.getLogger("sfm.waveform_store")
# Schema version written under the "version" key of every .a5.pkl payload.
A5_PICKLE_VERSION = 1
def _frame_to_dict(f: S3Frame) -> dict:
    """Flatten one frame into a plain dict for the ``.a5.pkl`` sidecar.

    The frame's ``data`` is copied into an immutable ``bytes`` object; every
    other field is stored as-is.
    """
    record = {name: getattr(f, name) for name in ("sub", "page_hi", "page_lo")}
    # Copy the payload so the dict never aliases a mutable buffer.
    record["data"] = bytes(f.data)
    record["chk_byte"] = f.chk_byte
    record["checksum_valid"] = f.checksum_valid
    return record
def _dict_to_frame(d: dict) -> S3Frame:
    """Rebuild an ``S3Frame`` from a dict in the shape ``_frame_to_dict`` emits.

    ``sub``, ``page_hi``, ``page_lo`` and ``data`` are required keys (a missing
    one raises ``KeyError``). ``checksum_valid`` and ``chk_byte`` are optional
    and fall back to ``True`` and ``0`` respectively.
    """
    kwargs = {
        "sub": d["sub"],
        "page_hi": d["page_hi"],
        "page_lo": d["page_lo"],
        "data": bytes(d["data"]),
        "checksum_valid": d.get("checksum_valid", True),
        "chk_byte": d.get("chk_byte", 0),
    }
    return S3Frame(**kwargs)
class WaveformStore:
    """
    Persistent store for Blastware-format waveform files + their A5 source frames.

    Files live under ``<root>/<serial>/``: the Blastware file itself plus a
    ``<filename>.a5.pkl`` sidecar holding the pickled frame dicts.

    Thread safety: write_blastware_file is single-shot; concurrent saves of the
    *same* filename would race, but the filename encodes second-resolution
    timestamps + serial, so collisions across threads/processes are vanishingly
    unlikely in practice.

    NOTE(review): ``serial`` and ``filename`` are joined onto ``root`` without
    validation; callers passing request-derived values should sanitize them.
    """

    def __init__(self, root: str | Path) -> None:
        """Create the store, making the ``root`` directory if it is missing."""
        self.root = Path(root)
        self.root.mkdir(parents=True, exist_ok=True)
        log.info("WaveformStore root=%s", self.root)

    # ── path helpers ────────────────────────────────────────────────────────────

    def _serial_dir(self, serial: str) -> Path:
        """Return ``<root>/<serial>``, creating the directory if needed."""
        d = self.root / serial
        d.mkdir(parents=True, exist_ok=True)
        return d

    @staticmethod
    def _unlink_if_exists(path: Path) -> None:
        """Remove ``path``; a file that is already gone is not an error."""
        try:
            path.unlink()
        except FileNotFoundError:
            pass

    def paths_for(
        self, serial: str, filename: str, *, create: bool = True
    ) -> tuple[Path, Path]:
        """
        Return (blastware_path, a5_pickle_path) for a given serial+filename.

        With ``create=True`` (the default, and the historical behaviour) the
        per-serial directory is created so the returned paths can be written
        to immediately. Pass ``create=False`` for pure lookups so that merely
        querying a serial does not leave an empty directory behind.
        """
        d = self._serial_dir(serial) if create else self.root / serial
        return d / filename, d / f"{filename}.a5.pkl"

    def open_blastware(self, serial: str, filename: str) -> Optional[Path]:
        """
        Return the path to an existing Blastware file, or None.

        The path is built from ``root`` as given to the constructor, so it is
        absolute only if ``root`` was. Only regular files count: an empty
        ``filename`` (which resolves to the serial directory itself) yields
        None rather than a directory path.
        """
        bw_path, _ = self.paths_for(serial, filename, create=False)
        return bw_path if bw_path.is_file() else None

    # ── save / load ─────────────────────────────────────────────────────────────

    def save(
        self,
        ev: Event,
        serial: str,
        a5_frames: list[S3Frame],
    ) -> dict:
        """
        Write the .G10 file and the .a5.pkl sidecar for one event.

        Returns a record dict suitable for persisting alongside the DB row:
        {
            "filename": "M529LKIQ.G10",
            "filesize": 8708,
            "a5_pickle_filename": "M529LKIQ.G10.a5.pkl",
        }

        Idempotent: if the .G10 already exists, it is overwritten with the
        freshly-encoded version (same bytes for the same a5_frames).

        Raises:
            ValueError: if ``a5_frames`` is empty or ``serial`` is empty.
        """
        if not a5_frames:
            raise ValueError("WaveformStore.save: a5_frames is empty")
        if not serial:
            raise ValueError("WaveformStore.save: serial is required")

        filename = blastware_filename(ev, serial)
        bw_path, a5_path = self.paths_for(serial, filename)

        # 1. encode the .G10
        # Delete any stale file at this path so partial writes never leak
        # trailing bytes from a previous larger file (matches the live
        # endpoint's defensive unlink).
        self._unlink_if_exists(bw_path)
        write_blastware_file(ev, a5_frames, bw_path)
        filesize = bw_path.stat().st_size

        # 2. write the .a5.pkl sidecar
        # Drop the old sidecar first so a failure below can never leave a
        # stale sidecar paired with the freshly written .G10.
        self._unlink_if_exists(a5_path)
        payload = {
            "version": A5_PICKLE_VERSION,
            "frames": [_frame_to_dict(f) for f in a5_frames],
        }
        # Dump to a temp file and rename into place: a crash or exception
        # mid-dump then leaves no sidecar at all instead of a truncated one
        # that load_a5 would fail to unpickle.
        tmp_path = a5_path.with_name(f"{a5_path.name}.tmp")
        try:
            with tmp_path.open("wb") as fp:
                pickle.dump(payload, fp, protocol=pickle.HIGHEST_PROTOCOL)
            tmp_path.replace(a5_path)
        finally:
            # No-op after a successful rename; cleans up after a failure.
            self._unlink_if_exists(tmp_path)

        log.info(
            "WaveformStore.save serial=%s filename=%s filesize=%d frames=%d",
            serial, filename, filesize, len(a5_frames),
        )
        return {
            "filename": filename,
            "filesize": filesize,
            "a5_pickle_filename": a5_path.name,
        }

    def load_a5(self, serial: str, filename: str) -> Optional[list[S3Frame]]:
        """
        Re-hydrate the pickled A5 frame stream for a stored event.

        Returns None if the sidecar is missing, cannot be unpickled (e.g. it
        is truncated), or does not have the expected ``{"frames": [...]}``
        shape; the latter two cases are logged as warnings.
        """
        _, a5_path = self.paths_for(serial, filename, create=False)

        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. This is only safe while the store directory is writable
        # exclusively by trusted code; never point it at untrusted files.
        try:
            with a5_path.open("rb") as fp:
                payload = pickle.load(fp)
        except FileNotFoundError:
            return None
        except (
            pickle.UnpicklingError,
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
        ) as exc:
            log.warning(
                "WaveformStore.load_a5: unreadable sidecar at %s (%s)",
                a5_path, exc,
            )
            return None

        if not isinstance(payload, dict) or "frames" not in payload:
            log.warning("WaveformStore.load_a5: malformed sidecar at %s", a5_path)
            return None

        version = payload.get("version")
        if version != A5_PICKLE_VERSION:
            # Still attempt the load: the frame dicts tolerate missing
            # optional keys, but make the mismatch visible in the logs.
            log.warning(
                "WaveformStore.load_a5: sidecar %s has version %r (expected %r)",
                a5_path, version, A5_PICKLE_VERSION,
            )
        return [_dict_to_frame(d) for d in payload["frames"]]