768 lines
32 KiB
Python
768 lines
32 KiB
Python
"""
|
|
sfm/waveform_store.py — On-disk store for Blastware-format event files.
|
|
|
|
Layout (flat per-serial, four files per event):
|
|
|
|
<root>/<serial>/<filename> ← event file (BW-readable binary)
|
|
<root>/<serial>/<filename>.a5.pkl ← pickled list of A5 S3Frame dicts
|
|
<root>/<serial>/<filename>.h5 ← clean waveform arrays (HDF5)
|
|
<root>/<serial>/<filename>.sfm.json ← modern sidecar (peaks, project,
|
|
review state, extensions)
|
|
|
|
`<filename>` is whatever `minimateplus.blastware_file.blastware_filename`
|
|
produces for the event. The extension is NOT a fixed type tag — it
|
|
encodes the event timestamp (`AB0T` format).
|
|
|
|
Roles:
|
|
- BW binary: what Blastware reads. Untouched. The user-facing review
|
|
waveform viewer.
|
|
- .a5.pkl: regenerative source. Lets the BW binary be rebuilt
|
|
byte-for-byte if the encoder changes. Never delete.
|
|
- .h5: clean per-channel waveform arrays in physical units (in/s for
|
|
geo, psi for mic) plus event metadata. Canonical format for
|
|
downstream analysis tools and the `/device/event/{idx}/waveform`
|
|
endpoint's plot-JSON output.
|
|
- .sfm.json: small, queryable metadata + review state. SQL
|
|
`events.false_trigger` is a derived index kept in sync via
|
|
`patch_sidecar()`.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import datetime
|
|
import logging
|
|
import pickle
|
|
import shutil
|
|
from pathlib import Path
|
|
from typing import Optional, Union
|
|
|
|
from minimateplus import event_file_io
|
|
from minimateplus.blastware_file import blastware_filename, write_blastware_file
|
|
from minimateplus.framing import S3Frame
|
|
from minimateplus.models import Event
|
|
from sfm import event_hdf5
|
|
|
|
log = logging.getLogger("sfm.waveform_store")
|
|
|
|
A5_PICKLE_VERSION = 1
|
|
|
|
|
|
def _frame_to_dict(f: S3Frame) -> dict:
|
|
return {
|
|
"sub": f.sub,
|
|
"page_hi": f.page_hi,
|
|
"page_lo": f.page_lo,
|
|
"data": bytes(f.data),
|
|
"chk_byte": f.chk_byte,
|
|
"checksum_valid": f.checksum_valid,
|
|
}
|
|
|
|
|
|
def _dict_to_frame(d: dict) -> S3Frame:
|
|
return S3Frame(
|
|
sub=d["sub"],
|
|
page_hi=d["page_hi"],
|
|
page_lo=d["page_lo"],
|
|
data=bytes(d["data"]),
|
|
checksum_valid=d.get("checksum_valid", True),
|
|
chk_byte=d.get("chk_byte", 0),
|
|
)
|
|
|
|
|
|
class WaveformStore:
|
|
"""
|
|
Persistent store for Blastware-format waveform files + their A5 source frames.
|
|
|
|
Thread safety: write_blastware_file is single-shot; concurrent saves of the
|
|
*same* filename would race, but the filename encodes second-resolution
|
|
timestamps + serial, so collisions across threads/processes are vanishingly
|
|
unlikely in practice.
|
|
"""
|
|
|
|
def __init__(self, root: str | Path) -> None:
|
|
self.root = Path(root)
|
|
self.root.mkdir(parents=True, exist_ok=True)
|
|
log.info("WaveformStore root=%s", self.root)
|
|
|
|
# ── path helpers ────────────────────────────────────────────────────────────
|
|
|
|
def _serial_dir(self, serial: str) -> Path:
|
|
d = self.root / serial
|
|
d.mkdir(parents=True, exist_ok=True)
|
|
return d
|
|
|
|
def paths_for(self, serial: str, filename: str) -> tuple[Path, Path]:
|
|
"""Return (blastware_path, a5_pickle_path) for a given serial+filename.
|
|
|
|
For the sidecar path use `sidecar_path_for()` — kept separate so
|
|
existing callers don't need to unpack a 3-tuple.
|
|
"""
|
|
d = self._serial_dir(serial)
|
|
return d / filename, d / f"{filename}.a5.pkl"
|
|
|
|
def sidecar_path_for(self, serial: str, filename: str) -> Path:
|
|
"""Return absolute path to the .sfm.json sidecar for a given event."""
|
|
return self._serial_dir(serial) / f"{filename}.sfm.json"
|
|
|
|
def hdf5_path_for(self, serial: str, filename: str) -> Path:
|
|
"""Return absolute path to the .h5 clean-waveform file for a given event."""
|
|
return self._serial_dir(serial) / f"{filename}.h5"
|
|
|
|
def txt_path_for(self, serial: str, filename: str) -> Path:
|
|
"""Return absolute path to the preserved BW ASCII report (.TXT)
|
|
for a given event.
|
|
|
|
We name it ``<filename>_ASCII.TXT`` to match BW's own filename
|
|
convention in the ACH folder. Saved at ingest time alongside
|
|
the binary so the parser bug fixes can be applied retroactively
|
|
by re-parsing without needing to re-forward from the watcher PC.
|
|
"""
|
|
return self._serial_dir(serial) / f"{filename}_ASCII.TXT"
|
|
|
|
def open_blastware(self, serial: str, filename: str) -> Optional[Path]:
|
|
"""Return absolute path to an existing event file or None."""
|
|
bw_path, _ = self.paths_for(serial, filename)
|
|
return bw_path if bw_path.exists() else None
|
|
|
|
def open_txt(self, serial: str, filename: str) -> Optional[Path]:
|
|
"""Return absolute path to the preserved BW ASCII report for an
|
|
event, or None if the .TXT wasn't saved at ingest time (events
|
|
ingested before .TXT preservation landed will show None until
|
|
re-forwarded)."""
|
|
p = self.txt_path_for(serial, filename)
|
|
return p if p.exists() else None
|
|
|
|
# ── save / load ─────────────────────────────────────────────────────────────
|
|
|
|
def save(
|
|
self,
|
|
ev: Event,
|
|
serial: str,
|
|
a5_frames: list[S3Frame],
|
|
*,
|
|
source_kind: str = "sfm-live",
|
|
geo_range = "normal",
|
|
) -> dict:
|
|
"""
|
|
Write all four event-file artifacts for one event:
|
|
- <filename> BW binary
|
|
- <filename>.a5.pkl raw A5 frame pickle
|
|
- <filename>.h5 clean waveform (HDF5)
|
|
- <filename>.sfm.json modern sidecar (metadata + review)
|
|
|
|
Returns a record dict suitable for persisting alongside the DB row:
|
|
|
|
{
|
|
"filename": "M529LKIQ.7M0W",
|
|
"filesize": 8708,
|
|
"sha256": "a1b2c3...",
|
|
"a5_pickle_filename": "M529LKIQ.7M0W.a5.pkl",
|
|
"hdf5_filename": "M529LKIQ.7M0W.h5",
|
|
"sidecar_filename": "M529LKIQ.7M0W.sfm.json",
|
|
}
|
|
|
|
`source_kind` flows into `sidecar.source.kind` — callers should
|
|
pass "sfm-live" (default) for the live endpoint and "sfm-ach" for
|
|
the ACH ingestion path. BW-imported events use save_imported_bw()
|
|
instead.
|
|
|
|
`geo_range` controls the ADC-counts → in/s scaling in the HDF5
|
|
file ("normal" = 10 in/s FS, "sensitive" = 1.25 in/s FS).
|
|
Defaults to "normal" — callers with compliance-config access
|
|
should pass the actual unit setting so the saved samples are in
|
|
the right units.
|
|
|
|
Idempotent: if the event file already exists, it is overwritten
|
|
with the freshly-encoded version (same bytes for the same
|
|
a5_frames) and the sidecar's review block is preserved across
|
|
re-saves.
|
|
"""
|
|
if not a5_frames:
|
|
raise ValueError("WaveformStore.save: a5_frames is empty")
|
|
if not serial:
|
|
raise ValueError("WaveformStore.save: serial is required")
|
|
|
|
filename = blastware_filename(ev, serial)
|
|
bw_path, a5_path = self.paths_for(serial, filename)
|
|
sidecar_path = self.sidecar_path_for(serial, filename)
|
|
hdf5_path = self.hdf5_path_for(serial, filename)
|
|
|
|
# 1. encode the event file (defensive unlink prevents trailing-byte
|
|
# leaks from a previous larger file on synced/odd filesystems).
|
|
try:
|
|
bw_path.unlink()
|
|
except FileNotFoundError:
|
|
pass
|
|
write_blastware_file(ev, a5_frames, bw_path)
|
|
filesize = bw_path.stat().st_size
|
|
sha256 = event_file_io.file_sha256(bw_path)
|
|
|
|
# 2. write the .a5.pkl sidecar
|
|
try:
|
|
a5_path.unlink()
|
|
except FileNotFoundError:
|
|
pass
|
|
payload = {
|
|
"version": A5_PICKLE_VERSION,
|
|
"frames": [_frame_to_dict(f) for f in a5_frames],
|
|
}
|
|
with a5_path.open("wb") as fp:
|
|
pickle.dump(payload, fp, protocol=pickle.HIGHEST_PROTOCOL)
|
|
|
|
# 3. write the .h5 clean-waveform file (samples in physical units).
|
|
# Best-effort: a write failure shouldn't sink the rest of the save
|
|
# (the HDF5 can be regenerated later from the .a5.pkl).
|
|
hdf5_filename: Optional[str] = None
|
|
try:
|
|
event_hdf5.write_event_hdf5(
|
|
hdf5_path, ev,
|
|
serial=serial,
|
|
geo_range=geo_range,
|
|
source_kind=source_kind,
|
|
)
|
|
hdf5_filename = hdf5_path.name
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save: HDF5 write failed for %s: %s — continuing without .h5",
|
|
hdf5_path, exc,
|
|
)
|
|
|
|
# 4. write the .sfm.json sidecar. Preserve any existing review
|
|
# block + extensions across re-saves so user edits aren't lost
|
|
# when the same event is re-downloaded (e.g. via Force refresh).
|
|
existing_review = None
|
|
existing_extensions = None
|
|
if sidecar_path.exists():
|
|
try:
|
|
old = event_file_io.read_sidecar(sidecar_path)
|
|
existing_review = old.get("review")
|
|
existing_extensions = old.get("extensions")
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save: existing sidecar at %s unreadable (%s); overwriting",
|
|
sidecar_path, exc,
|
|
)
|
|
|
|
sidecar = event_file_io.event_to_sidecar_dict(
|
|
ev,
|
|
serial=serial,
|
|
blastware_filename=filename,
|
|
blastware_filesize=filesize,
|
|
blastware_sha256=sha256,
|
|
source_kind=source_kind,
|
|
a5_pickle_filename=a5_path.name,
|
|
review=existing_review,
|
|
extensions=existing_extensions,
|
|
)
|
|
event_file_io.write_sidecar(sidecar_path, sidecar)
|
|
|
|
log.info(
|
|
"WaveformStore.save serial=%s filename=%s filesize=%d frames=%d "
|
|
"h5=%s sidecar=%s",
|
|
serial, filename, filesize, len(a5_frames),
|
|
hdf5_filename or "(skipped)", sidecar_path.name,
|
|
)
|
|
return {
|
|
"filename": filename,
|
|
"filesize": filesize,
|
|
"sha256": sha256,
|
|
"a5_pickle_filename": a5_path.name,
|
|
"hdf5_filename": hdf5_filename,
|
|
"sidecar_filename": sidecar_path.name,
|
|
}
|
|
|
|
def save_imported_bw(
|
|
self,
|
|
bw_bytes: bytes,
|
|
source_path: Path,
|
|
*,
|
|
serial_hint: Optional[str] = None,
|
|
bw_report_text: Optional[Union[str, bytes]] = None,
|
|
) -> tuple[Event, dict]:
|
|
"""
|
|
Ingest a Blastware event file produced by an external tool
|
|
(Blastware's own ACH, manual download, etc.) where the source A5
|
|
frames aren't available.
|
|
|
|
Workflow:
|
|
1. Parse the bytes via event_file_io.read_blastware_file (writes
|
|
a temp file to do that, since the parser takes a path).
|
|
2. Optionally parse a paired BW ASCII event report (the .TXT
|
|
file BW writes alongside the binary). When supplied, its
|
|
decoded fields land in the sidecar's `bw_report` block AND
|
|
overlay the device-authoritative peak values into the
|
|
top-level `peak_values` block. This is the right path for
|
|
the ACH-forwarder daemon use case where Blastware's own
|
|
ACH writes both files into the watch folder.
|
|
3. Resolve serial from BW filename (`<P><serial3>...`) or use
|
|
serial_hint. Falls back to "UNKNOWN".
|
|
4. Copy the BW bytes verbatim into <root>/<serial>/<filename>.
|
|
5. Write the .sfm.json sidecar with source.kind = "bw-import"
|
|
and a5_pickle_filename = None. Does NOT write a .a5.pkl
|
|
(no A5 source available; byte-for-byte regeneration not
|
|
possible — the on-disk BW file IS the byte-for-byte source).
|
|
|
|
Returns (event, record_dict) so callers can both insert into
|
|
SeismoDb and surface the parsed Event.
|
|
"""
|
|
# Stash the bytes to a temp path so read_blastware_file (path-based)
|
|
# can parse without us duplicating its logic.
|
|
import tempfile
|
|
with tempfile.NamedTemporaryFile(suffix=".bw", delete=False) as tmp:
|
|
tmp.write(bw_bytes)
|
|
tmp_path = Path(tmp.name)
|
|
try:
|
|
ev = event_file_io.read_blastware_file(tmp_path)
|
|
finally:
|
|
try:
|
|
tmp_path.unlink()
|
|
except FileNotFoundError:
|
|
pass
|
|
|
|
# read_blastware_file derives record_type from its path arg, but
|
|
# that arg is the tmp file (suffix ".bw") — so override with the
|
|
# original filename's encoded type (H/W/M/E/C in the BW AB0T
|
|
# scheme). Without this override every BW-imported event lands
|
|
# in the DB with record_type="Waveform" regardless of the actual
|
|
# type (Histogram, Manual, etc.).
|
|
ev.record_type = event_file_io.derive_record_type_from_filename(
|
|
source_path.name
|
|
)
|
|
|
|
# Parse the BW ASCII report if one was supplied. Failures here
|
|
# are non-fatal: we still write the binary + sidecar without the
|
|
# rich derived fields.
|
|
bw_report = None
|
|
if bw_report_text is not None:
|
|
try:
|
|
from minimateplus.bw_ascii_report import parse_report
|
|
bw_report = parse_report(bw_report_text)
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save_imported_bw: BW report parse failed: %s — continuing without it",
|
|
exc,
|
|
)
|
|
|
|
# If we have a report, overlay its device-authoritative fields
|
|
# (peaks, project, sample_rate, record_time) onto the Event
|
|
# BEFORE handing it to db.insert_events(). Without this overlay
|
|
# the DB row gets `peak_values` from _peaks_from_samples(), which
|
|
# runs the still-undecoded waveform codec on the BW body and
|
|
# produces ±10 in/s saturation values on every channel for every
|
|
# event. The sidecar JSON had the correct values via
|
|
# event_to_sidecar_dict(bw_report=...) but the DB columns didn't.
|
|
if bw_report is not None:
|
|
try:
|
|
event_file_io.apply_report_to_event(ev, bw_report)
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save_imported_bw: failed to overlay report onto event: %s",
|
|
exc,
|
|
)
|
|
|
|
# Resolve serial. blastware_filename derives a 4-char prefix from
|
|
# the numeric serial (e.g. BE11529 → M529); we go the other way
|
|
# via the source filename if a hint wasn't given.
|
|
serial = serial_hint or _serial_from_bw_filename(source_path.name) or "UNKNOWN"
|
|
|
|
# Use the source filename verbatim — it already encodes timestamp
|
|
# + record type per BW's AB0T scheme, and we want to preserve it
|
|
# so the file BW knows about can be opened back in BW.
|
|
filename = source_path.name
|
|
bw_path = self._serial_dir(serial) / filename
|
|
|
|
# 1. copy bytes
|
|
bw_path.write_bytes(bw_bytes)
|
|
filesize = bw_path.stat().st_size
|
|
sha256 = event_file_io.file_sha256(bw_path)
|
|
|
|
# 1b. preserve the raw BW ASCII report (.TXT) alongside the binary.
|
|
# Saved at <root>/<serial>/<filename>_ASCII.TXT. Lets us re-parse
|
|
# offline after parser fixes without needing to re-forward from
|
|
# the watcher PC. Negligible storage cost (~15 KB per event).
|
|
# Skipped silently when no report was supplied (live download path,
|
|
# manual upload without paired TXT).
|
|
txt_filename: Optional[str] = None
|
|
if bw_report_text is not None:
|
|
try:
|
|
txt_path = self.txt_path_for(serial, filename)
|
|
if isinstance(bw_report_text, bytes):
|
|
txt_path.write_bytes(bw_report_text)
|
|
else:
|
|
txt_path.write_text(bw_report_text)
|
|
txt_filename = txt_path.name
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save_imported_bw: failed to save TXT for %s: %s — "
|
|
"continuing without it",
|
|
filename, exc,
|
|
)
|
|
|
|
# 2. write the .h5 clean-waveform file from the parsed Event.
|
|
# Note: peaks here are computed from raw samples (the BW file
|
|
# doesn't carry the device-authoritative 0C peaks). Best-effort.
|
|
hdf5_path = self.hdf5_path_for(serial, filename)
|
|
hdf5_filename: Optional[str] = None
|
|
try:
|
|
event_hdf5.write_event_hdf5(
|
|
hdf5_path, ev,
|
|
serial=serial,
|
|
geo_range="normal", # BW file doesn't carry the range; assume Normal
|
|
source_kind="bw-import",
|
|
)
|
|
hdf5_filename = hdf5_path.name
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save_imported_bw: HDF5 write failed for %s: %s — continuing",
|
|
hdf5_path, exc,
|
|
)
|
|
|
|
# 3. write sidecar with source.kind = bw-import
|
|
sidecar_path = self.sidecar_path_for(serial, filename)
|
|
existing_review = None
|
|
if sidecar_path.exists():
|
|
try:
|
|
existing_review = event_file_io.read_sidecar(sidecar_path).get("review")
|
|
except Exception:
|
|
pass
|
|
|
|
sidecar = event_file_io.event_to_sidecar_dict(
|
|
ev,
|
|
serial=serial,
|
|
blastware_filename=filename,
|
|
blastware_filesize=filesize,
|
|
blastware_sha256=sha256,
|
|
source_kind="bw-import",
|
|
a5_pickle_filename=None,
|
|
txt_filename=txt_filename,
|
|
review=existing_review,
|
|
bw_report=bw_report,
|
|
)
|
|
event_file_io.write_sidecar(sidecar_path, sidecar)
|
|
|
|
log.info(
|
|
"WaveformStore.save_imported_bw serial=%s filename=%s filesize=%d "
|
|
"h5=%s (no .a5.pkl — A5 source unavailable for BW-imported files)",
|
|
serial, filename, filesize, hdf5_filename or "(skipped)",
|
|
)
|
|
return ev, {
|
|
"filename": filename,
|
|
"filesize": filesize,
|
|
"sha256": sha256,
|
|
"a5_pickle_filename": None,
|
|
"hdf5_filename": hdf5_filename,
|
|
"sidecar_filename": sidecar_path.name,
|
|
"serial": serial,
|
|
}
|
|
|
|
def save_imported_idf(
|
|
self,
|
|
idf_bytes: bytes,
|
|
source_path: Path,
|
|
*,
|
|
serial_hint: Optional[str] = None,
|
|
idf_report_text: Optional[Union[str, bytes]] = None,
|
|
) -> tuple[Optional["Event"], dict]:
|
|
"""
|
|
Ingest a Thor (Micromate Series IV) IDF event file (`.IDFW` or
|
|
`.IDFH`) produced by Thor's TXT exporter.
|
|
|
|
Workflow:
|
|
1. For sig-A `.IDFW` binaries, decode samples + binary metadata
|
|
via ``micromate.idf_file.read_idf_file()``. Failure or
|
|
non-IDFW path falls through to the .txt-only flow.
|
|
2. Parse the paired TXT report (when supplied) via
|
|
``micromate.parse_idf_report`` → dict. TXT remains the
|
|
source of truth for fields the binary doesn't yet supply
|
|
(full peak set with ZC freq / Time of Peak, sensor self-check,
|
|
firmware string, project strings).
|
|
3. Wrap parsed dict + filename into a typed ``micromate.IdfEvent``.
|
|
4. Copy bytes verbatim into ``<root>/<serial>/<filename>``.
|
|
5. Bridge IdfEvent → ``minimateplus.Event`` and attach
|
|
``raw_samples`` from the binary decoder (when available).
|
|
6. Write the `.h5` clean-waveform file when samples decoded.
|
|
7. Write the ``.sfm.json`` sidecar with
|
|
``source.kind = "idf-import"`` and the full raw IDF report
|
|
under ``extensions.idf_report``.
|
|
|
|
Returns ``(event, record_dict)`` so the endpoint can both insert
|
|
into SeismoDb and surface the parsed event.
|
|
"""
|
|
from micromate import IdfEvent, parse_idf_report
|
|
|
|
# 1. Binary decode (sig-A IDFW and IDFH). Non-fatal: any failure
|
|
# leaves samples / binary metadata unfilled and we proceed with
|
|
# the .txt path as before.
|
|
idf_samples: Optional[dict] = None
|
|
idf_intervals: Optional[list] = None
|
|
binary_md = None
|
|
binary_peaks = None
|
|
is_histogram = False
|
|
try:
|
|
from micromate.idf_file import read_idf_file
|
|
res = read_idf_file(source_path)
|
|
idf_samples = res.samples or None
|
|
idf_intervals = res.intervals
|
|
is_histogram = res.intervals is not None
|
|
binary_md = res.binary_metadata
|
|
binary_peaks = res.event.peaks
|
|
except NotImplementedError:
|
|
# sig-B — codec doesn't handle this yet.
|
|
pass
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save_imported_idf: binary codec failed for %s: %s — "
|
|
"falling back to .txt-only ingest",
|
|
source_path.name, exc,
|
|
)
|
|
|
|
# 2. Parse the .txt sidecar (best-effort; non-fatal on failure).
|
|
report_dict: dict = {}
|
|
if idf_report_text is not None:
|
|
try:
|
|
report_dict = parse_idf_report(idf_report_text)
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save_imported_idf: report parse failed: %s — continuing without it",
|
|
exc,
|
|
)
|
|
|
|
# 3. Backfill report_dict with binary metadata for fields the
|
|
# .txt didn't supply. Binary takes precedence on tied fields
|
|
# where the binary is more reliable (timestamp, sample_rate),
|
|
# and fills in fields entirely missing from the .txt.
|
|
if binary_md is not None:
|
|
if binary_md.serial and not report_dict.get("serial_number"):
|
|
report_dict["serial_number"] = binary_md.serial
|
|
if binary_md.event_datetime and not report_dict.get("event_datetime"):
|
|
report_dict["event_datetime"] = binary_md.event_datetime
|
|
if binary_md.sample_rate and not report_dict.get("sample_rate"):
|
|
report_dict["sample_rate"] = binary_md.sample_rate
|
|
if binary_md.record_time_sec and not report_dict.get("record_time_sec"):
|
|
report_dict["record_time_sec"] = binary_md.record_time_sec
|
|
# Calibration date (binary) vs calibration text (.txt) cohabit
|
|
# under different keys; no overwrite needed.
|
|
if binary_md.event_datetime and not report_dict.get("event_type"):
|
|
report_dict["event_type"] = (
|
|
"Full Histogram" if is_histogram else "Full Waveform"
|
|
)
|
|
|
|
# Binary-derived peaks fill in when the .txt didn't supply them.
|
|
# They're ~3% low vs the device-authoritative .txt values (residual
|
|
# codec drift), so .txt always wins when present.
|
|
if binary_peaks is not None:
|
|
if binary_peaks.transverse_ips and not report_dict.get("tran_ppv"):
|
|
report_dict["tran_ppv"] = binary_peaks.transverse_ips
|
|
if binary_peaks.vertical_ips and not report_dict.get("vert_ppv"):
|
|
report_dict["vert_ppv"] = binary_peaks.vertical_ips
|
|
if binary_peaks.longitudinal_ips and not report_dict.get("long_ppv"):
|
|
report_dict["long_ppv"] = binary_peaks.longitudinal_ips
|
|
|
|
# 4. Build the typed IdfEvent. Filename is authoritative for
|
|
# (serial, timestamp, kind); the report's event_datetime takes
|
|
# precedence over the filename timestamp inside from_report().
|
|
idf_event = IdfEvent.from_report(report_dict, source_path.name)
|
|
|
|
# Operator-supplied serial_hint wins over the binary's filename
|
|
# prefix when both are present (e.g. callers passing a known-good
|
|
# serial that overrides a misnamed export).
|
|
serial = serial_hint or idf_event.serial or "UNKNOWN"
|
|
|
|
# 5. Filesystem write of binary bytes.
|
|
filename = source_path.name
|
|
bw_path = self._serial_dir(serial) / filename
|
|
bw_path.write_bytes(idf_bytes)
|
|
filesize = bw_path.stat().st_size
|
|
sha256 = event_file_io.file_sha256(bw_path)
|
|
|
|
# _waveform_key dedups (serial, timestamp) rows in the events
|
|
# table. Use the binary's sha256 (first 16 bytes) as a stable
|
|
# surrogate — every distinct binary maps to a distinct row.
|
|
waveform_key = bytes.fromhex(sha256)[:16]
|
|
|
|
# 6. Bridge to minimateplus.Event for the existing sidecar / DB
|
|
# insert paths. See IdfEvent.to_minimateplus_event() for the
|
|
# caveats of this bridge (mic units, missing fields → sidecar).
|
|
ev = idf_event.to_minimateplus_event(waveform_key)
|
|
|
|
# Attach the decoded sample arrays. Thor's decoder counts use
|
|
# LSB = 0.0003 in/s for geo (vs BW's 16-count units at 0.005 in/s)
|
|
# — the .h5 writer's geo_range="normal" yields LSB = 10/32768
|
|
# ≈ 0.000305 in/s, so plotted samples come out ~1.7% high.
|
|
# Acceptable known offset; refine with a Thor-aware h5 path later.
|
|
if idf_samples is not None:
|
|
ev.raw_samples = idf_samples
|
|
n_samples = max((len(idf_samples.get(ch, [])) for ch in ("Tran", "Vert", "Long", "MicL")), default=0)
|
|
ev.total_samples = ev.total_samples or n_samples
|
|
|
|
# 7. Write the .h5 clean-waveform file when we actually have samples.
|
|
# Histograms (IDFH) don't have waveform samples — skip h5 for those.
|
|
hdf5_filename: Optional[str] = None
|
|
if idf_samples is not None and not is_histogram:
|
|
hdf5_path = self.hdf5_path_for(serial, filename)
|
|
try:
|
|
event_hdf5.write_event_hdf5(
|
|
hdf5_path, ev,
|
|
serial=serial,
|
|
geo_range="normal", # Thor's geo full scale is also 10 in/s (Normal)
|
|
source_kind="idf-import",
|
|
)
|
|
hdf5_filename = hdf5_path.name
|
|
except Exception as exc:
|
|
log.warning(
|
|
"save_imported_idf: HDF5 write failed for %s: %s — continuing without .h5",
|
|
hdf5_path, exc,
|
|
)
|
|
|
|
# 8. Write the sidecar. Source kind "idf-import" is on the allow-list.
|
|
sidecar_path = self.sidecar_path_for(serial, filename)
|
|
existing_review = None
|
|
if sidecar_path.exists():
|
|
try:
|
|
existing_review = event_file_io.read_sidecar(sidecar_path).get("review")
|
|
except Exception:
|
|
pass
|
|
|
|
sidecar = event_file_io.event_to_sidecar_dict(
|
|
ev,
|
|
serial=serial,
|
|
blastware_filename=filename,
|
|
blastware_filesize=filesize,
|
|
blastware_sha256=sha256,
|
|
source_kind="idf-import",
|
|
a5_pickle_filename=None,
|
|
review=existing_review,
|
|
)
|
|
# Stash the full parsed IDF report under extensions so downstream
|
|
# consumers can recover the rich derived fields that don't fit
|
|
# the BW-shaped event model (Peak Acceleration / Displacement,
|
|
# Time of Peak, sensor self-check, calibration, firmware).
|
|
if report_dict:
|
|
sidecar["extensions"]["idf_report"] = report_dict
|
|
# For histograms, also stash the binary-decoded per-interval
|
|
# records so the UI / report layer doesn't need to re-walk the
|
|
# IDFH file at render time.
|
|
if idf_intervals is not None:
|
|
sidecar["extensions"]["idf_intervals"] = [
|
|
{
|
|
"offset": iv.offset,
|
|
"tran_peak": iv.peak_count("Tran"),
|
|
"tran_halfp": iv.tran_halfp,
|
|
"tran_freq": iv.freq_hz("Tran"),
|
|
"vert_peak": iv.peak_count("Vert"),
|
|
"vert_halfp": iv.vert_halfp,
|
|
"vert_freq": iv.freq_hz("Vert"),
|
|
"long_peak": iv.peak_count("Long"),
|
|
"long_halfp": iv.long_halfp,
|
|
"long_freq": iv.freq_hz("Long"),
|
|
"mic_peak": iv.peak_count("MicL"),
|
|
"mic_halfp": iv.micl_halfp,
|
|
"mic_freq": iv.freq_hz("MicL"),
|
|
}
|
|
for iv in idf_intervals
|
|
]
|
|
event_file_io.write_sidecar(sidecar_path, sidecar)
|
|
|
|
log.info(
|
|
"WaveformStore.save_imported_idf serial=%s filename=%s filesize=%d "
|
|
"kind=%s report_attached=%s binary_decoded=%s h5=%s intervals=%d",
|
|
serial, filename, filesize,
|
|
"histogram" if is_histogram else "waveform",
|
|
bool(report_dict),
|
|
(idf_samples is not None) or (idf_intervals is not None),
|
|
hdf5_filename or "(skipped)",
|
|
len(idf_intervals) if idf_intervals else 0,
|
|
)
|
|
return ev, {
|
|
"filename": filename,
|
|
"filesize": filesize,
|
|
"sha256": sha256,
|
|
"a5_pickle_filename": None,
|
|
"hdf5_filename": hdf5_filename,
|
|
"sidecar_filename": sidecar_path.name,
|
|
"serial": serial,
|
|
}
|
|
|
|
def load_a5(self, serial: str, filename: str) -> Optional[list[S3Frame]]:
|
|
"""
|
|
Re-hydrate the pickled A5 frame stream for a stored event.
|
|
Returns None if the sidecar is missing.
|
|
"""
|
|
_, a5_path = self.paths_for(serial, filename)
|
|
if not a5_path.exists():
|
|
return None
|
|
with a5_path.open("rb") as fp:
|
|
payload = pickle.load(fp)
|
|
if not isinstance(payload, dict) or "frames" not in payload:
|
|
log.warning("WaveformStore.load_a5: malformed sidecar at %s", a5_path)
|
|
return None
|
|
return [_dict_to_frame(d) for d in payload["frames"]]
|
|
|
|
# ── modern .sfm.json sidecar accessors ──────────────────────────────────────
|
|
|
|
def load_sidecar(self, serial: str, filename: str) -> Optional[dict]:
|
|
"""Return the parsed .sfm.json sidecar dict, or None if missing."""
|
|
path = self.sidecar_path_for(serial, filename)
|
|
if not path.exists():
|
|
return None
|
|
try:
|
|
return event_file_io.read_sidecar(path)
|
|
except Exception as exc:
|
|
log.warning("load_sidecar: failed to read %s: %s", path, exc)
|
|
return None
|
|
|
|
def patch_sidecar(
|
|
self,
|
|
serial: str,
|
|
filename: str,
|
|
*,
|
|
review: Optional[dict] = None,
|
|
extensions: Optional[dict] = None,
|
|
reviewer_now: bool = True,
|
|
) -> Optional[dict]:
|
|
"""
|
|
JSON-merge-patch the .sfm.json sidecar's review/extensions blocks.
|
|
Returns the new full dict, or None if the sidecar doesn't exist.
|
|
"""
|
|
path = self.sidecar_path_for(serial, filename)
|
|
if not path.exists():
|
|
return None
|
|
return event_file_io.patch_sidecar(
|
|
path,
|
|
review=review,
|
|
extensions=extensions,
|
|
reviewer_now=reviewer_now,
|
|
)
|
|
|
|
|
|
# ── helpers ─────────────────────────────────────────────────────────────────────
|
|
|
|
def _serial_from_bw_filename(name: str) -> Optional[str]:
|
|
"""
|
|
Reverse of `blastware_filename`'s serial-prefix encoding.
|
|
|
|
BW filename format (V10.72): `<P><serial3><stem4>.<ext>`
|
|
where P = chr(ord('B') + floor(serial // 1000))
|
|
and serial3 = f"{serial % 1000:03d}".
|
|
|
|
Examples (from CLAUDE.md verification archive):
|
|
P036... → BE14036 H907... → BE6907
|
|
M529... → BE11529 T003... → BE18003
|
|
|
|
Returns the inferred BE-prefix serial (e.g. "BE11529") or None when
|
|
the filename doesn't match the expected pattern.
|
|
"""
|
|
if not name:
|
|
return None
|
|
# First letter encodes the thousands group; next 3 chars encode the
|
|
# last 3 digits of the serial.
|
|
base = name.split(".", 1)[0]
|
|
if len(base) < 4 or not base[0].isalpha() or not base[1:4].isdigit():
|
|
return None
|
|
prefix_letter = base[0].upper()
|
|
if prefix_letter < "B":
|
|
return None
|
|
thousands = ord(prefix_letter) - ord("B")
|
|
serial_num = thousands * 1000 + int(base[1:4])
|
|
return f"BE{serial_num}"
|