534 lines
19 KiB
Python
534 lines
19 KiB
Python
"""
minimateplus/event_file_io.py — modern event-file (.sfm.json sidecar) IO.

This module is the single home for event-file conversion code that doesn't
fit cleanly inside `blastware_file.py` (which is the BW binary codec):

  - sidecar JSON read/write (the modern per-event metadata file)
  - read_blastware_file() — reverse of write_blastware_file, used by
    the BW-importer flow when SFM is ingesting files produced by
    Blastware's own ACH (where the source A5 frames aren't available).

Sidecar schema v1 layout — see docs in the project plan or the schema
declared in `event_to_sidecar_dict()`.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import datetime
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import struct
|
|
from pathlib import Path
|
|
from typing import Optional, Union
|
|
|
|
from .models import Event, PeakValues, ProjectInfo, Timestamp
|
|
from . import blastware_file as _bw # avoid circular reference at module load
|
|
|
|
log = logging.getLogger(__name__)

# Sidecar JSON schema version. Increase it whenever existing fields change
# shape. Readers must refuse anything greater than SCHEMA_VERSION; new data
# placed under `extensions` is forward-compatible and needs no bump.
SCHEMA_VERSION = 1
SIDECAR_KIND = "sfm.event"

# Baked-in tool_version stamp (callers may override it). It is hard-coded
# rather than taken straight from importlib.metadata because the installed
# dist-info only changes on a `pip install` re-run, so a pyproject.toml bump
# alone would leave stale version stamps in sidecars. Bump this constant
# together with CHANGELOG.md at release time.
TOOL_VERSION = "0.15.0"

try:
    # Best effort: use the installed package metadata only when it reports a
    # version NEWER than the constant above (e.g. a downstream packager
    # bumped the wheel without touching this file).
    from importlib.metadata import version as _pkg_version

    _meta_v = _pkg_version("seismo-relay")

    def _vtuple(s):
        # First three dotted components as ints; anything unparsable
        # compares as the lowest possible version.
        try:
            return tuple(map(int, s.split(".")[:3]))
        except Exception:
            return (0, 0, 0)

    if _vtuple(_meta_v) > _vtuple(TOOL_VERSION):
        _TOOL_VERSION_DEFAULT = _meta_v
    else:
        _TOOL_VERSION_DEFAULT = TOOL_VERSION
except Exception:
    # Metadata unavailable (package not installed, old Python, ...).
    _TOOL_VERSION_DEFAULT = TOOL_VERSION
|
|
|
|
|
|
# ── Sidecar dict construction ─────────────────────────────────────────────────
|
|
|
|
|
|
def _ts_iso(ts: Optional[Timestamp]) -> Optional[str]:
|
|
if ts is None:
|
|
return None
|
|
try:
|
|
return datetime.datetime(
|
|
ts.year, ts.month, ts.day,
|
|
ts.hour or 0, ts.minute or 0, ts.second or 0,
|
|
).isoformat()
|
|
except Exception:
|
|
return str(ts)
|
|
|
|
|
|
def _peak_values_to_dict(pv: Optional[PeakValues]) -> dict:
|
|
if pv is None:
|
|
return {
|
|
"transverse": None,
|
|
"vertical": None,
|
|
"longitudinal": None,
|
|
"vector_sum": None,
|
|
"mic_psi": None,
|
|
}
|
|
return {
|
|
"transverse": pv.tran,
|
|
"vertical": pv.vert,
|
|
"longitudinal": pv.long,
|
|
"vector_sum": pv.peak_vector_sum,
|
|
"mic_psi": pv.micl,
|
|
}
|
|
|
|
|
|
def _project_info_to_dict(pi: Optional[ProjectInfo]) -> dict:
|
|
if pi is None:
|
|
return {
|
|
"project": None,
|
|
"client": None,
|
|
"operator": None,
|
|
"sensor_location": None,
|
|
}
|
|
return {
|
|
"project": pi.project,
|
|
"client": pi.client,
|
|
"operator": pi.operator,
|
|
"sensor_location": pi.sensor_location,
|
|
}
|
|
|
|
|
|
def event_to_sidecar_dict(
    event: Event,
    *,
    serial: str,
    blastware_filename: str,
    blastware_filesize: int,
    blastware_sha256: str,
    source_kind: str = "sfm-live",
    a5_pickle_filename: Optional[str] = None,
    tool_version: str = _TOOL_VERSION_DEFAULT,
    captured_at: Optional[datetime.datetime] = None,
    review: Optional[dict] = None,
    extensions: Optional[dict] = None,
) -> dict:
    """
    Build a v1 sidecar dict from an Event + the surrounding metadata.

    Pure helper — no file I/O, and the `extensions` argument (including a
    nested `raw_records` dict) is never modified. Callers stitch the result
    into a sidecar via `write_sidecar()` (or POST it back via the PATCH
    endpoint).

    Args:
        event: source Event; its timestamp, waveform key, record metadata,
            peak values and project info are copied into the result.
        serial: device serial stored under `event.serial`.
        blastware_filename / blastware_filesize / blastware_sha256:
            description of the companion BW file (`blastware` block).
        source_kind: one of "sfm-live", "sfm-ach", "bw-import".
        a5_pickle_filename: optional name stored in the `source` block.
        tool_version: version stamp stored in the `source` block.
        captured_at: capture time; defaults to the current UTC time. A naive
            datetime is serialized with a trailing "Z"; an aware one keeps
            its own offset.
        review: review block to embed; a default "not reviewed" block is
            used when omitted or empty.
        extensions: extra forward-compatible keys for the `extensions` block.

    Returns:
        The sidecar dict (schema_version, kind, event, peak_values,
        project_info, blastware, source, review, extensions).

    Raises:
        ValueError: if `source_kind` is not one of the supported kinds.
    """
    if source_kind not in {"sfm-live", "sfm-ach", "bw-import"}:
        raise ValueError(f"unknown source_kind: {source_kind!r}")

    if captured_at is None:
        # Naive UTC "now" without the deprecated datetime.utcnow(); the
        # serialized form stays "<iso>Z" exactly as before.
        captured_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    if captured_at.tzinfo is None:
        captured_stamp = captured_at.isoformat() + "Z"
    else:
        captured_stamp = captured_at.isoformat()

    # Stash raw 0C record bytes in `extensions.raw_records` so future
    # field-decoding work (Peak Acceleration, ZC Freq, Time of Peak,
    # sensor self-check results, etc.) can run offline against committed
    # sidecars without a live device. Forward-compatible: older readers
    # ignore unknown extensions keys.
    ext_dict: dict = dict(extensions) if extensions else {}
    raw_0c = getattr(event, "_raw_record", None)
    if raw_0c:
        # Copy the nested dict before filling it in: `dict(extensions)` is
        # only a shallow copy, so writing into the existing `raw_records`
        # object would leak changes back into the caller's argument.
        rr = dict(ext_dict.get("raw_records") or {})
        # Don't clobber a raw_0c that callers explicitly passed in via
        # `extensions=...` (e.g. round-trip preservation in patch_sidecar).
        rr.setdefault("waveform_record_b64", base64.b64encode(raw_0c).decode("ascii"))
        rr.setdefault("waveform_record_len", len(raw_0c))
        ext_dict["raw_records"] = rr

    return {
        "schema_version": SCHEMA_VERSION,
        "kind": SIDECAR_KIND,

        "event": {
            "serial": serial,
            "timestamp": _ts_iso(event.timestamp),
            "waveform_key": event._waveform_key.hex() if event._waveform_key else None,
            "record_type": event.record_type,
            "sample_rate": event.sample_rate,
            "rectime_seconds": event.rectime_seconds,
            "total_samples": event.total_samples,
            "pretrig_samples": event.pretrig_samples,
        },

        "peak_values": _peak_values_to_dict(event.peak_values),
        "project_info": _project_info_to_dict(event.project_info),

        "blastware": {
            "filename": blastware_filename,
            "filesize": blastware_filesize,
            "sha256": blastware_sha256,
            "available": True,
        },

        "source": {
            "kind": source_kind,
            "captured_at": captured_stamp,
            "tool_version": tool_version,
            "a5_pickle_filename": a5_pickle_filename,
        },

        "review": review or {
            "false_trigger": False,
            "reviewer": None,
            "reviewed_at": None,
            "notes": "",
        },

        "extensions": ext_dict,
    }
|
|
|
|
|
|
# ── Sidecar IO ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def write_sidecar(path: Union[str, Path], data: dict) -> None:
    """
    Atomic write of a sidecar dict to <path>.

    The JSON is written to a sibling "<name>.tmp" file, flushed and fsynced,
    then moved over the target with os.replace(). If anything fails along
    the way the temporary file is removed so no stray ".tmp" is left behind,
    and the original exception propagates.

    Validates schema_version is supported before writing so we don't
    silently write out a sidecar in a format this build does not understand.

    Args:
        path: destination sidecar path.
        data: sidecar dict; values json cannot encode natively are
            stringified via `default=str`.

    Raises:
        ValueError: if `data["schema_version"]` is missing, not an int, or
            outside 1..SCHEMA_VERSION.
        OSError: on file-system failures.
    """
    path = Path(path)
    sv = data.get("schema_version")
    if not isinstance(sv, int) or sv < 1 or sv > SCHEMA_VERSION:
        raise ValueError(
            f"write_sidecar: unsupported schema_version={sv!r} "
            f"(this build supports 1..{SCHEMA_VERSION})"
        )

    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=False, default=str)
            f.write("\n")
            f.flush()
            # Make sure the bytes are on disk before the rename publishes them.
            os.fsync(f.fileno())
        os.replace(tmp, path)
    except BaseException:
        # Don't leave a half-written temp file next to the real sidecar.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
|
|
|
|
|
|
def read_sidecar(path: Union[str, Path]) -> dict:
    """
    Load and validate a sidecar JSON file.

    Checks, in order: the top level is a JSON object, `schema_version` is an
    int >= 1, it does not exceed SCHEMA_VERSION, and `kind` equals
    SIDECAR_KIND. Unknown top-level keys are returned untouched
    (forward-compat).

    Raises:
        FileNotFoundError: if the file does not exist.
        ValueError: on a bad shape, unsupported schema_version or wrong kind.
    """
    path = Path(path)
    data = json.loads(path.read_text(encoding="utf-8"))

    if not isinstance(data, dict):
        raise ValueError(f"sidecar at {path}: top-level is not a JSON object")

    sv = data.get("schema_version")
    version_ok = isinstance(sv, int) and sv >= 1
    if not version_ok:
        raise ValueError(f"sidecar at {path}: missing or invalid schema_version")
    if sv > SCHEMA_VERSION:
        raise ValueError(
            f"sidecar at {path}: schema_version={sv} > supported {SCHEMA_VERSION}; "
            "upgrade seismo-relay to read this file"
        )

    kind = data.get("kind")
    if kind != SIDECAR_KIND:
        raise ValueError(f"sidecar at {path}: unexpected kind={kind!r}")
    return data
|
|
|
|
|
|
def patch_sidecar(
    path: Union[str, Path],
    *,
    review: Optional[dict] = None,
    extensions: Optional[dict] = None,
    reviewer_now: bool = True,
) -> dict:
    """
    Merge updates into a sidecar file's `review` and/or `extensions`
    blocks and write the result back. Other top-level keys are untouched.

    Merge rules (shallow, key-level — not a full RFC 7386 merge patch):
      - `review`: each key overwrites the stored value; a None value is
        applied only when the key already exists (it never adds a new
        None-valued key).
      - `extensions`: top-level keys are replaced wholesale; nested dicts
        are not merged recursively.

    The file is rewritten through `write_sidecar()`; the
    read-modify-write cycle itself is not locked against concurrent
    writers.

    Args:
        path: sidecar file to update.
        review: keys to merge into the `review` block; ignored when empty.
        extensions: keys to merge into the `extensions` block; ignored when
            empty.
        reviewer_now: when True (default) and `review` is non-empty, stamps
            `review.reviewed_at` with the current UTC time (overriding any
            value supplied in `review`) so the review time is auditable
            without the caller having to pass it.

    Returns:
        The new full sidecar dict.

    Raises:
        FileNotFoundError / ValueError: propagated from `read_sidecar()` and
            `write_sidecar()`.
    """
    path = Path(path)
    data = read_sidecar(path)

    if review:
        merged = dict(data.get("review") or {})
        merged.update({k: v for k, v in review.items() if v is not None or k in merged})
        if reviewer_now:
            # Naive UTC + "Z" keeps the stored format unchanged while
            # avoiding the deprecated datetime.utcnow().
            now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
            merged["reviewed_at"] = now_utc.isoformat() + "Z"
        data["review"] = merged

    if extensions:
        merged_ext = dict(data.get("extensions") or {})
        merged_ext.update(extensions)
        data["extensions"] = merged_ext

    write_sidecar(path, data)
    return data
|
|
|
|
|
|
def sidecar_path_for(blastware_path: Union[str, Path]) -> Path:
    """Return the sidecar path for a BW file: "<bw_path>.sfm.json", in the same directory."""
    bw_file = Path(blastware_path)
    sidecar_name = f"{bw_file.name}.sfm.json"
    return bw_file.with_name(sidecar_name)
|
|
|
|
|
|
def file_sha256(path: Union[str, Path], chunk_size: int = 65536) -> str:
    """Return the SHA-256 hex digest of the file at `path`, read in `chunk_size` pieces."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        # iter() with a sentinel stops at the first empty read (EOF).
        for piece in iter(lambda: fh.read(chunk_size), b""):
            digest.update(piece)
    return digest.hexdigest()
|
|
|
|
|
|
# ── Blastware-file reader ─────────────────────────────────────────────────────
#
# Reverse of `blastware_file.write_blastware_file`. Used by the BW-import
# flow to ingest files produced by Blastware's own ACH (where the source
# A5 frames are not available).
#
# File structure (recap):
#   [22B header] [21B STRT record] [body bytes] [26B footer]
#
# The body holds:
#   - 6B preamble (00 00 ff ff ff ff) immediately after the STRT
#   - 4-channel interleaved int16 LE samples
#   - Embedded ASCII metadata strings (Project: / Client: / User Name: /
#     Seis Loc: / Extended Notes) from the device's session-start config
#
# The 0C waveform record (per-event peaks, project name) is NOT in the
# BW file — those are computed by the device firmware and only carried
# in the live SUB 0C response. read_blastware_file() therefore computes
# peaks from the raw samples assuming Normal-range (10 in/s full-scale)
# geophone sensitivity. Imported events surface that assumption via the
# sidecar's `peak_values.computed_from_samples` flag.
|
|
|
|
|
|
# Geophone full-scale velocities in in/s (Normal range: 10.0, Sensitive
# range: 1.250). Dividing a full-scale value by _INT16_FS gives the
# per-ADC-count resolution, e.g. Normal range ≈ 10.0 / 32768 in/s per count.
# Per CLAUDE.md: geo_hardware_constant = 6.206053 in/s per V and ADC
# full-scale = 1.61133 V, i.e. Normal range = 10.0 in/s peak.
_GEO_NORMAL_FS_INS = 10.0
_GEO_SENSITIVE_FS_INS = 1.250
_INT16_FS = 32768.0

# Microphone scale factor, psi per ADC count. Approximate — exact factor
# depends on the geophone-vs-mic ADC scaling and the firmware reference.
# We mark mic_psi as "computed approximate" in the sidecar.
# NOTE(review): with this numerator a full-scale count reads 0.0125 psi,
# whereas the earlier comment on this line said "~0.5 psi full-scale" —
# confirm which figure is intended.
_MIC_FS_PSI = 0.0125 / _INT16_FS
|
|
|
|
|
|
def _decode_strt(strt: bytes) -> dict:
|
|
"""
|
|
Decode the 21-byte STRT record from a BW file.
|
|
|
|
Returns dict with waveform_key (4B), total_samples, pretrig_samples,
|
|
rectime_seconds. Falls back to None on truncated/missing fields.
|
|
"""
|
|
if len(strt) < 21 or strt[0:4] != b"STRT":
|
|
return {}
|
|
return {
|
|
"waveform_key": strt[6:10].hex(),
|
|
"total_samples": struct.unpack_from(">H", strt, 8)[0],
|
|
"pretrig_samples": struct.unpack_from(">H", strt, 16)[0],
|
|
"rectime_seconds": strt[18],
|
|
}
|
|
|
|
|
|
def _find_first_string(buf: bytes, label: bytes, max_len: int = 256) -> Optional[str]:
|
|
"""
|
|
Search `buf` for `label` (e.g. b"Project:") and return the
|
|
null-terminated ASCII string that follows, stripped.
|
|
"""
|
|
pos = buf.find(label)
|
|
if pos < 0:
|
|
return None
|
|
start = pos + len(label)
|
|
end = buf.find(b"\x00", start, start + max_len)
|
|
if end < 0:
|
|
end = start + max_len
|
|
text = buf[start:end].decode("ascii", errors="replace").strip()
|
|
return text or None
|
|
|
|
|
|
def _decode_samples_4ch_int16_le(stream: bytes) -> dict[str, list[int]]:
|
|
"""
|
|
Decode a 4-channel interleaved int16 LE byte stream into per-channel
|
|
lists. Channels are [Tran, Vert, Long, Mic] = [ch0, ch1, ch2, ch3].
|
|
Truncates to a multiple of 8 bytes (one full sample-set).
|
|
"""
|
|
n_complete = (len(stream) // 8) * 8
|
|
if n_complete == 0:
|
|
return {"Tran": [], "Vert": [], "Long": [], "MicL": []}
|
|
fmt = "<" + "h" * (n_complete // 2)
|
|
flat = list(struct.unpack(fmt, stream[:n_complete]))
|
|
return {
|
|
"Tran": flat[0::4],
|
|
"Vert": flat[1::4],
|
|
"Long": flat[2::4],
|
|
"MicL": flat[3::4],
|
|
}
|
|
|
|
|
|
def _peaks_from_samples(samples: dict[str, list[int]]) -> PeakValues:
    """
    Estimate peak values from raw int16 samples.

    Geophone channels (Tran/Vert/Long) are scaled with the Normal-range
    full-scale constant; the mic channel is scaled with _MIC_FS_PSI. The
    peak vector sum is the largest per-sample sqrt(T^2 + V^2 + L^2) over the
    samples the three geophone channels have in common. Used by the
    BW-importer when the 0C waveform record (the device's authoritative
    peaks) is unavailable.
    """
    def _axis_peak(counts: list[int]) -> float:
        # Largest absolute count converted to in/s; 0.0 for an empty channel.
        if not counts:
            return 0.0
        largest = max(abs(int(c)) for c in counts)
        return largest / _INT16_FS * _GEO_NORMAL_FS_INS

    tran_peak = _axis_peak(samples.get("Tran", []))
    vert_peak = _axis_peak(samples.get("Vert", []))
    long_peak = _axis_peak(samples.get("Long", []))

    # Mic in psi (approximate).
    mic_counts = samples.get("MicL", []) or []
    mic_peak = max((abs(int(c)) for c in mic_counts), default=0) * _MIC_FS_PSI

    # Peak vector sum over the common length of the three geophone channels.
    per_count = _GEO_NORMAL_FS_INS / _INT16_FS
    vector_peak = 0.0
    geo_channels = zip(
        samples.get("Tran", []),
        samples.get("Vert", []),
        samples.get("Long", []),
    )
    for t_raw, v_raw, l_raw in geo_channels:
        t = t_raw * per_count
        v = v_raw * per_count
        l = l_raw * per_count
        magnitude = (t*t + v*v + l*l) ** 0.5
        vector_peak = max(vector_peak, magnitude)

    return PeakValues(
        tran=tran_peak,
        vert=vert_peak,
        long=long_peak,
        peak_vector_sum=vector_peak,
        micl=mic_peak,
    )
|
|
|
|
|
|
def read_blastware_file(path: Union[str, Path]) -> Event:
    """
    Parse a Blastware waveform file into an Event.

    Recovered fields:
      - waveform_key, rectime_seconds, total_samples, pretrig_samples
        (STRT record)
      - timestamp (start-time field of the footer)
      - project_info (ASCII labels embedded in the body)
      - raw_samples (Tran/Vert/Long/MicL int16 lists)
      - peak_values (computed from raw_samples; approximate, since
        _peaks_from_samples assumes Normal-range geophone scaling)

    The source A5 frames are not part of a BW file, so the returned Event
    has `_a5_frames = None`: the BW file cannot be regenerated
    byte-for-byte from this Event alone — the on-disk file remains the
    byte-for-byte source.

    Raises:
        ValueError: if the file is too short, has a bad header prefix, a
            missing/malformed STRT record, or no locatable footer.
    """
    strt_len = 21
    footer_len = 26
    footer_marker = b"\x0e\x08"

    path = Path(path)
    raw = path.read_bytes()
    header_len = _bw._WAVEFORM_HEADER_SIZE
    if len(raw) < header_len + strt_len + footer_len:
        raise ValueError(f"{path}: file too short ({len(raw)} bytes) to be a BW event")

    # Header: must begin with the Blastware magic prefix.
    if not raw[:header_len].startswith(_bw._FILE_HEADER_PREFIX):
        raise ValueError(f"{path}: not a Blastware file (bad header prefix)")

    # STRT record sits directly after the header.
    body_start = header_len + strt_len
    strt_fields = _decode_strt(raw[header_len:body_start])
    if not strt_fields:
        raise ValueError(f"{path}: STRT record missing or malformed")

    # Footer: first 0e 08 marker (with room for a whole footer after it)
    # whose year field falls in a sane range.
    footer_pos = -1
    candidate = raw.find(footer_marker, body_start)
    while candidate >= 0 and candidate + footer_len <= len(raw):
        year = int.from_bytes(raw[candidate + 4:candidate + 6], "big")
        if 2015 <= year <= 2050:
            footer_pos = candidate
            break
        candidate = raw.find(footer_marker, candidate + 1)

    # No validated marker: assume the footer is the last 26 bytes.
    if footer_pos < 0 and len(raw) >= footer_len:
        footer_pos = len(raw) - footer_len
    if footer_pos < body_start:
        raise ValueError(f"{path}: footer not found")

    body = raw[body_start:footer_pos]
    footer = raw[footer_pos:footer_pos + footer_len]

    # Footer layout:
    #   [0:2]   0e 08 marker
    #   [2:10]  ts1 (start) BE 8B
    #   [10:18] ts2 (stop)  BE 8B
    #   [18:24] 00 01 00 02 00 00
    #   [24:26] crc
    start_raw = footer[2:10]
    ts1 = _bw._decode_ts_be(start_raw)
    ts2 = _bw._decode_ts_be(footer[10:18])  # decoded but not used further

    # Body: drop the 6-byte preamble (00 00 ff ff ff ff) when present. Any
    # tail past the last full sample-set is ignored by the sample decoder.
    preamble = b"\x00\x00\xff\xff\xff\xff"
    if body[:6] == preamble:
        sample_bytes = body[6:]
    else:
        sample_bytes = body
    samples = _decode_samples_4ch_int16_le(sample_bytes)

    # Metadata strings (label-anchored search across the body).
    project = _find_first_string(body, b"Project:")
    client = _find_first_string(body, b"Client:")
    user = _find_first_string(body, b"User Name:")
    seisloc = _find_first_string(body, b"Seis Loc:")

    # Assemble the Event.
    ev = Event(index=-1)
    key_hex = strt_fields.get("waveform_key")
    if key_hex:
        ev._waveform_key = bytes.fromhex(key_hex)
    ev.record_type = "Waveform"
    ev.rectime_seconds = strt_fields.get("rectime_seconds")
    ev.total_samples = strt_fields.get("total_samples")
    ev.pretrig_samples = strt_fields.get("pretrig_samples")

    if ts1 is not None:
        ev.timestamp = Timestamp(
            raw=start_raw,
            flag=0x10,
            year=ts1.year,
            unknown_byte=0,
            month=ts1.month,
            day=ts1.day,
            hour=ts1.hour,
            minute=ts1.minute,
            second=ts1.second,
        )

    ev.project_info = ProjectInfo(
        project=project,
        client=client,
        operator=user,
        sensor_location=seisloc,
    )
    ev.raw_samples = samples
    ev.peak_values = _peaks_from_samples(samples)
    ev._a5_frames = None  # not recoverable from BW file

    return ev
|