feat(import): v0.16.0 - Fully implemented series 3 BW-ACH pipeline stablized. #19

Merged
serversdown merged 9 commits from ach-report-ingestion into main 2026-05-11 15:55:24 -04:00
6 changed files with 1097 additions and 24 deletions
Showing only changes of commit cdfe4ad3c8 - Show all commits
+468
View File
@@ -0,0 +1,468 @@
"""
minimateplus/bw_ascii_report.py — parser for Blastware's per-event ASCII
report (the .TXT file BW writes alongside each saved event binary).
The ASCII export is the authoritative source for every "rich" per-event
field that BW computes from the waveform but never persists in the BW
binary itself:
- Per-channel PPV (Tran / Vert / Long / MicL)
- Peak Vector Sum + Peak Vector Sum Time
- Per-channel ZC Freq, Time of Peak, Peak Acceleration, Peak Displacement
- MicL PSPL, MicL Time of Peak, MicL ZC Freq
- Per-channel Sensor Self-Check (Test Freq / Test Ratio / Test Results)
- MicL Test Amplitude (mV)
- Battery, calibration date, monitor-log timestamps
Persisting these values into the SFM database lets the monthly-summary
review workflow ("show me events at Location X with PVS > 0.5") work
without depending on the (still-undecoded) waveform body codec.
Format (verified against decode-re/5-8-26 4-event bundle):
- One field per line, wrapped in double quotes: `"Field Name : Value"`
- Field/value separator: literal ` : ` (space-colon-space).
- Some field names contain an internal `:` already (e.g. `"Project:"`),
so we split on the FIRST ` : ` only.
- Some fields have unit suffixes: `"0.500 in/s"` / `"7.5 Hz"` / `"533 mv"`.
- A `"Monitor Log(s)"` marker line is followed by tab-separated rows
of `start_time<TAB>stop_time<TAB>description`.
- Final `"PC SW Version : ..."` line ends the metadata block.
- A blank line separates metadata from the sample table.
- Sample table starts with ` Tran <TAB> Vert <TAB>...`, then
one row per sample (tab-separated, right-padded numeric values).
- Geo channel values are in in/s; MicL in dB(L) (or 0.000 below threshold).
Because some metadata fields have whitespace quirks ("MicL Time of
Peak" has two spaces; the leading "Project:" value has its own colon),
we normalise whitespace in the key before lookup.
"""
from __future__ import annotations
import datetime
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
# ─────────────────────────────────────────────────────────────────────────────
# Output dataclasses
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class ChannelStats:
"""Per-channel derived stats, populated from an event report."""
ppv_ips: Optional[float] = None # in/s (geo channels only)
zc_freq_hz: Optional[float] = None # Hz
time_of_peak_s: Optional[float] = None # seconds (relative to trigger; can be negative)
peak_accel_g: Optional[float] = None # g (geo channels only)
peak_disp_in: Optional[float] = None # in (geo channels only)
@dataclass
class MicStats:
"""MicL-specific stats."""
weighting: Optional[str] = None # e.g. "Linear Weighting"
pspl_dbl: Optional[float] = None # dB(L)
zc_freq_hz: Optional[float] = None
time_of_peak_s: Optional[float] = None
@dataclass
class SensorCheck:
"""Per-channel sensor self-check result.
Geo channels report a frequency + ratio; MicL reports a frequency +
amplitude (mV). All channels also have a Pass/Fail string.
"""
test_freq_hz: Optional[float] = None
test_ratio: Optional[float] = None # geo channels only
test_amplitude_mv: Optional[float] = None # MicL only
test_results: Optional[str] = None # "Passed" / "Failed"
@dataclass
class MonitorLogEntry:
"""One row of the trailing Monitor Log(s) block."""
start_time: Optional[datetime.datetime] = None
stop_time: Optional[datetime.datetime] = None
description: Optional[str] = None
@dataclass
class BwAsciiReport:
"""Structured representation of one BW per-event ASCII export."""
# ── Identity ─────────────────────────────────────────────────────────────
event_type: Optional[str] = None # e.g. "Full Waveform"
serial: Optional[str] = None # e.g. "BE11529"
version: Optional[str] = None # firmware version line
file_name: Optional[str] = None # e.g. "M529LK44.AB0"
event_datetime: Optional[datetime.datetime] = None # parsed from Event Time + Event Date
# ── Trigger / recording config ──────────────────────────────────────────
trigger_channel: Optional[str] = None # e.g. "Vert" or "From Unit"
geo_trigger_level_ips: Optional[float] = None
pretrig_s: Optional[float] = None # negative seconds
record_time_s: Optional[float] = None
record_stop_mode: Optional[str] = None
sample_rate_sps: Optional[int] = None
battery_volts: Optional[float] = None
calibration_date: Optional[datetime.date] = None
calibration_by: Optional[str] = None # e.g. "Instantel"
units: Optional[str] = None # e.g. "in/s and dB(L)"
# ── Operator-supplied metadata ──────────────────────────────────────────
project: Optional[str] = None
client: Optional[str] = None
operator: Optional[str] = None # User Name
sensor_location: Optional[str] = None # Seis Loc
# ── Geo channel scaling ─────────────────────────────────────────────────
geo_range_ips: Optional[float] = None # 10.000 / 1.250
# ── Per-channel derived stats (geo + mic) ───────────────────────────────
channels: Dict[str, ChannelStats] = field(default_factory=dict)
mic: MicStats = field(default_factory=MicStats)
# ── Vector sum ──────────────────────────────────────────────────────────
peak_vector_sum_ips: Optional[float] = None
peak_vector_sum_time_s: Optional[float] = None
# ── Sensor self-check (per channel) ─────────────────────────────────────
sensor_check: Dict[str, SensorCheck] = field(default_factory=dict)
# ── Monitor log + tooling version ───────────────────────────────────────
monitor_log: List[MonitorLogEntry] = field(default_factory=list)
pc_sw_version: Optional[str] = None
# ── Sample table (optional; only parsed if requested) ───────────────────
# Each entry: (Tran, Vert, Long, MicL) in the report's units (geo
# channels in in/s, MicL in dB(L)). None when parse_samples=False.
samples: Optional[List[Tuple[float, float, float, float]]] = None
# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────
_KEY_NORMALISE_RE = re.compile(r"\s+")
_NUMERIC_RE = re.compile(r"^-?\d+(?:\.\d+)?")
def _normalise_key(k: str) -> str:
"""Collapse whitespace runs (incl. tabs) and strip — handles BW's
"MicL Time of Peak" double-space and leading-colon quirks."""
return _KEY_NORMALISE_RE.sub(" ", k).strip()
def _strip_quotes(line: str) -> str:
line = line.rstrip("\r\n")
if len(line) >= 2 and line.startswith('"') and line.endswith('"'):
return line[1:-1]
return line
def _parse_number(value: str) -> Optional[float]:
"""Pull the leading numeric portion out of a value like "0.500 in/s"."""
m = _NUMERIC_RE.match(value.strip())
if not m:
return None
try:
return float(m.group(0))
except ValueError:
return None
def _parse_int(value: str) -> Optional[int]:
n = _parse_number(value)
return None if n is None else int(round(n))
# Months exactly as BW writes them.
_MONTHS = {
"January": 1, "February": 2, "March": 3, "April": 4,
"May": 5, "June": 6, "July": 7, "August": 8,
"September": 9, "October": 10, "November": 11, "December": 12,
# Short forms used in monitor-log rows ("Apr 23 /26").
"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "Jun": 6, "Jul": 7,
"Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}
def _parse_event_date(s: str) -> Optional[datetime.date]:
"""Parse "April 23, 2026" or "May 8, 2026" → date."""
s = s.strip()
parts = s.replace(",", " ").split()
if len(parts) < 3:
return None
month_name, day_str, year_str = parts[0], parts[1], parts[2]
month = _MONTHS.get(month_name)
if month is None:
return None
try:
return datetime.date(int(year_str), month, int(day_str))
except ValueError:
return None
def _parse_event_time(s: str) -> Optional[datetime.time]:
"""Parse "15:56:35" → time."""
s = s.strip()
try:
h, m, sec = s.split(":")
return datetime.time(int(h), int(m), int(sec))
except (ValueError, IndexError):
return None
def _parse_calibration(value: str) -> Tuple[Optional[datetime.date], Optional[str]]:
"""Parse "April 29, 2025 by Instantel" → (date, "Instantel")."""
parts = value.split(" by ", 1)
date = _parse_event_date(parts[0])
by = parts[1].strip() if len(parts) > 1 else None
return date, by
def _parse_monitor_row(line: str) -> Optional[MonitorLogEntry]:
"""Parse a tab-separated monitor log row.
Format: `<start>\t<stop>\t<desc>` where each timestamp is BW's
short form "Mon DD /YY HH:MM:SS" (e.g. "Apr 23 /26 15:46:16").
Year is encoded as a 2-digit suffix; we expand "/26" → 2026.
"""
parts = line.split("\t")
if len(parts) < 2:
return None
start = _parse_monitor_ts(parts[0])
stop = _parse_monitor_ts(parts[1])
desc = parts[2].strip() if len(parts) > 2 else None
if start is None and stop is None and not desc:
return None
return MonitorLogEntry(start_time=start, stop_time=stop, description=desc)
def _parse_monitor_ts(s: str) -> Optional[datetime.datetime]:
"""Parse "Apr 23 /26 15:46:16" → datetime."""
s = s.strip()
parts = s.split()
if len(parts) < 4:
return None
month = _MONTHS.get(parts[0])
if month is None:
return None
try:
day = int(parts[1])
# parts[2] looks like "/26" → century-flip to 2026
yy = int(parts[2].lstrip("/"))
year = 2000 + yy if yy < 80 else 1900 + yy
h, m, sec = (int(x) for x in parts[3].split(":"))
return datetime.datetime(year, month, day, h, m, sec)
except (ValueError, IndexError):
return None
# ─────────────────────────────────────────────────────────────────────────────
# Top-level parser
# ─────────────────────────────────────────────────────────────────────────────
def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwAsciiReport:
"""Parse a BW per-event ASCII export into a structured BwAsciiReport.
Set ``parse_samples=True`` to also populate ``report.samples`` with
the trailing sample table. Default False because the table is
huge and most callers only want metadata for indexing.
"""
if isinstance(text, bytes):
text = text.decode("ascii", errors="replace")
report = BwAsciiReport()
# Pre-create channel stat slots so callers can rely on them existing.
for ch in ("Tran", "Vert", "Long", "MicL"):
report.channels.setdefault(ch, ChannelStats())
report.sensor_check.setdefault(ch, SensorCheck())
lines = text.splitlines()
i = 0
n = len(lines)
in_monitor_log_section = False
event_time_str: Optional[str] = None
event_date: Optional[datetime.date] = None
while i < n:
raw_line = lines[i]
i += 1
# Blank line marks the start of the sample table.
if raw_line.strip() == "":
break
line = _strip_quotes(raw_line)
# Monitor log section: "Monitor Log(s)" header followed by N rows
# (still inside double-quoted lines), terminated by a non-row line
# like "PC SW Version : ..." or a blank line.
if not in_monitor_log_section and line.strip() == "Monitor Log(s)":
in_monitor_log_section = True
continue
if in_monitor_log_section:
# Heuristic: monitor rows contain a tab; the next "Field : Value"
# line ends the section.
if "\t" in line:
entry = _parse_monitor_row(line)
if entry:
report.monitor_log.append(entry)
continue
# Falls through to the field parser below; clear the flag.
in_monitor_log_section = False
# "Field : Value" — split on FIRST occurrence of " : "
idx = line.find(" : ")
if idx < 0:
continue
key = _normalise_key(line[:idx])
value = line[idx + 3 :].strip()
# ── Identity / config ────────────────────────────────────────────────
if key == "Event Type": report.event_type = value
elif key == "Serial Number": report.serial = value
elif key == "Version": report.version = value
elif key == "File Name": report.file_name = value
elif key == "Event Time": event_time_str = value
elif key == "Event Date": event_date = _parse_event_date(value)
elif key == "Trigger": report.trigger_channel = value
elif key == "Geo Trigger Level": report.geo_trigger_level_ips = _parse_number(value)
elif key == "Pre-trigger Length": report.pretrig_s = _parse_number(value)
elif key == "Record Time": report.record_time_s = _parse_number(value)
elif key == "Record Stop Mode": report.record_stop_mode = value
elif key == "Sample Rate": report.sample_rate_sps = _parse_int(value)
elif key == "Battery Level": report.battery_volts = _parse_number(value)
elif key == "Calibration":
report.calibration_date, report.calibration_by = _parse_calibration(value)
elif key == "Units": report.units = value
# Project labels in BW carry their own trailing colon — after
# _normalise_key we just strip it for matching.
elif key.rstrip(":") == "Project": report.project = value
elif key.rstrip(":") == "Client": report.client = value
elif key.rstrip(":") == "User Name":report.operator = value
elif key.rstrip(":") == "Seis Loc": report.sensor_location = value
elif key == "Geo Range": report.geo_range_ips = _parse_number(value)
# ── Per-channel stats ────────────────────────────────────────────────
# All match the pattern "{Channel} <stat-name>"
elif key in (
"Tran PPV", "Vert PPV", "Long PPV",
"Tran ZC Freq", "Vert ZC Freq", "Long ZC Freq",
"Tran Time of Peak", "Vert Time of Peak", "Long Time of Peak",
"Tran Peak Acceleration", "Vert Peak Acceleration", "Long Peak Acceleration",
"Tran Peak Displacement", "Vert Peak Displacement", "Long Peak Displacement",
):
ch_name, stat = key.split(" ", 1)
cs = report.channels.setdefault(ch_name, ChannelStats())
num = _parse_number(value)
if stat == "PPV": cs.ppv_ips = num
elif stat == "ZC Freq": cs.zc_freq_hz = num
elif stat == "Time of Peak": cs.time_of_peak_s = num
elif stat == "Peak Acceleration": cs.peak_accel_g = num
elif stat == "Peak Displacement": cs.peak_disp_in = num
# ── Vector Sum ───────────────────────────────────────────────────────
elif key == "Peak Vector Sum":
report.peak_vector_sum_ips = _parse_number(value)
elif key == "Peak Vector Sum Time":
report.peak_vector_sum_time_s = _parse_number(value)
# ── Microphone block ────────────────────────────────────────────────
elif key == "Microphone":
report.mic.weighting = value
elif key == "MicL PSPL":
report.mic.pspl_dbl = _parse_number(value)
# Mirror onto the "MicL" entry in channels so callers querying
# `channels["MicL"].ppv_ips` see something — but it's dB(L), not
# in/s, so we store as-is in the MicStats and mark the channel.
elif key == "MicL Time of Peak":
report.mic.time_of_peak_s = _parse_number(value)
cs = report.channels.setdefault("MicL", ChannelStats())
cs.time_of_peak_s = report.mic.time_of_peak_s
elif key == "MicL ZC Freq":
report.mic.zc_freq_hz = _parse_number(value)
cs = report.channels.setdefault("MicL", ChannelStats())
cs.zc_freq_hz = report.mic.zc_freq_hz
# ── Sensor self-check ────────────────────────────────────────────────
elif key in (
"Tran Test Freq", "Vert Test Freq", "Long Test Freq", "MicL Test Freq",
"Tran Test Ratio", "Vert Test Ratio", "Long Test Ratio",
"MicL Test Amplitude",
"Tran Test Results", "Vert Test Results", "Long Test Results", "MicL Test Results",
):
ch_name, stat = key.split(" ", 1)
sc = report.sensor_check.setdefault(ch_name, SensorCheck())
if stat == "Test Freq": sc.test_freq_hz = _parse_number(value)
elif stat == "Test Ratio": sc.test_ratio = _parse_number(value)
elif stat == "Test Amplitude": sc.test_amplitude_mv = _parse_number(value)
elif stat == "Test Results": sc.test_results = value
# ── Trailer ─────────────────────────────────────────────────────────
elif key == "PC SW Version":
report.pc_sw_version = value
# Unknown keys are silently dropped — forward-compat for future
# BW versions that may add fields.
# Combine event date + time into a datetime
if event_date is not None and event_time_str is not None:
t = _parse_event_time(event_time_str)
if t is not None:
report.event_datetime = datetime.datetime.combine(event_date, t)
if parse_samples:
report.samples = _parse_sample_table(lines, i)
return report
def _parse_sample_table(
lines: List[str], start: int,
) -> List[Tuple[float, float, float, float]]:
"""Parse the trailing sample table.
The table starts with a header row (" Tran <TAB>...") and continues
until EOF. Each data row is a tab-separated quartet of numeric values.
"""
samples: List[Tuple[float, float, float, float]] = []
seen_header = False
for line in lines[start:]:
line = line.rstrip("\r\n")
if not line.strip():
continue
cols = [c.strip() for c in line.split("\t") if c.strip()]
if not seen_header:
# Header row contains channel names; numeric rows don't.
if any(c in ("Tran", "Vert", "Long", "MicL") for c in cols):
seen_header = True
continue
if len(cols) < 4:
continue
try:
samples.append((
float(cols[0]), float(cols[1]),
float(cols[2]), float(cols[3]),
))
except ValueError:
continue
return samples
def parse_report_file(
path: Union[str, Path], *, parse_samples: bool = False,
) -> BwAsciiReport:
"""Convenience: read a .TXT file from disk and parse it."""
return parse_report(Path(path).read_bytes(), parse_samples=parse_samples)
+183 -8
View File
@@ -26,6 +26,12 @@ from typing import Optional, Union
from .models import Event, PeakValues, ProjectInfo, Timestamp
from . import blastware_file as _bw # avoid circular reference at module load
from .bw_ascii_report import BwAsciiReport
# Reference pressure for dB(L) → psi conversion (20 µPa expressed in psi).
# Same constant as sfm/sfm_webapp.html so server-side and browser-side
# conversions agree.
_DBL_REF_PSI = 2.9e-9
log = logging.getLogger(__name__)
@@ -94,6 +100,101 @@ def _peak_values_to_dict(pv: Optional[PeakValues]) -> dict:
}
def _bw_report_to_dict(report: BwAsciiReport) -> dict:
"""Project a parsed BW ASCII report into the sidecar's `bw_report` block.
All fields are rendered as plain JSON-compatible types (no datetime
objects). Channels are uniformly lowercased for stable JSON keys.
"""
def _ch(ch_name: str) -> dict:
cs = report.channels.get(ch_name)
if cs is None:
return {}
out = {
"ppv_ips": cs.ppv_ips,
"zc_freq_hz": cs.zc_freq_hz,
"time_of_peak_s": cs.time_of_peak_s,
"peak_accel_g": cs.peak_accel_g,
"peak_disp_in": cs.peak_disp_in,
}
# Drop all-None entries — keeps the JSON tidy for partial reports.
return {k: v for k, v in out.items() if v is not None}
def _sc(ch_name: str) -> dict:
sc = report.sensor_check.get(ch_name)
if sc is None:
return {}
out = {
"freq_hz": sc.test_freq_hz,
"ratio": sc.test_ratio,
"amplitude_mv": sc.test_amplitude_mv,
"result": sc.test_results,
}
return {k: v for k, v in out.items() if v is not None}
monitor_log = []
for entry in report.monitor_log:
e = {
"start": entry.start_time.isoformat() if entry.start_time else None,
"stop": entry.stop_time.isoformat() if entry.stop_time else None,
"description": entry.description,
}
monitor_log.append({k: v for k, v in e.items() if v is not None})
return {
"available": True,
"event_type": report.event_type,
"version": report.version,
"trigger": {
"channel": report.trigger_channel,
"geo_level_ips": report.geo_trigger_level_ips,
},
"recording": {
"sample_rate_sps": report.sample_rate_sps,
"record_time_s": report.record_time_s,
"pretrig_s": report.pretrig_s,
"stop_mode": report.record_stop_mode,
"geo_range_ips": report.geo_range_ips,
"units": report.units,
},
"device": {
"battery_volts": report.battery_volts,
"calibration_date": report.calibration_date.isoformat() if report.calibration_date else None,
"calibration_by": report.calibration_by,
},
"peaks": {
"tran": _ch("Tran"),
"vert": _ch("Vert"),
"long": _ch("Long"),
"vector_sum": {
"ips": report.peak_vector_sum_ips,
"time_s": report.peak_vector_sum_time_s,
},
},
"mic": {
"weighting": report.mic.weighting,
"pspl_dbl": report.mic.pspl_dbl,
"zc_freq_hz": report.mic.zc_freq_hz,
"time_of_peak_s": report.mic.time_of_peak_s,
},
"sensor_check": {
"tran": _sc("Tran"),
"vert": _sc("Vert"),
"long": _sc("Long"),
"mic": _sc("MicL"),
},
"monitor_log": monitor_log,
"pc_sw_version": report.pc_sw_version,
}
def _dbl_to_psi(pspl_dbl: float) -> float:
"""Convert dB(L) sound pressure level back to psi. Uses the same
20 µPa reference (= 2.9e-9 psi) as the webapp so server-side and
browser-side conversions agree."""
return _DBL_REF_PSI * (10.0 ** (pspl_dbl / 20.0))
def _project_info_to_dict(pi: Optional[ProjectInfo]) -> dict:
if pi is None:
return {
@@ -123,35 +224,104 @@ def event_to_sidecar_dict(
captured_at: Optional[datetime.datetime] = None,
review: Optional[dict] = None,
extensions: Optional[dict] = None,
bw_report: Optional[BwAsciiReport] = None,
) -> dict:
"""
Build a v1 sidecar dict from an Event + the surrounding metadata.
Pure helper — no file I/O. Callers stitch the result into a sidecar
via `write_sidecar()` (or POST it back via the PATCH endpoint).
When *bw_report* is supplied (e.g. by the ACH-forwarded import path
where Blastware writes a per-event ASCII report alongside the binary),
its decoded fields are folded into the sidecar:
- A new top-level ``bw_report`` block carries the rich derived
per-channel stats (Peak Acceleration, Peak Displacement, ZC Freq,
Time of Peak), the Peak Vector Sum + time, the per-channel sensor
self-check results, and monitor-log timestamps.
- ``peak_values`` is overlaid from the report (the report's PPV/PVS
values are computed by the device firmware and are authoritative;
anything ``read_blastware_file()`` derived from samples is
approximate at best until the body codec is decoded).
- ``project_info`` is overlaid from the report when the report
supplies a non-empty value (the report mirrors the device's
compliance config, which is what BW shows in its event report).
- ``event.timestamp`` is overlaid from the report's Event Date +
Event Time (BW's report timestamps are second-resolution and
match the binary's footer; we prefer the report value because
the BW-binary footer timestamp can drift on some firmware).
"""
if source_kind not in {"sfm-live", "sfm-ach", "bw-import"}:
raise ValueError(f"unknown source_kind: {source_kind!r}")
captured_at = captured_at or datetime.datetime.utcnow()
return {
"schema_version": SCHEMA_VERSION,
"kind": SIDECAR_KIND,
# ── Overlay event fields from the report when present ───────────────────
timestamp_iso = _ts_iso(event.timestamp)
if bw_report and bw_report.event_datetime:
timestamp_iso = bw_report.event_datetime.isoformat()
"event": {
# Build peak_values, optionally overlaid from the report. The report
# stores Mic peak as PSPL (dB(L)); we convert to psi to match the
# existing peak_values.mic_psi field.
peak_dict = _peak_values_to_dict(event.peak_values)
if bw_report:
ch = bw_report.channels
if (t := ch.get("Tran")) and t.ppv_ips is not None: peak_dict["transverse"] = t.ppv_ips
if (v := ch.get("Vert")) and v.ppv_ips is not None: peak_dict["vertical"] = v.ppv_ips
if (l := ch.get("Long")) and l.ppv_ips is not None: peak_dict["longitudinal"] = l.ppv_ips
if bw_report.peak_vector_sum_ips is not None:
peak_dict["vector_sum"] = bw_report.peak_vector_sum_ips
if bw_report.mic.pspl_dbl is not None and bw_report.mic.pspl_dbl > 0:
peak_dict["mic_psi"] = _dbl_to_psi(bw_report.mic.pspl_dbl)
# Project info: overlay from report (the report mirrors the
# session-start compliance config that BW renders in event reports).
proj_dict = _project_info_to_dict(event.project_info)
if bw_report:
if bw_report.project: proj_dict["project"] = bw_report.project
if bw_report.client: proj_dict["client"] = bw_report.client
if bw_report.operator: proj_dict["operator"] = bw_report.operator
if bw_report.sensor_location: proj_dict["sensor_location"] = bw_report.sensor_location
# Event-block fields: overlay from report where available.
event_block = {
"serial": serial,
"timestamp": _ts_iso(event.timestamp),
"timestamp": timestamp_iso,
"waveform_key": event._waveform_key.hex() if event._waveform_key else None,
"record_type": event.record_type,
"sample_rate": event.sample_rate,
"rectime_seconds": event.rectime_seconds,
"total_samples": event.total_samples,
"pretrig_samples": event.pretrig_samples,
},
}
if bw_report:
# Report values are authoritative — they're the user-configured
# values BW reads back, not STRT-derived guesses. In particular
# `event.rectime_seconds` from `read_blastware_file()` reads
# STRT[18] which is actually the `0x46` record-type marker (= 70)
# rather than the user's Record Time setting. Always overwrite.
if bw_report.sample_rate_sps:
event_block["sample_rate"] = bw_report.sample_rate_sps
if bw_report.record_time_s is not None:
event_block["rectime_seconds"] = bw_report.record_time_s
# Derive total_samples + pretrig_samples per channel from the
# report's sample_rate × times. These match the row count of
# the report's sample table (verified: event-c reports 1024 sps
# × (1.0 + 0.25) = 1280 rows).
if (sr := bw_report.sample_rate_sps) and bw_report.record_time_s is not None:
pretrig_s = abs(bw_report.pretrig_s) if bw_report.pretrig_s is not None else 0.0
event_block["total_samples"] = int(round(sr * (bw_report.record_time_s + pretrig_s)))
event_block["pretrig_samples"] = int(round(sr * pretrig_s))
"peak_values": _peak_values_to_dict(event.peak_values),
"project_info": _project_info_to_dict(event.project_info),
out = {
"schema_version": SCHEMA_VERSION,
"kind": SIDECAR_KIND,
"event": event_block,
"peak_values": peak_dict,
"project_info": proj_dict,
"blastware": {
"filename": blastware_filename,
@@ -177,6 +347,11 @@ def event_to_sidecar_dict(
"extensions": extensions or {},
}
if bw_report:
out["bw_report"] = _bw_report_to_dict(bw_report)
return out
# ── Sidecar IO ────────────────────────────────────────────────────────────────
+45 -5
View File
@@ -1619,6 +1619,21 @@ async def db_import_blastware_file(
writes a .sfm.json sidecar with source.kind = "bw-import".
2. Upsert a row into `events` (dedup'd on serial+timestamp).
**Paired BW ASCII reports.** When Blastware's ACH writes events,
it also emits a per-event report alongside each binary as
``<binary>.TXT`` (e.g. ``M529LK44.AB0`` + ``M529LK44.AB0.TXT``).
If a request includes ``.TXT`` files matching a binary's filename,
the report is parsed and its decoded fields land in the sidecar's
``bw_report`` block — including device-authoritative peaks, ZC
Freq, Peak Acceleration, Peak Displacement, Time of Peak, sensor
self-check results, and monitor-log timestamps. The daemon-
forwarded ACH workflow should always send both files together
so the SFM database has the rich metadata for sort/filter/report.
Pairing is by exact filename match (case-insensitive on the
extension): a binary named ``foo.AB0`` is paired with a report
named ``foo.AB0.TXT`` or ``foo.AB0.txt``.
Response includes per-file outcomes so the caller can see which
landed cleanly and which failed (e.g. malformed file, unknown
serial, etc.).
@@ -1627,21 +1642,36 @@ async def db_import_blastware_file(
db = _get_db()
results: list[dict] = []
# Read every upload up front (UploadFile.read() is one-shot under
# FastAPI's spooled-tempfile backing) and split into binaries vs
# paired ASCII reports.
binaries: list[tuple[str, bytes]] = []
reports: dict[str, bytes] = {} # keyed by lower-cased stem (without .txt)
for upload in files:
name = upload.filename or ""
try:
content = await upload.read()
except Exception as exc:
results.append({
"filename": upload.filename, "status": "error",
"filename": name or "<unnamed>", "status": "error",
"detail": f"read failed: {exc}",
})
continue
if name.lower().endswith(".txt"):
# Strip the ".txt" suffix to get the binary's filename.
reports[name[:-4].lower()] = content
else:
binaries.append((name, content))
for filename, content in binaries:
report_bytes = reports.get(filename.lower())
try:
ev, rec = store.save_imported_bw(
content,
source_path=Path(upload.filename or "imported.bw"),
source_path=Path(filename or "imported.bw"),
serial_hint=serial,
bw_report_text=report_bytes,
)
inserted, skipped = db.insert_events(
[ev],
@@ -1652,21 +1682,31 @@ async def db_import_blastware_file(
} if ev._waveform_key else None,
)
results.append({
"filename": upload.filename,
"filename": filename,
"status": "ok",
"stored_filename": rec["filename"],
"filesize": rec["filesize"],
"sha256": rec["sha256"],
"report_attached": report_bytes is not None,
"inserted": inserted,
"skipped": skipped,
})
except Exception as exc:
log.error("import failed for %s: %s", upload.filename, exc, exc_info=True)
log.error("import failed for %s: %s", filename, exc, exc_info=True)
results.append({
"filename": upload.filename, "status": "error",
"filename": filename, "status": "error",
"detail": str(exc),
})
# Surface unmatched .txt uploads so the daemon can detect mis-pairings.
used_report_keys = {fn.lower() for fn, _ in binaries}
for stem in reports.keys() - used_report_keys:
results.append({
"filename": stem + ".txt",
"status": "warning",
"detail": "BW ASCII report supplied but no matching binary in this upload",
})
return {"count": len(results), "results": results}
+27 -4
View File
@@ -34,7 +34,7 @@ import logging
import pickle
import shutil
from pathlib import Path
from typing import Optional
from typing import Optional, Union
from minimateplus import event_file_io
from minimateplus.blastware_file import blastware_filename, write_blastware_file
@@ -258,6 +258,7 @@ class WaveformStore:
source_path: Path,
*,
serial_hint: Optional[str] = None,
bw_report_text: Optional[Union[str, bytes]] = None,
) -> tuple[Event, dict]:
"""
Ingest a Blastware event file produced by an external tool
@@ -267,10 +268,17 @@ class WaveformStore:
Workflow:
1. Parse the bytes via event_file_io.read_blastware_file (writes
a temp file to do that, since the parser takes a path).
2. Resolve serial from BW filename (`<P><serial3>...`) or use
2. Optionally parse a paired BW ASCII event report (the .TXT
file BW writes alongside the binary). When supplied, its
decoded fields land in the sidecar's `bw_report` block AND
overlay the device-authoritative peak values into the
top-level `peak_values` block. This is the right path for
the ACH-forwarder daemon use case where Blastware's own
ACH writes both files into the watch folder.
3. Resolve serial from BW filename (`<P><serial3>...`) or use
serial_hint. Falls back to "UNKNOWN".
3. Copy the BW bytes verbatim into <root>/<serial>/<filename>.
4. Write the .sfm.json sidecar with source.kind = "bw-import"
4. Copy the BW bytes verbatim into <root>/<serial>/<filename>.
5. Write the .sfm.json sidecar with source.kind = "bw-import"
and a5_pickle_filename = None. Does NOT write a .a5.pkl
(no A5 source available; byte-for-byte regeneration not
possible — the on-disk BW file IS the byte-for-byte source).
@@ -292,6 +300,20 @@ class WaveformStore:
except FileNotFoundError:
pass
# Parse the BW ASCII report if one was supplied. Failures here
# are non-fatal: we still write the binary + sidecar without the
# rich derived fields.
bw_report = None
if bw_report_text is not None:
try:
from minimateplus.bw_ascii_report import parse_report
bw_report = parse_report(bw_report_text)
except Exception as exc:
log.warning(
"save_imported_bw: BW report parse failed: %s — continuing without it",
exc,
)
# Resolve serial. blastware_filename derives a 4-char prefix from
# the numeric serial (e.g. BE11529 → M529); we go the other way
# via the source filename if a hint wasn't given.
@@ -345,6 +367,7 @@ class WaveformStore:
source_kind="bw-import",
a5_pickle_filename=None,
review=existing_review,
bw_report=bw_report,
)
event_file_io.write_sidecar(sidecar_path, sidecar)
+259
View File
@@ -0,0 +1,259 @@
"""
test_bw_ascii_report.py — parser for Blastware's per-event ASCII export.
Run:
python -m pytest tests/test_bw_ascii_report.py -q
"""
from __future__ import annotations
import datetime
import os
import sys
from pathlib import Path
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from minimateplus.bw_ascii_report import (
BwAsciiReport,
parse_report,
parse_report_file,
)
FIXTURES = Path(__file__).parent.parent / "decode-re" / "5-8-26"
def _fixture(event_name: str) -> Path:
"""Find the .TXT file inside a fixture event folder."""
for p in (FIXTURES / event_name).iterdir():
if p.suffix.lower() == ".txt":
return p
raise FileNotFoundError(f"no .TXT in {FIXTURES / event_name}")
# ── Identity / config ───────────────────────────────────────────────────────
def test_event_c_identity_and_config():
r = parse_report_file(_fixture("event-c"))
assert r.event_type == "Full Waveform"
assert r.serial == "BE11529"
assert r.file_name == "M529LK44.AB0"
assert r.event_datetime == datetime.datetime(2026, 4, 23, 15, 56, 35)
assert r.trigger_channel == "Vert"
assert r.geo_trigger_level_ips == pytest.approx(0.5)
assert r.pretrig_s == pytest.approx(-0.25)
assert r.record_time_s == pytest.approx(1.0)
assert r.record_stop_mode == "Fixed"
assert r.sample_rate_sps == 1024
assert r.battery_volts == pytest.approx(6.8)
assert r.calibration_date == datetime.date(2025, 4, 29)
assert r.calibration_by == "Instantel"
assert r.units == "in/s and dB(L)"
def test_event_c_operator_metadata():
r = parse_report_file(_fixture("event-c"))
# The "Project: : value" pattern (key has its own trailing colon)
# is handled by stripping the colon at lookup time.
assert r.project == "Test4-21-26"
assert r.client == "Test-Client1"
assert r.operator == "Brian and claude"
assert r.sensor_location == "catbed"
def test_event_c_geo_range():
r = parse_report_file(_fixture("event-c"))
assert r.geo_range_ips == pytest.approx(10.0)
# ── Per-channel derived stats ───────────────────────────────────────────────
def test_event_c_per_channel_stats():
r = parse_report_file(_fixture("event-c"))
tran = r.channels["Tran"]
assert tran.ppv_ips == pytest.approx(0.065)
assert tran.zc_freq_hz == pytest.approx(47.0)
assert tran.time_of_peak_s == pytest.approx(0.007)
assert tran.peak_accel_g == pytest.approx(0.066)
assert tran.peak_disp_in == pytest.approx(0.001)
vert = r.channels["Vert"]
assert vert.ppv_ips == pytest.approx(0.610)
assert vert.zc_freq_hz == pytest.approx(16.0)
assert vert.time_of_peak_s == pytest.approx(0.024)
assert vert.peak_accel_g == pytest.approx(0.437)
assert vert.peak_disp_in == pytest.approx(0.006)
long_ = r.channels["Long"]
assert long_.ppv_ips == pytest.approx(0.070)
assert long_.zc_freq_hz == pytest.approx(22.0)
assert long_.time_of_peak_s == pytest.approx(0.019)
assert long_.peak_accel_g == pytest.approx(0.040)
assert long_.peak_disp_in == pytest.approx(0.001)
def test_event_c_micl_stats():
r = parse_report_file(_fixture("event-c"))
# MicL specific block
assert r.mic.weighting == "Linear Weighting"
assert r.mic.pspl_dbl == pytest.approx(88.0)
assert r.mic.zc_freq_hz == pytest.approx(57.0)
assert r.mic.time_of_peak_s == pytest.approx(-0.004)
# Mirrored onto channels["MicL"] for uniform per-channel access
micl_ch = r.channels["MicL"]
assert micl_ch.zc_freq_hz == pytest.approx(57.0)
assert micl_ch.time_of_peak_s == pytest.approx(-0.004)
def test_event_c_vector_sum():
r = parse_report_file(_fixture("event-c"))
assert r.peak_vector_sum_ips == pytest.approx(0.612)
assert r.peak_vector_sum_time_s == pytest.approx(0.024)
# ── Sensor self-check ───────────────────────────────────────────────────────
def test_event_c_sensor_check_geo_channels():
r = parse_report_file(_fixture("event-c"))
for ch_name, expected_freq, expected_ratio in [
("Tran", 7.4, 3.7),
("Vert", 7.6, 3.5),
("Long", 7.5, 3.8),
]:
sc = r.sensor_check[ch_name]
assert sc.test_freq_hz == pytest.approx(expected_freq), ch_name
assert sc.test_ratio == pytest.approx(expected_ratio), ch_name
assert sc.test_results == "Passed", ch_name
# Geo channels don't have an Test Amplitude
assert sc.test_amplitude_mv is None
def test_event_c_sensor_check_micl():
r = parse_report_file(_fixture("event-c"))
sc = r.sensor_check["MicL"]
assert sc.test_freq_hz == pytest.approx(20.1)
assert sc.test_amplitude_mv == pytest.approx(533.0)
assert sc.test_results == "Passed"
# MicL doesn't have a ratio — it has amplitude instead
assert sc.test_ratio is None
# ── Monitor log + tooling ───────────────────────────────────────────────────
def test_event_c_monitor_log_and_pc_version():
r = parse_report_file(_fixture("event-c"))
assert len(r.monitor_log) == 1
e = r.monitor_log[0]
assert e.start_time == datetime.datetime(2026, 4, 23, 15, 46, 16)
assert e.stop_time == datetime.datetime(2026, 4, 23, 15, 56, 36)
assert e.description == "Event recorded."
assert r.pc_sw_version == "V 10.74"
# ── Sample table ─────────────────────────────────────────────────────────────
def test_event_c_sample_table_parsed_when_requested():
r = parse_report_file(_fixture("event-c"), parse_samples=True)
# 1 sec event @ 1024 sps + 0.25 sec pretrig = 1280 samples
assert r.samples is not None
assert len(r.samples) == 1280, f"expected 1280 samples, got {len(r.samples)}"
# First row: "0.000 \t0.005 \t0.005 \t-81.94"
t, v, l, m = r.samples[0]
assert t == pytest.approx(0.000)
assert v == pytest.approx(0.005)
assert l == pytest.approx(0.005)
assert m == pytest.approx(-81.94)
def test_event_c_sample_table_skipped_by_default():
r = parse_report_file(_fixture("event-c"))
assert r.samples is None
# ── Cross-event smoke ───────────────────────────────────────────────────────
@pytest.mark.parametrize("event_name", ["event-a", "event-b", "event-c", "event-d"])
def test_all_fixtures_parse_without_error(event_name):
"""Every fixture in the bundle must parse cleanly with the same parser."""
r = parse_report_file(_fixture(event_name))
# Common invariants: serial, event_datetime, sample rate, all four
# channels surfaced.
assert r.serial == "BE11529"
assert r.event_datetime is not None
assert r.sample_rate_sps in (1024, 2048, 4096)
for ch in ("Tran", "Vert", "Long", "MicL"):
assert ch in r.channels
assert ch in r.sensor_check
# PVS should be present and positive on triggered events
if r.peak_vector_sum_ips is not None:
assert r.peak_vector_sum_ips >= 0
# ── Edge cases / defensive parsing ──────────────────────────────────────────
def test_parse_empty_input():
r = parse_report("")
assert r.serial is None
assert r.event_datetime is None
assert all(cs.ppv_ips is None for cs in r.channels.values())
def test_parse_unknown_keys_ignored():
"""Forward-compat: future BW versions may add fields we don't recognise.
Those should be silently dropped, not raise."""
text = (
'"Serial Number : BE99999"\n'
'"Future Field That Does Not Exist : 42 widgets"\n'
'"Tran PPV : 0.123 in/s"\n'
)
r = parse_report(text)
assert r.serial == "BE99999"
assert r.channels["Tran"].ppv_ips == pytest.approx(0.123)
def test_parse_numeric_with_units_strips_unit():
text = (
'"Vert PPV : 1.275 in/s"\n'
'"Vert ZC Freq : 23 Hz"\n'
'"MicL Test Amplitude : 569 mv"\n'
)
r = parse_report(text)
assert r.channels["Vert"].ppv_ips == pytest.approx(1.275)
assert r.channels["Vert"].zc_freq_hz == pytest.approx(23.0)
assert r.sensor_check["MicL"].test_amplitude_mv == pytest.approx(569.0)
def test_parse_handles_micl_double_space_in_key():
"""BW writes "MicL Time of Peak" with TWO spaces; the parser must
normalise whitespace before key lookup."""
text = (
'"MicL Time of Peak : 0.012 sec"\n'
'"MicL ZC Freq : 51 Hz"\n'
)
r = parse_report(text)
assert r.mic.time_of_peak_s == pytest.approx(0.012)
assert r.mic.zc_freq_hz == pytest.approx(51.0)
+108
View File
@@ -294,6 +294,114 @@ def test_read_blastware_file_round_trip(tmp_path: Path):
assert parsed.peak_values.peak_vector_sum == 0.0
def test_save_imported_bw_with_paired_report(tmp_path: Path):
"""save_imported_bw + a paired BW ASCII report fold the report's
rich derived fields into the sidecar. This is the daemon-forwarded
ACH workflow: BW writes <event>.AB0 and <event>.AB0.TXT side by side;
the daemon ships both; we overlay the report-decoded values onto the
sidecar (peaks, project, plus the rich `bw_report` block)."""
from minimateplus.blastware_file import write_blastware_file, blastware_filename
from sfm.waveform_store import WaveformStore
ev, frames = _make_synthetic_event()
fname = blastware_filename(ev, "BE11529")
src = tmp_path / fname
write_blastware_file(ev, frames, src)
# Use one of the real BW ASCII exports as the paired report.
report_path = (
Path(__file__).parent.parent
/ "decode-re" / "5-8-26" / "event-c" / "M529LK44.AB0.TXT"
)
if not report_path.exists():
import pytest as _pt
_pt.skip("decode-re fixtures not present")
report_bytes = report_path.read_bytes()
store = WaveformStore(tmp_path / "waveforms")
parsed_ev, rec = store.save_imported_bw(
src.read_bytes(),
source_path=src,
bw_report_text=report_bytes,
)
sc = store.load_sidecar("BE11529", fname)
assert sc is not None
# ── bw_report block populated with the rich fields ──────────────────
assert "bw_report" in sc
br = sc["bw_report"]
assert br["available"] is True
assert br["event_type"] == "Full Waveform"
assert br["recording"]["sample_rate_sps"] == 1024
assert br["recording"]["geo_range_ips"] == 10.0
# Per-channel derived stats
assert br["peaks"]["tran"]["ppv_ips"] == 0.065
assert br["peaks"]["vert"]["ppv_ips"] == 0.610
assert br["peaks"]["long"]["ppv_ips"] == 0.070
assert br["peaks"]["vert"]["peak_accel_g"] == 0.437
assert br["peaks"]["vert"]["peak_disp_in"] == 0.006
assert br["peaks"]["tran"]["zc_freq_hz"] == 47.0
assert br["peaks"]["vector_sum"]["ips"] == 0.612
assert br["peaks"]["vector_sum"]["time_s"] == 0.024
# Sensor self-check per channel
assert br["sensor_check"]["tran"]["freq_hz"] == 7.4
assert br["sensor_check"]["tran"]["ratio"] == 3.7
assert br["sensor_check"]["tran"]["result"] == "Passed"
assert br["sensor_check"]["mic"]["amplitude_mv"] == 533.0
# Mic block
assert br["mic"]["weighting"] == "Linear Weighting"
assert br["mic"]["pspl_dbl"] == 88.0
# Monitor log roundtripped
assert len(br["monitor_log"]) == 1
assert "2026-04-23T15:46:16" in br["monitor_log"][0]["start"]
assert br["pc_sw_version"] == "V 10.74"
# ── Overlay onto canonical peak_values ──────────────────────────────
# Report values win over the broken-codec samples-derived peaks.
assert sc["peak_values"]["transverse"] == 0.065
assert sc["peak_values"]["vertical"] == 0.610
assert sc["peak_values"]["longitudinal"] == 0.070
assert sc["peak_values"]["vector_sum"] == 0.612
# Mic PSPL converted to psi (dbl=88 → 10^(88/20) * 2.9e-9)
assert sc["peak_values"]["mic_psi"] is not None
assert 1e-5 < sc["peak_values"]["mic_psi"] < 1e-3
# ── Overlay onto project_info ───────────────────────────────────────
assert sc["project_info"]["project"] == "Test4-21-26"
assert sc["project_info"]["client"] == "Test-Client1"
assert sc["project_info"]["operator"] == "Brian and claude"
assert sc["project_info"]["sensor_location"] == "catbed"
# ── Event timestamp overlaid from report ───────────────────────────
assert sc["event"]["timestamp"] == "2026-04-23T15:56:35"
def test_save_imported_bw_without_report_works_unchanged(tmp_path: Path):
"""Calling save_imported_bw with no bw_report_text behaves exactly
as before — no `bw_report` block, peak_values come from samples."""
from minimateplus.blastware_file import write_blastware_file, blastware_filename
from sfm.waveform_store import WaveformStore
ev, frames = _make_synthetic_event()
fname = blastware_filename(ev, "BE11529")
src = tmp_path / fname
write_blastware_file(ev, frames, src)
store = WaveformStore(tmp_path / "waveforms")
store.save_imported_bw(src.read_bytes(), source_path=src)
sc = store.load_sidecar("BE11529", fname)
assert sc is not None
assert "bw_report" not in sc # block is absent without a report
# Synthetic event has zero samples → peaks all zero (was true before this change)
assert sc["peak_values"]["transverse"] == 0.0
def test_save_imported_bw_round_trip(tmp_path: Path):
"""save_imported_bw stores a copy + sidecar with source.kind = bw-import."""
from minimateplus.blastware_file import write_blastware_file, blastware_filename