Files
seismo-relay/tests/test_idf_ascii_report.py
T

235 lines
7.9 KiB
Python

"""
test_idf_ascii_report.py — tests for the parser of Thor's per-event IDF ASCII export.
Run:
python -m pytest tests/test_idf_ascii_report.py -q
Tests use real Thor sample data shipped under
`thor-watcher/example-data/THORDATA_example/`. When that path is not
available (e.g. running from a checkout where the watcher repo isn't a
sibling), the tests that depend on it skip gracefully.
"""
from __future__ import annotations
import datetime
import os
import sys
from pathlib import Path
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from sfm.idf_ascii_report import (
parse_event_filename,
parse_idf_report,
serial_from_filename,
)
# ── Sample data ──────────────────────────────────────────────────────────────
# Root of the real Thor sample tree. The default is the original development
# host's checkout; set the THOR_SAMPLE_REPO environment variable to point at a
# copy elsewhere (CI, another machine) instead of editing this file. An unset
# or empty variable falls back to the default.
SAMPLE_REPO = Path(
    os.environ.get("THOR_SAMPLE_REPO")
    or (
        "/home/serversdown/thor-watcher/example-data/"
        "THORDATA_example/THORDATA_example"
    )
)
def _sample_path(rel: str) -> Path:
    """Return *rel* joined onto the ``SAMPLE_REPO`` root."""
    return SAMPLE_REPO.joinpath(rel)
@pytest.fixture
def upmc_waveform_txt() -> str:
    """Text of the UM11719 waveform (IDFW) report; skips if the sample is absent."""
    sample = _sample_path("UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt")
    if sample.exists():
        return sample.read_text()
    # pytest.skip raises, so nothing is returned when the file is missing.
    pytest.skip(f"sample missing: {sample}")
@pytest.fixture
def upmc_histogram_txt() -> str:
    """Text of the UM11719 histogram (IDFH) report; skips if the sample is absent."""
    sample = _sample_path("UPMC Presby/UM11719/TXT/UM11719_20231219163444.IDFH.txt")
    if sample.exists():
        return sample.read_text()
    # pytest.skip raises, so nothing is returned when the file is missing.
    pytest.skip(f"sample missing: {sample}")
# ── Filename parsing ─────────────────────────────────────────────────────────
def test_parse_event_filename_waveform():
    """A well-formed ``.IDFW`` name splits into serial, timestamp and kind."""
    result = parse_event_filename("UM11719_20231219163444.IDFW")
    assert result is not None

    unit_serial, stamp, suffix = result
    expected_stamp = datetime.datetime(2023, 12, 19, 16, 34, 44)
    assert unit_serial == "UM11719"
    assert stamp == expected_stamp
    assert suffix == "IDFW"
def test_parse_event_filename_histogram():
    """An ``.IDFH`` name splits into serial, timestamp and kind."""
    parsed = parse_event_filename("BE9439_20200713124251.IDFH")
    assert parsed is not None
    serial, ts, kind = parsed
    assert serial == "BE9439"
    # The timestamp was unpacked but never checked; assert it so a regression
    # in timestamp parsing for histogram names is caught as well.
    assert ts == datetime.datetime(2020, 7, 13, 12, 42, 51)
    assert kind == "IDFH"
def test_parse_event_filename_case_insensitive():
    """Lower-case input is accepted; serial and kind come back upper-cased."""
    result = parse_event_filename("um11719_20231219163444.idfw")
    assert result is not None

    serial_field, kind_field = result[0], result[2]
    assert serial_field == "UM11719"
    assert kind_field == "IDFW"
def test_parse_event_filename_rejects_invalid():
    """Each name in the rejected list must parse to ``None``."""
    rejected = (
        "UM11719_20231219163444.MLG",
        "UM11719.IDFW",
        "UM11719_20231219163444.IDFW.txt",  # report sidecar — not a binary
        "UM11719_2023121916344X.IDFW",
        "garbage",
        "",
    )
    for candidate in rejected:
        outcome = parse_event_filename(candidate)
        # The candidate is the assertion message so a failure names the input.
        assert outcome is None, candidate
def test_serial_from_filename():
    """``serial_from_filename`` yields the serial, or ``None`` for a foreign name."""
    recognised = {
        "UM11719_20231219163444.IDFW": "UM11719",
        "BE9439_20200713124251.IDFH": "BE9439",
        # Works on the .txt sidecar name too — handy in pairing code paths
        "UM11719_20231219163444.IDFW.txt": "UM11719",
    }
    for name, serial in recognised.items():
        assert serial_from_filename(name) == serial
    assert serial_from_filename("not_a_thor_file.bin") is None
# ── Report parsing — derived fields against real Thor sample ─────────────────
def test_waveform_report_derives_serial_event_type_and_datetime(upmc_waveform_txt):
    """The waveform sample yields the expected identity fields."""
    report = parse_idf_report(upmc_waveform_txt)
    expected = {
        "serial_number": "UM11719",
        "event_type": "Full Waveform",
        "event_datetime": "2023-12-19T16:27:23",
        "filename": "UM11719_20231219162723.IDFW",
    }
    for field, value in expected.items():
        assert report[field] == value
def test_waveform_report_parses_peak_velocities(upmc_waveform_txt):
    """Per-axis PPV values and the peak vector sum match the sample report."""
    report = parse_idf_report(upmc_waveform_txt)
    expected_pairs = (
        ("tran_ppv", 0.0251),
        ("vert_ppv", 0.2119),
        ("long_ppv", 0.0282),
        ("peak_vector_sum", 0.2131),
    )
    for field, value in expected_pairs:
        assert report[field] == pytest.approx(value)
def test_waveform_report_parses_zc_freq_and_mic(upmc_waveform_txt):
    """Zero-crossing frequencies and the mic reading match the sample report."""
    report = parse_idf_report(upmc_waveform_txt)
    approx = pytest.approx

    assert report["tran_zc_freq"] == approx(6.5)
    assert report["vert_zc_freq"] == approx(73.1)
    assert report["long_zc_freq"] == approx(85.3)
    assert report["mic_ppv"] == approx(99.4)
def test_waveform_report_parses_record_and_pretrigger_durations(upmc_waveform_txt):
    """Record time and pre-trigger time match the sample report."""
    report = parse_idf_report(upmc_waveform_txt)

    record_time = report["record_time_sec"]
    assert record_time == pytest.approx(2.0)

    pre_trigger = report["pre_trigger_sec"]
    assert pre_trigger == pytest.approx(0.25)
def test_waveform_report_parses_sample_rate(upmc_waveform_txt):
    """The waveform sample reports a sample rate of 1024."""
    sample_rate = parse_idf_report(upmc_waveform_txt)["sample_rate"]
    assert sample_rate == 1024
def test_waveform_report_extracts_title_strings(upmc_waveform_txt):
    """The three title strings surface as project, client and operator."""
    report = parse_idf_report(upmc_waveform_txt)
    expected = {
        # TitleString1 (location) → project
        "project": "UPMC Presby-Loc 3-Level1-1R Elevator Rm",
        # TitleString2 → client
        "client": "Whiting-Turner - PJ Dick - Joint Venture",
        # TitleString3 → operator (company)
        "operator": "Terra-Mechanics, Inc. - D. Harrsion",
    }
    for field, value in expected.items():
        assert report[field] == value
def test_waveform_report_extracts_setup_version_and_calibration(upmc_waveform_txt):
    """Setup file, firmware version, calibration text and battery volts match."""
    report = parse_idf_report(upmc_waveform_txt)

    text_fields = (
        ("setup", "UPMC Loc 3.mmb"),
        ("version", "Micromate ISEE 11.0AK"),
        ("calibration_text", "November 22, 2023 by Instantel"),
    )
    for field, value in text_fields:
        assert report[field] == value

    # Battery voltage is compared approximately, unlike the text fields above.
    assert report["battery_volts"] == pytest.approx(3.8)
def test_waveform_report_decodes_sensor_self_check(upmc_waveform_txt):
    """All four channel self-check flags are exactly ``True`` in the sample."""
    report = parse_idf_report(upmc_waveform_txt)
    flag_keys = (
        "tran_test_passed",
        "vert_test_passed",
        "long_test_passed",
        "mic_test_passed",
    )
    for key in flag_keys:
        # Identity check: a truthy non-bool value must not pass.
        assert report[key] is True
def test_histogram_report_parses(upmc_histogram_txt):
    """Histogram sidecars have the same shape as waveform — both
    decode through the same parser without errors."""
    report = parse_idf_report(upmc_histogram_txt)
    assert report["serial_number"] == "UM11719"
    # IDFH timestamp in the sample
    assert report["event_datetime"] == "2023-12-19T16:34:44"
    # Either prefix is accepted, compared case-insensitively.
    assert report["event_type"].lower().startswith(("full histogram", "histogram"))
    # Sample rate present
    assert "sample_rate" in report
# ── Edge cases ───────────────────────────────────────────────────────────────
def test_parses_bytes_input():
    """UTF-8 encoded ``bytes`` are accepted in place of ``str``."""
    payload = (
        '"SerialNumber : UM11719"\n'
        '"TranPPV : 0.0251 in/s"\n'
    ).encode("utf-8")

    report = parse_idf_report(payload)

    assert report["serial_number"] == "UM11719"
    assert report["tran_ppv"] == pytest.approx(0.0251)
def test_parses_latin1_fallback():
    """Garbled non-UTF8 bytes fall back to latin-1 instead of crashing."""
    # 0xE9 is "é" in latin-1; followed by a quote it is not valid UTF-8.
    raw = b'"SerialNumber : UM11719"\n"Operator : Caf\xe9"\n'
    report = parse_idf_report(raw)

    expected = {"serial_number": "UM11719", "operator": "Café"}
    for field, value in expected.items():
        assert report[field] == value
def test_stops_at_waveform_data_marker():
    """Lines after the 'Waveform Data Channels' marker are not parsed
    as key/value pairs — they're tabular sample data."""
    header = (
        '"SerialNumber : UM11719"\n'
        '"TranPPV : 0.0251 in/s"\n'
    )
    table = (
        'Waveform Data Channels\n'
        ' Tran Vert Long MicL\n'
        ' 0.0003 -0.0003 0.0003 0.00013\n'
    )
    report = parse_idf_report(header + table)

    assert report["serial_number"] == "UM11719"
    assert report["tran_ppv"] == pytest.approx(0.0251)
    # No spurious entries from the table body
    for stray_key in ("tran", "0.0003"):
        assert stray_key not in report
def test_missing_event_time_omits_datetime():
    """With only a serial line in the input, no ``event_datetime`` key appears."""
    serial_only = '"SerialNumber : UM11719"\n'
    report = parse_idf_report(serial_only)

    assert report["serial_number"] == "UM11719"
    assert "event_datetime" not in report
def test_handles_empty_input():
    """Empty text yields exactly the four title/notes keys, all ``None``."""
    # dict.fromkeys with no value argument maps every key to None.
    expected = dict.fromkeys(("project", "client", "operator", "notes"))
    result = parse_idf_report("")
    assert result == expected