"""
test_idf_ascii_report.py — parser for Thor's per-event IDF ASCII export.

Run:
    python -m pytest tests/test_idf_ascii_report.py -q

Tests use real Thor sample data shipped under
`thor-watcher/example-data/THORDATA_example/`.  When that path is not
available (e.g. running from a checkout where the watcher repo isn't
sibling), the tests that depend on the sample files gracefully skip.
"""
from __future__ import annotations

import datetime
import os
import sys
from pathlib import Path

import pytest

# Make the package under test importable when running from the tests folder.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from micromate.idf_ascii_report import (
    parse_event_filename,
    parse_idf_report,
    serial_from_filename,
)


# ── Sample data ──────────────────────────────────────────────────────────────

# NOTE: absolute, host-specific location; fixtures skip when it is absent.
SAMPLE_REPO = Path("/home/serversdown/thor-watcher/example-data/"
                   "THORDATA_example/THORDATA_example")


def _sample_path(rel: str) -> Path:
    """Return the path of the sample file *rel* inside ``SAMPLE_REPO``."""
    return SAMPLE_REPO / rel


def _read_sample_or_skip(rel: str) -> str:
    """Return the text of sample file *rel*, skipping the test if missing.

    The encoding is passed explicitly so the result does not depend on the
    locale of the machine running the tests.
    """
    p = _sample_path(rel)
    if not p.exists():
        pytest.skip(f"sample missing: {p}")
    return p.read_text(encoding="utf-8")


@pytest.fixture
def upmc_waveform_txt() -> str:
    """Text of the UPMC Presby full-waveform (.IDFW) report sidecar."""
    return _read_sample_or_skip(
        "UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt"
    )


@pytest.fixture
def upmc_histogram_txt() -> str:
    """Text of the UPMC Presby histogram (.IDFH) report sidecar."""
    return _read_sample_or_skip(
        "UPMC Presby/UM11719/TXT/UM11719_20231219163444.IDFH.txt"
    )


# ── Filename parsing ─────────────────────────────────────────────────────────

def test_parse_event_filename_waveform():
    parsed = parse_event_filename("UM11719_20231219163444.IDFW")
    assert parsed is not None
    serial, ts, kind = parsed
    assert serial == "UM11719"
    assert ts == datetime.datetime(2023, 12, 19, 16, 34, 44)
    assert kind == "IDFW"


def test_parse_event_filename_histogram():
    parsed = parse_event_filename("BE9439_20200713124251.IDFH")
    assert parsed is not None
    serial, ts, kind = parsed
    assert serial == "BE9439"
    # Timestamp is encoded in the filename as YYYYMMDDhhmmss.
    assert ts == datetime.datetime(2020, 7, 13, 12, 42, 51)
    assert kind == "IDFH"


def test_parse_event_filename_case_insensitive():
    parsed = parse_event_filename("um11719_20231219163444.idfw")
    assert parsed is not None
    assert parsed[0] == "UM11719"
    assert parsed[2] == "IDFW"


@pytest.mark.parametrize(
    "name",
    [
        "UM11719_20231219163444.MLG",
        "UM11719.IDFW",
        "UM11719_20231219163444.IDFW.txt",  # report sidecar — not a binary
        "UM11719_2023121916344X.IDFW",
        "garbage",
        "",
    ],
)
def test_parse_event_filename_rejects_invalid(name):
    assert parse_event_filename(name) is None, name


def test_serial_from_filename():
    assert serial_from_filename("UM11719_20231219163444.IDFW") == "UM11719"
    assert serial_from_filename("BE9439_20200713124251.IDFH") == "BE9439"
    # Works on the .txt sidecar name too — handy in pairing code paths
    assert serial_from_filename("UM11719_20231219163444.IDFW.txt") == "UM11719"
    assert serial_from_filename("not_a_thor_file.bin") is None


# ── Report parsing — derived fields against real Thor sample ─────────────────

def test_waveform_report_derives_serial_event_type_and_datetime(upmc_waveform_txt):
    r = parse_idf_report(upmc_waveform_txt)
    assert r["serial_number"] == "UM11719"
    assert r["event_type"] == "Full Waveform"
    assert r["event_datetime"] == "2023-12-19T16:27:23"
    assert r["filename"] == "UM11719_20231219162723.IDFW"


def test_waveform_report_parses_peak_velocities(upmc_waveform_txt):
    r = parse_idf_report(upmc_waveform_txt)
    assert r["tran_ppv"] == pytest.approx(0.0251)
    assert r["vert_ppv"] == pytest.approx(0.2119)
    assert r["long_ppv"] == pytest.approx(0.0282)
    assert r["peak_vector_sum"] == pytest.approx(0.2131)


def test_waveform_report_parses_zc_freq_and_mic(upmc_waveform_txt):
    r = parse_idf_report(upmc_waveform_txt)
    assert r["tran_zc_freq"] == pytest.approx(6.5)
    assert r["vert_zc_freq"] == pytest.approx(73.1)
    assert r["long_zc_freq"] == pytest.approx(85.3)
    assert r["mic_ppv"] == pytest.approx(99.4)


def test_waveform_report_parses_record_and_pretrigger_durations(upmc_waveform_txt):
    r = parse_idf_report(upmc_waveform_txt)
    assert r["record_time_sec"] == pytest.approx(2.0)
    assert r["pre_trigger_sec"] == pytest.approx(0.25)


def test_waveform_report_parses_sample_rate(upmc_waveform_txt):
    r = parse_idf_report(upmc_waveform_txt)
    assert r["sample_rate"] == 1024


def test_waveform_report_extracts_title_strings(upmc_waveform_txt):
    r = parse_idf_report(upmc_waveform_txt)
    # TitleString1 (location) → project
    assert r["project"] == "UPMC Presby-Loc 3-Level1-1R Elevator Rm"
    # TitleString2 → client
    assert r["client"] == "Whiting-Turner - PJ Dick - Joint Venture"
    # TitleString3 → operator (company)
    assert r["operator"] == "Terra-Mechanics, Inc. - D. Harrsion"


def test_waveform_report_extracts_setup_version_and_calibration(upmc_waveform_txt):
    r = parse_idf_report(upmc_waveform_txt)
    assert r["setup"] == "UPMC Loc 3.mmb"
    assert r["version"] == "Micromate ISEE 11.0AK"
    assert r["calibration_text"] == "November 22, 2023 by Instantel"
    assert r["battery_volts"] == pytest.approx(3.8)


def test_waveform_report_decodes_sensor_self_check(upmc_waveform_txt):
    r = parse_idf_report(upmc_waveform_txt)
    assert r["tran_test_passed"] is True
    assert r["vert_test_passed"] is True
    assert r["long_test_passed"] is True
    assert r["mic_test_passed"] is True


def test_histogram_report_parses(upmc_histogram_txt):
    """Histogram sidecars have the same shape as waveform — both decode
    through the same parser without errors."""
    r = parse_idf_report(upmc_histogram_txt)
    assert r["serial_number"] == "UM11719"
    # IDFH timestamp in the sample
    assert r["event_datetime"] == "2023-12-19T16:34:44"
    # Either spelling of the histogram event type is accepted.
    event_type = r["event_type"].lower()
    assert event_type.startswith(("full histogram", "histogram"))
    # Sample rate present
    assert "sample_rate" in r


# ── Edge cases ───────────────────────────────────────────────────────────────

def test_parses_bytes_input():
    text = (
        '"SerialNumber : UM11719"\n'
        '"TranPPV : 0.0251 in/s"\n'
    )
    r = parse_idf_report(text.encode("utf-8"))
    assert r["serial_number"] == "UM11719"
    assert r["tran_ppv"] == pytest.approx(0.0251)


def test_parses_latin1_fallback():
    """Garbled non-UTF8 bytes fall back to latin-1 instead of crashing."""
    text = b'"SerialNumber : UM11719"\n"Operator : Caf\xe9"\n'
    r = parse_idf_report(text)
    assert r["serial_number"] == "UM11719"
    assert r["operator"] == "Café"


def test_stops_at_waveform_data_marker():
    """Lines after the 'Waveform Data Channels' marker are not parsed as
    key/value pairs — they're tabular sample data."""
    text = (
        '"SerialNumber : UM11719"\n'
        '"TranPPV : 0.0251 in/s"\n'
        'Waveform Data Channels\n'
        ' Tran Vert Long MicL\n'
        ' 0.0003 -0.0003 0.0003 0.00013\n'
    )
    r = parse_idf_report(text)
    assert r["serial_number"] == "UM11719"
    assert r["tran_ppv"] == pytest.approx(0.0251)
    # No spurious entries from the table body
    assert "tran" not in r
    assert "0.0003" not in r


def test_missing_event_time_omits_datetime():
    r = parse_idf_report('"SerialNumber : UM11719"\n')
    assert r["serial_number"] == "UM11719"
    assert "event_datetime" not in r


def test_handles_empty_input():
    r = parse_idf_report("")
    assert r == {
        "project": None,
        "client": None,
        "operator": None,
        "notes": None,
    }