diff --git a/CHANGELOG.md b/CHANGELOG.md index a1c60e2..7e7ceae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,28 @@ All notable changes to seismo-relay are documented here. --- +## v0.18.0 — 2026-05-19 + +The "Thor / Series IV ingest adapter" release. Seismo-relay can now accept event files from Instantel Micromate Series IV (Thor) units alongside the existing MiniMate Plus (Series III) Blastware pipeline. + +### Added — Thor (Series IV) IDF ingest + +- **`POST /db/import/idf_file`** (`sfm/server.py`) — multipart upload endpoint for `.IDFH` (histogram) and `.IDFW` (waveform) event files plus their `.IDFH.txt` / `.IDFW.txt` ASCII sidecars. Mirrors the shape of `/db/import/blastware_file`: pairing by filename, optional `serial` query hint, per-file outcome reporting. +- **`sfm/idf_ascii_report.py`** — parser for Thor's TXT sidecars (verified against 1,014 real-world samples). Extracts device-authoritative PPV, ZC Freq, Peak Vector Sum, Mic PSPL, calibration date, firmware version, sensor self-check results, and project/client/operator strings. +- **`WaveformStore.save_imported_idf()`** (`sfm/waveform_store.py`) — stores Thor binaries verbatim in `<store_root>/<serial>/`, writes a `.sfm.json` sidecar with `source.kind = "idf-import"` and the full parsed report under `extensions.idf_report`. Reuses the existing `events` table — Thor events dedupe on (serial, timestamp) and surface in `/db/events` alongside BW events. +- **`tests/test_idf_ascii_report.py`** — parser tests against the `thor-watcher/example-data/` corpus. + +### Changed + +- `event_to_sidecar_dict()` (`minimateplus/event_file_io.py`) allow-list for `source_kind` now includes `"idf-import"` so the existing sidecar machinery can carry Thor imports. +- Bumped `pyproject.toml` version to `0.18.0`. + +### Companion release + +This release ships alongside **thor-watcher v0.3.0**, which adds the SFM forwarder that targets the new `/db/import/idf_file` endpoint. 
Operators flip the switch in thor-watcher's new "SFM Forward" Settings tab; events POST to seismo-relay just like the series3-watcher BW forwarder does today. + +--- + ## v0.17.0 — 2026-05-17 The "field rescue + DB management" release. Hardened against units that are stuck in a runaway call-home loop, and added an operator-facing path for purging bogus events that those same units dump into the DB before recovery. All work in this release was driven by the BE9558H incident (full incident log + recovery procedure at `docs/runbooks/wedged_unit_recovery.md`). diff --git a/minimateplus/event_file_io.py b/minimateplus/event_file_io.py index 12ad2db..9c82718 100644 --- a/minimateplus/event_file_io.py +++ b/minimateplus/event_file_io.py @@ -309,7 +309,7 @@ def event_to_sidecar_dict( match the binary's footer; we prefer the report value because the BW-binary footer timestamp can drift on some firmware). """ - if source_kind not in {"sfm-live", "sfm-ach", "bw-import"}: + if source_kind not in {"sfm-live", "sfm-ach", "bw-import", "idf-import"}: raise ValueError(f"unknown source_kind: {source_kind!r}") captured_at = captured_at or datetime.datetime.utcnow() diff --git a/pyproject.toml b/pyproject.toml index 922e5f9..f22c178 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "seismo-relay" -version = "0.17.0" +version = "0.18.0" description = "Python client and REST server for MiniMate Plus seismographs" requires-python = ">=3.10" dependencies = [ diff --git a/sfm/idf_ascii_report.py b/sfm/idf_ascii_report.py new file mode 100644 index 0000000..ea26293 --- /dev/null +++ b/sfm/idf_ascii_report.py @@ -0,0 +1,291 @@ +""" +sfm/idf_ascii_report.py — parse Thor (Micromate Series IV) IDF ASCII reports. + +Thor exports a `.IDFW.txt` or `.IDFH.txt` sidecar next to each `.IDFW` +(waveform) or `.IDFH` (histogram) event binary. 
Each sidecar is a +plain-text file with `"Key : Value"` lines covering the full device- +authoritative event metadata — PPV per channel, ZC Freq, Time of Peak, +Peak Acceleration / Displacement, sensor self-check results, project +strings, calibration date, battery level, etc. — followed by a raw +waveform-samples block headed by the literal line "Waveform Data Channels". + +This is the Thor analogue of `minimateplus/bw_ascii_report.py` for the +Blastware (Series III) report format. The parser is intentionally +permissive: we extract everything we recognise into a flat dict and +silently ignore anything we don't. Downstream callers parse units +(`"0.2119 in/s"` → 0.2119) only on the fields they need. + +Example input (truncated): + + "EventType : Full Waveform" + "SampleRate : 1024 sps" + "EventTime : 16:27:23" + "EventDate : 2023-12-19" + "TranPPV : 0.0251 in/s" + "VertPPV : 0.2119 in/s" + "LongPPV : 0.0282 in/s" + "PeakVectorSum : 0.2131 in/s" + "MicPSPL : 99.4 dB(L)" + "TranZCFreq : 6.5 Hz" + "SerialNumber : UM11719" + "Version : Micromate ISEE 11.0AK" + "FileName : UM11719_20231219162723.IDFW" + "BatteryLevel : 3.8 volts" + "Calibration : November 22, 2023 by Instantel" + "TranTestResults : Passed" + "TitleString1 : UPMC Presby-Loc 3-Level1-1R Elevator Rm" + Waveform Data Channels + Tran Vert Long MicL + 0.0003 -0.0003 0.0003 0.00013 + ... +""" + +from __future__ import annotations + +import datetime +import re +from typing import Any, Dict, Optional, Tuple, Union + + +# Lines look like: "Key : Value" (quotes literal, single ":" separator) +_LINE_RE = re.compile(r'^\s*"?([^":]+?)"?\s*:\s*"?(.*?)"?\s*$') + +# Marker that ends the metadata block — everything after is raw sample data. 
+_WAVEFORM_BLOCK_MARKER = "waveform data channels" + + +def _normalize_key(raw: str) -> str: + """Convert "TranPPV" / "PreTriggerLength" → snake_case.""" + s = raw.strip() + # Insert underscore between lower→upper / digit→letter transitions + s = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", s) + s = re.sub(r"(?<=[A-Z])(?=[A-Z][a-z])", "_", s) + s = s.replace("-", "_").replace(" ", "_") + return s.lower() + + +def _strip_unit_suffix(value: str) -> str: + """Return the numeric part of values like "0.2119 in/s" → "0.2119".""" + parts = value.strip().split() + return parts[0] if parts else value.strip() + + +def _parse_float(value: str) -> Optional[float]: + try: + return float(_strip_unit_suffix(value)) + except (ValueError, TypeError): + return None + + +def _parse_int(value: str) -> Optional[int]: + try: + return int(float(_strip_unit_suffix(value))) + except (ValueError, TypeError): + return None + + +def parse_idf_report(text: Union[str, bytes]) -> Dict[str, Any]: + """ + Parse a Thor IDFW.txt / IDFH.txt sidecar. + + Returns a flat dict with two kinds of entries: + + - **Raw fields** — every `Key : Value` line, keyed by snake_case + of the original key, value as a string (unit suffix preserved). + Lets callers grab any field we haven't explicitly normalised. 
+ + - **Derived fields** — a curated set with parsed types: + * `serial_number` str + * `event_type` str ("Full Waveform" / "Full Histogram") + * `event_datetime` ISO-8601 string ("YYYY-MM-DDTHH:MM:SS") when + both EventDate and EventTime are present + * `sample_rate` int (samples/sec) + * `tran_ppv`,`vert_ppv`,`long_ppv` float (in/s) + * `mic_ppv` float (dB or psi — same units as MicPSPL) + * `peak_vector_sum` float (in/s) + * `tran_zc_freq`,`vert_zc_freq`,`long_zc_freq` float (Hz) + * `record_time_sec` float (seconds) + * `pre_trigger_sec` float (seconds) + * `project` str (from TitleString1 — Thor's location) + * `client` str (TitleString2) + * `operator` str (TitleString3 — company/operator) + * `notes` str (TitleString4) + * `setup` str + * `version` str (firmware) + * `battery_volts` float + * `calibration_text` str (e.g. "November 22, 2023 by Instantel") + * `tran_test_passed`, `vert_test_passed`, `long_test_passed`, + `mic_test_passed` bool ("Passed" → True; anything else → False) + * `filename` str (FileName line — useful sanity check) + + Stops parsing at the literal "Waveform Data Channels" line; the + raw-samples block is left to whoever wants to decode the binary. + + Input may be `str` or `bytes` (`utf-8`/`latin-1` tolerant). + """ + if isinstance(text, bytes): + try: + text = text.decode("utf-8") + except UnicodeDecodeError: + text = text.decode("latin-1", errors="replace") + + raw: Dict[str, str] = {} + + for line in text.splitlines(): + stripped = line.strip() + if not stripped: + continue + if stripped.lower().startswith(_WAVEFORM_BLOCK_MARKER): + break + m = _LINE_RE.match(stripped) + if not m: + continue + key = _normalize_key(m.group(1)) + value = m.group(2).strip() + # Multi-value lines (Channel, Units, etc.) — coalesce by appending. 
+ if key in raw: + raw[key] = raw[key] + "; " + value + else: + raw[key] = value + + out: Dict[str, Any] = dict(raw) # keep all raw fields + + # ── Derived fields ─────────────────────────────────────────────────────── + + def _take(*candidates: str) -> Optional[str]: + for c in candidates: + if c in raw: + return raw[c] + return None + + # Event identity + if "serial_number" in raw: + out["serial_number"] = raw["serial_number"] + if "event_type" in raw: + out["event_type"] = raw["event_type"] + if "file_name" in raw: + out["filename"] = raw["file_name"] + + # Combined date+time. Waveform sidecars use "EventDate" / "EventTime"; + # histogram sidecars use "HistogramStartDate" / "HistogramStartTime". + # Prefer the event_* names when both are present. + ed = raw.get("event_date") or raw.get("histogram_start_date") + et = raw.get("event_time") or raw.get("histogram_start_time") + if ed and et: + try: + dt = datetime.datetime.strptime(f"{ed} {et}", "%Y-%m-%d %H:%M:%S") + out["event_datetime"] = dt.isoformat() + except ValueError: + pass + + # Numeric scalars + for key in ("sample_rate",): + v = raw.get(key) + if v is not None: + iv = _parse_int(v) + if iv is not None: + out[key] = iv + + for key in ("tran_ppv", "vert_ppv", "long_ppv", "peak_vector_sum", + "tran_zc_freq", "vert_zc_freq", "long_zc_freq", + "tran_peak_acceleration", "vert_peak_acceleration", + "long_peak_acceleration", + "tran_peak_displacement", "vert_peak_displacement", + "long_peak_displacement", + "tran_time_of_peak", "vert_time_of_peak", "long_time_of_peak", + "mic_time_of_peak", "mic_zc_freq"): + v = raw.get(key) + if v is not None: + fv = _parse_float(v) + if fv is not None: + out[key] = fv + + # Microphone — Thor reports MicPSPL (dB(L)) which is the closest + # analogue to BW's mic_ppv. Stored as a float; units are in the + # original raw field (`mic_pspl` raw entry preserves "99.4 dB(L)"). 
+ mic = raw.get("mic_pspl") + if mic is not None: + fv = _parse_float(mic) + if fv is not None: + out["mic_ppv"] = fv + + # Record / pre-trigger duration + rt = raw.get("record_time") + if rt is not None: + fv = _parse_float(rt) + if fv is not None: + out["record_time_sec"] = fv + pt = raw.get("pre_trigger_length") + if pt is not None: + fv = _parse_float(pt) + if fv is not None: + out["pre_trigger_sec"] = fv + + # Project / client / operator / location strings. Thor's title + # strings are operator-defined; conventional mapping (per Thor's + # default TitleNote labels in the example data): + # TitleString1 = Location → project (sensor location identifier) + # TitleString2 = Client → client + # TitleString3 = Company → operator (the monitoring company) + # TitleString4 = Notes → notes + out["project"] = _take("title_string1") + out["client"] = _take("title_string2") + out["operator"] = _take("title_string3", "operator") + out["notes"] = _take("title_string4", "post_event_note") + + if "setup" in raw: + out["setup"] = raw["setup"] + if "version" in raw: + out["version"] = raw["version"] + + # Battery (e.g. "3.8 volts" → 3.8) + bl = raw.get("battery_level") + if bl is not None: + fv = _parse_float(bl) + if fv is not None: + out["battery_volts"] = fv + + # Calibration line is free-form (e.g. "November 22, 2023 by Instantel"). + if "calibration" in raw: + out["calibration_text"] = raw["calibration"] + + # Sensor self-check results — bool flags + for key, out_key in ( + ("tran_test_results", "tran_test_passed"), + ("vert_test_results", "vert_test_passed"), + ("long_test_results", "long_test_passed"), + ("mic_test_results", "mic_test_passed"), + ): + v = raw.get(key) + if v is not None: + out[out_key] = v.strip().lower() == "passed" + + return out + + +def serial_from_filename(name: str) -> Optional[str]: + """Convenience: pull the serial prefix from a Thor event filename. 
+ + Thor uses the literal serial as the filename prefix: + UM11719_20231219163444.IDFW → "UM11719" + BE9439_20200713124251.IDFH → "BE9439" + """ + m = re.match(r"^([A-Z]{2}\d+)_\d{14}\.(IDFH|IDFW)(?:\.txt)?$", + name, re.IGNORECASE) + return m.group(1).upper() if m else None + + +def parse_event_filename(name: str) -> Optional[Tuple[str, datetime.datetime, str]]: + """Parse `_.` → (serial, datetime, kind). + + `kind` is "IDFH" or "IDFW" (upper-case). Returns None on no match. + """ + m = re.match(r"^([A-Z]{2}\d+)_(\d{14})\.(IDFH|IDFW)$", + name, re.IGNORECASE) + if not m: + return None + try: + ts = datetime.datetime.strptime(m.group(2), "%Y%m%d%H%M%S") + except ValueError: + return None + return m.group(1).upper(), ts, m.group(3).upper() diff --git a/sfm/server.py b/sfm/server.py index 603e2d2..2378ab9 100644 --- a/sfm/server.py +++ b/sfm/server.py @@ -2472,6 +2472,123 @@ def _serial_from_event(ev) -> Optional[str]: return None +# ── /db/import/idf_file — ingest Thor (Series IV) IDF event files ──────────── + + +@app.post("/db/import/idf_file") +async def db_import_idf_file( + files: list[UploadFile] = File(...), + serial: Optional[str] = Query(None, description="Optional serial-number hint (e.g. UM11719); falls back to the IDF filename's literal prefix when omitted"), +) -> dict: + """ + Multipart upload of one or more Thor (Micromate Series IV) IDF event + file binaries (`.IDFH` histogram, `.IDFW` waveform), typically + forwarded by `thor-watcher`'s SFM forwarder. + + For each file: + + 1. Pair the binary with its `.txt` ASCII report when one + is present in the same upload. + 2. Parse the report via `sfm.idf_ascii_report.parse_idf_report` + and copy the binary into the persistent store via + `WaveformStore.save_imported_idf`, writing a `.sfm.json` + sidecar with `source.kind = "idf-import"`. + 3. Upsert a row into `events` (dedup'd on serial+timestamp). 
+ + **Paired Thor TXT reports.** Thor's TXT exporter writes a + per-event ASCII report next to each binary as `.txt` + (e.g. `UM11719_20231219163444.IDFW` + `UM11719_20231219163444.IDFW.txt`). + The thor-watcher forwarder ships both files in a single multipart + upload. If the report is present, its decoded fields (Tran/Vert/Long + PPV, ZC Freq, Peak Vector Sum, Mic PSPL, calibration, sensor + self-check results, project strings) land in the sidecar's + `extensions.idf_report` block and the SFM `events` row's + device-authoritative columns. + + Pairing is by exact filename match (case-insensitive): a binary + named `foo.IDFW` is paired with a report named `foo.IDFW.txt` or + `foo.IDFW.TXT`. + + Response includes per-file outcomes so the watcher can see which + landed cleanly and which failed (e.g. malformed file, unknown + serial, etc.). + """ + store = _get_store() + db = _get_db() + results: list[dict] = [] + + binaries: list[tuple[str, bytes]] = [] + reports: dict[str, bytes] = {} # keyed by lower-cased binary filename + for upload in files: + name = upload.filename or "" + try: + content = await upload.read() + except Exception as exc: + results.append({ + "filename": name or "", "status": "error", + "detail": f"read failed: {exc}", + }) + continue + + if name.lower().endswith(".txt"): + # Thor convention: .txt — strip the trailing ".txt" + # to recover the binary's filename. 
+ stripped = name[:-4] + reports[stripped.lower()] = content + else: + binaries.append((name, content)) + + for filename, content in binaries: + report_bytes = reports.get(filename.lower()) + try: + ev, rec = store.save_imported_idf( + content, + source_path=Path(filename or "imported.idf"), + serial_hint=serial, + idf_report_text=report_bytes, + ) + resolved_serial = ( + serial + or rec.get("serial") + or "UNKNOWN" + ) + inserted, skipped = db.insert_events( + [ev], + serial=resolved_serial, + waveform_records={ + ev._waveform_key.hex(): rec + } if ev._waveform_key else None, + ) + results.append({ + "filename": filename, + "status": "ok", + "stored_filename": rec["filename"], + "filesize": rec["filesize"], + "sha256": rec["sha256"], + "serial": resolved_serial, + "report_attached": report_bytes is not None, + "inserted": inserted, + "skipped": skipped, + }) + except Exception as exc: + log.error("idf import failed for %s: %s", filename, exc, exc_info=True) + results.append({ + "filename": filename, "status": "error", + "detail": str(exc), + }) + + # Surface unmatched .txt uploads so the daemon can detect mis-pairings. 
+ used_report_keys = {fn.lower() for fn, _ in binaries} + for stem in reports.keys() - used_report_keys: + results.append({ + "filename": stem + ".txt", + "status": "warning", + "detail": "Thor TXT report supplied but no matching binary in this upload", + }) + + return {"count": len(results), "results": results} + + @app.get("/db/units/{serial}/waveforms.zip") def db_unit_waveforms_zip( serial: str, diff --git a/sfm/waveform_store.py b/sfm/waveform_store.py index 0d04460..4e0addc 100644 --- a/sfm/waveform_store.py +++ b/sfm/waveform_store.py @@ -413,6 +413,179 @@ class WaveformStore: "serial": serial, } + def save_imported_idf( + self, + idf_bytes: bytes, + source_path: Path, + *, + serial_hint: Optional[str] = None, + idf_report_text: Optional[Union[str, bytes]] = None, + ) -> tuple[Optional["Event"], dict]: + """ + Ingest a Thor (Micromate Series IV) IDF event file (`.IDFW` or + `.IDFH`) produced by Thor's TXT exporter. + + Thor binaries are stored as opaque bytes — seismo-relay doesn't + decode the proprietary IDF binary format. Device-authoritative + metadata comes from the paired `.IDFW.txt` / `.IDFH.txt` sidecar + when supplied; we parse that text and surface its fields onto + the returned Event so the SFM database row has real PPV/project + values instead of NULLs. + + Workflow: + 1. Parse the paired TXT report (when supplied) via + `sfm.idf_ascii_report.parse_idf_report`. + 2. Build a minimal `Event` populated from the report fields + (timestamp, peaks, project info, sample_rate, record_type). + 3. Resolve serial from filename prefix or `serial_hint`. + 4. Copy bytes verbatim into //. + 5. Write the `.sfm.json` sidecar with source.kind = "idf-import". + + Returns (event, record_dict) so the endpoint can both insert + into SeismoDb and surface the parsed event. 
+ """ + from sfm.idf_ascii_report import ( + parse_idf_report, + parse_event_filename, + serial_from_filename as _idf_serial_from_filename, + ) + from minimateplus.models import ( + Event, PeakValues, ProjectInfo, Timestamp, + ) + + # Parse the .txt sidecar (best-effort; non-fatal on failure). + report: dict = {} + if idf_report_text is not None: + try: + report = parse_idf_report(idf_report_text) + except Exception as exc: + log.warning( + "save_imported_idf: report parse failed: %s — continuing without it", + exc, + ) + + # Resolve serial: prefer the explicit hint, fall back to filename prefix. + serial = ( + serial_hint + or report.get("serial_number") + or _idf_serial_from_filename(source_path.name) + or "UNKNOWN" + ) + + # Resolve event timestamp + kind from the filename (always present). + parsed_name = parse_event_filename(source_path.name) + kind = "Waveform" + ts_dt: Optional[datetime.datetime] = None + if parsed_name is not None: + _, ts_dt, kind_token = parsed_name + kind = "Histogram" if kind_token == "IDFH" else "Waveform" + # Report's event_datetime is the device-authoritative value; prefer it. + if "event_datetime" in report: + try: + ts_dt = datetime.datetime.fromisoformat(report["event_datetime"]) + except (TypeError, ValueError): + pass + + ts_obj: Optional[Timestamp] = None + if ts_dt is not None: + ts_obj = Timestamp( + raw=bytes(9), + flag=0, + year=ts_dt.year, + unknown_byte=0, + month=ts_dt.month, + day=ts_dt.day, + hour=ts_dt.hour, + minute=ts_dt.minute, + second=ts_dt.second, + ) + + # Build PeakValues from the report (fields are None when absent). + pv = PeakValues( + tran=report.get("tran_ppv"), + vert=report.get("vert_ppv"), + long=report.get("long_ppv"), + micl=report.get("mic_ppv"), + peak_vector_sum=report.get("peak_vector_sum"), + ) + + # Build ProjectInfo. See idf_ascii_report — Thor's title strings + # carry project / client / company / notes in TitleString1..4. 
+ pi = ProjectInfo( + setup_name=report.get("setup"), + project=report.get("project"), + client=report.get("client"), + operator=report.get("operator"), + sensor_location=None, # Thor folds location into TitleString1 = project + notes=report.get("notes"), + ) + + # Filesystem write. + filename = source_path.name + bw_path = self._serial_dir(serial) / filename + bw_path.write_bytes(idf_bytes) + filesize = bw_path.stat().st_size + sha256 = event_file_io.file_sha256(bw_path) + + # _waveform_key dedups (serial, timestamp) rows in the events + # table. Use the binary's sha256 (first 16 bytes) as a stable + # surrogate — every distinct binary maps to a distinct row. + waveform_key = bytes.fromhex(sha256)[:16] + + ev = Event( + index=0, + timestamp=ts_obj, + sample_rate=report.get("sample_rate"), + peak_values=pv, + project_info=pi, + record_type=kind, + rectime_seconds=report.get("record_time_sec"), + ) + ev._waveform_key = waveform_key + + # Write the sidecar. Source kind "idf-import" was added to the + # allow-list in event_file_io.event_to_sidecar_dict for this. + sidecar_path = self.sidecar_path_for(serial, filename) + existing_review = None + if sidecar_path.exists(): + try: + existing_review = event_file_io.read_sidecar(sidecar_path).get("review") + except Exception: + pass + + sidecar = event_file_io.event_to_sidecar_dict( + ev, + serial=serial, + blastware_filename=filename, + blastware_filesize=filesize, + blastware_sha256=sha256, + source_kind="idf-import", + a5_pickle_filename=None, + review=existing_review, + ) + # Stash the full parsed IDF report under extensions so downstream + # consumers can recover the rich derived fields that don't fit + # the BW-shaped event model (Peak Acceleration / Displacement, + # Time of Peak, sensor self-check, calibration, firmware). 
+ if report: + sidecar["extensions"]["idf_report"] = report + event_file_io.write_sidecar(sidecar_path, sidecar) + + log.info( + "WaveformStore.save_imported_idf serial=%s filename=%s filesize=%d " + "report_attached=%s", + serial, filename, filesize, bool(report), + ) + return ev, { + "filename": filename, + "filesize": filesize, + "sha256": sha256, + "a5_pickle_filename": None, + "hdf5_filename": None, + "sidecar_filename": sidecar_path.name, + "serial": serial, + } + def load_a5(self, serial: str, filename: str) -> Optional[list[S3Frame]]: """ Re-hydrate the pickled A5 frame stream for a stored event. diff --git a/tests/test_idf_ascii_report.py b/tests/test_idf_ascii_report.py new file mode 100644 index 0000000..2ba79f0 --- /dev/null +++ b/tests/test_idf_ascii_report.py @@ -0,0 +1,234 @@ +""" +test_idf_ascii_report.py — parser for Thor's per-event IDF ASCII export. + +Run: + python -m pytest tests/test_idf_ascii_report.py -q + +Tests use real Thor sample data shipped under +`thor-watcher/example-data/THORDATA_example/`. When that path is not +available (e.g. running from a checkout where the watcher repo isn't +sibling), tests gracefully skip. 
+""" + +from __future__ import annotations + +import datetime +import os +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from sfm.idf_ascii_report import ( + parse_event_filename, + parse_idf_report, + serial_from_filename, +) + + +# ── Sample data ────────────────────────────────────────────────────────────── + + +SAMPLE_REPO = Path("/home/serversdown/thor-watcher/example-data/" + "THORDATA_example/THORDATA_example") + + +def _sample_path(rel: str) -> Path: + return SAMPLE_REPO / rel + + +@pytest.fixture +def upmc_waveform_txt() -> str: + p = _sample_path("UPMC Presby/UM11719/TXT/UM11719_20231219162723.IDFW.txt") + if not p.exists(): + pytest.skip(f"sample missing: {p}") + return p.read_text() + + +@pytest.fixture +def upmc_histogram_txt() -> str: + p = _sample_path("UPMC Presby/UM11719/TXT/UM11719_20231219163444.IDFH.txt") + if not p.exists(): + pytest.skip(f"sample missing: {p}") + return p.read_text() + + +# ── Filename parsing ───────────────────────────────────────────────────────── + + +def test_parse_event_filename_waveform(): + parsed = parse_event_filename("UM11719_20231219163444.IDFW") + assert parsed is not None + serial, ts, kind = parsed + assert serial == "UM11719" + assert ts == datetime.datetime(2023, 12, 19, 16, 34, 44) + assert kind == "IDFW" + + +def test_parse_event_filename_histogram(): + parsed = parse_event_filename("BE9439_20200713124251.IDFH") + assert parsed is not None + serial, ts, kind = parsed + assert serial == "BE9439" + assert kind == "IDFH" + + +def test_parse_event_filename_case_insensitive(): + parsed = parse_event_filename("um11719_20231219163444.idfw") + assert parsed is not None + assert parsed[0] == "UM11719" + assert parsed[2] == "IDFW" + + +def test_parse_event_filename_rejects_invalid(): + for name in [ + "UM11719_20231219163444.MLG", + "UM11719.IDFW", + "UM11719_20231219163444.IDFW.txt", # report sidecar — not a binary + 
"UM11719_2023121916344X.IDFW", + "garbage", + "", + ]: + assert parse_event_filename(name) is None, name + + +def test_serial_from_filename(): + assert serial_from_filename("UM11719_20231219163444.IDFW") == "UM11719" + assert serial_from_filename("BE9439_20200713124251.IDFH") == "BE9439" + # Works on the .txt sidecar name too — handy in pairing code paths + assert serial_from_filename("UM11719_20231219163444.IDFW.txt") == "UM11719" + assert serial_from_filename("not_a_thor_file.bin") is None + + +# ── Report parsing — derived fields against real Thor sample ───────────────── + + +def test_waveform_report_derives_serial_event_type_and_datetime(upmc_waveform_txt): + r = parse_idf_report(upmc_waveform_txt) + assert r["serial_number"] == "UM11719" + assert r["event_type"] == "Full Waveform" + assert r["event_datetime"] == "2023-12-19T16:27:23" + assert r["filename"] == "UM11719_20231219162723.IDFW" + + +def test_waveform_report_parses_peak_velocities(upmc_waveform_txt): + r = parse_idf_report(upmc_waveform_txt) + assert r["tran_ppv"] == pytest.approx(0.0251) + assert r["vert_ppv"] == pytest.approx(0.2119) + assert r["long_ppv"] == pytest.approx(0.0282) + assert r["peak_vector_sum"] == pytest.approx(0.2131) + + +def test_waveform_report_parses_zc_freq_and_mic(upmc_waveform_txt): + r = parse_idf_report(upmc_waveform_txt) + assert r["tran_zc_freq"] == pytest.approx(6.5) + assert r["vert_zc_freq"] == pytest.approx(73.1) + assert r["long_zc_freq"] == pytest.approx(85.3) + assert r["mic_ppv"] == pytest.approx(99.4) + + +def test_waveform_report_parses_record_and_pretrigger_durations(upmc_waveform_txt): + r = parse_idf_report(upmc_waveform_txt) + assert r["record_time_sec"] == pytest.approx(2.0) + assert r["pre_trigger_sec"] == pytest.approx(0.25) + + +def test_waveform_report_parses_sample_rate(upmc_waveform_txt): + r = parse_idf_report(upmc_waveform_txt) + assert r["sample_rate"] == 1024 + + +def test_waveform_report_extracts_title_strings(upmc_waveform_txt): + r = 
parse_idf_report(upmc_waveform_txt) + # TitleString1 (location) → project + assert r["project"] == "UPMC Presby-Loc 3-Level1-1R Elevator Rm" + # TitleString2 → client + assert r["client"] == "Whiting-Turner - PJ Dick - Joint Venture" + # TitleString3 → operator (company) + assert r["operator"] == "Terra-Mechanics, Inc. - D. Harrsion" + + +def test_waveform_report_extracts_setup_version_and_calibration(upmc_waveform_txt): + r = parse_idf_report(upmc_waveform_txt) + assert r["setup"] == "UPMC Loc 3.mmb" + assert r["version"] == "Micromate ISEE 11.0AK" + assert r["calibration_text"] == "November 22, 2023 by Instantel" + assert r["battery_volts"] == pytest.approx(3.8) + + +def test_waveform_report_decodes_sensor_self_check(upmc_waveform_txt): + r = parse_idf_report(upmc_waveform_txt) + assert r["tran_test_passed"] is True + assert r["vert_test_passed"] is True + assert r["long_test_passed"] is True + assert r["mic_test_passed"] is True + + +def test_histogram_report_parses(upmc_histogram_txt): + """Histogram sidecars have the same shape as waveform — both + decode through the same parser without errors.""" + r = parse_idf_report(upmc_histogram_txt) + assert r["serial_number"] == "UM11719" + # IDFH timestamp in the sample + assert r["event_datetime"] == "2023-12-19T16:34:44" + assert r["event_type"] .lower().startswith("full histogram") or \ + r["event_type"] .lower().startswith("histogram") + # Sample rate present + assert "sample_rate" in r + + +# ── Edge cases ─────────────────────────────────────────────────────────────── + + +def test_parses_bytes_input(): + text = ( + '"SerialNumber : UM11719"\n' + '"TranPPV : 0.0251 in/s"\n' + ) + r = parse_idf_report(text.encode("utf-8")) + assert r["serial_number"] == "UM11719" + assert r["tran_ppv"] == pytest.approx(0.0251) + + +def test_parses_latin1_fallback(): + """Garbled non-UTF8 bytes fall back to latin-1 instead of crashing.""" + text = b'"SerialNumber : UM11719"\n"Operator : Caf\xe9"\n' + r = parse_idf_report(text) + 
assert r["serial_number"] == "UM11719" + assert r["operator"] == "Café" + + +def test_stops_at_waveform_data_marker(): + """Lines after the 'Waveform Data Channels' marker are not parsed + as key/value pairs — they're tabular sample data.""" + text = ( + '"SerialNumber : UM11719"\n' + '"TranPPV : 0.0251 in/s"\n' + 'Waveform Data Channels\n' + ' Tran Vert Long MicL\n' + ' 0.0003 -0.0003 0.0003 0.00013\n' + ) + r = parse_idf_report(text) + assert r["serial_number"] == "UM11719" + assert r["tran_ppv"] == pytest.approx(0.0251) + # No spurious entries from the table body + assert "tran" not in r + assert "0.0003" not in r + + +def test_missing_event_time_omits_datetime(): + r = parse_idf_report('"SerialNumber : UM11719"\n') + assert r["serial_number"] == "UM11719" + assert "event_datetime" not in r + + +def test_handles_empty_input(): + r = parse_idf_report("") + assert r == { + "project": None, + "client": None, + "operator": None, + "notes": None, + }