"""
sfm/idf_ascii_report.py — parse Thor (Micromate Series IV) IDF ASCII reports.

Thor exports a `.IDFW.txt` or `.IDFH.txt` sidecar next to each `.IDFW`
(waveform) or `.IDFH` (histogram) event binary.  Each sidecar is a
plain-text file with `"Key : Value"` lines covering the full device-
authoritative event metadata — PPV per channel, ZC Freq, Time of Peak,
Peak Acceleration / Displacement, sensor self-check results, project
strings, calibration date, battery level, etc. — followed by a raw
waveform-samples block headed by the literal line "Waveform Data Channels".

This is the Thor analogue of `minimateplus/bw_ascii_report.py` for the
Blastware (Series III) report format.  The parser is intentionally
permissive: we extract everything we recognise into a flat dict and
silently ignore anything we don't.  Downstream callers parse units
(`"0.2119 in/s"` → 0.2119) only on the fields they need.

Example input (truncated):

    "EventType : Full Waveform"
    "SampleRate : 1024 sps"
    "EventTime : 16:27:23"
    "EventDate : 2023-12-19"
    "TranPPV : 0.0251 in/s"
    "VertPPV : 0.2119 in/s"
    "LongPPV : 0.0282 in/s"
    "PeakVectorSum : 0.2131 in/s"
    "MicPSPL : 99.4 dB(L)"
    "TranZCFreq : 6.5 Hz"
    "SerialNumber : UM11719"
    "Version : Micromate ISEE 11.0AK"
    "FileName : UM11719_20231219162723.IDFW"
    "BatteryLevel : 3.8 volts"
    "Calibration : November 22, 2023 by Instantel"
    "TranTestResults : Passed"
    "TitleString1 : UPMC Presby-Loc 3-Level1-1R Elevator Rm"
    Waveform Data Channels
    Tran Vert Long MicL
    0.0003 -0.0003 0.0003 0.00013
    ...
"""

from __future__ import annotations

import datetime
import math
import re
from typing import Any, Dict, Optional, Tuple, Union


# Metadata lines look like: "Key : Value" (quotes literal, single ":" separator).
# The surrounding double quotes are optional to the pattern.  Group 1 is the
# key and may not contain '"' or ':', so the first ':' on the line is always
# the separator; any later colons (e.g. in "16:27:23") stay in group 2, the
# value.
_LINE_RE = re.compile(r'^\s*"?([^":]+?)"?\s*:\s*"?(.*?)"?\s*$')

# Marker that ends the metadata block — everything after is raw sample data.
# Kept lower-case because the parser compares it against lower-cased lines.
_WAVEFORM_BLOCK_MARKER = "waveform data channels"


def _normalize_key(raw: str) -> str:
|
|
"""Convert "TranPPV" / "PreTriggerLength" → snake_case."""
|
|
s = raw.strip()
|
|
# Insert underscore between lower→upper / digit→letter transitions
|
|
s = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", s)
|
|
s = re.sub(r"(?<=[A-Z])(?=[A-Z][a-z])", "_", s)
|
|
s = s.replace("-", "_").replace(" ", "_")
|
|
return s.lower()
|
|
|
|
|
|
def _strip_unit_suffix(value: str) -> str:
|
|
"""Return the numeric part of values like "0.2119 in/s" → "0.2119".
|
|
|
|
Also strips Thor's below/above-threshold prefixes:
|
|
"<0.005 in/s" → "0.005" (below-noise-floor reading)
|
|
">100 Hz" → "100" (above-measurement-range reading)
|
|
"""
|
|
parts = value.strip().split()
|
|
token = parts[0] if parts else value.strip()
|
|
if token.startswith("<") or token.startswith(">"):
|
|
token = token[1:]
|
|
return token
|
|
|
|
|
|
def _parse_float(value: str) -> Optional[float]:
    """Parse the leading number of a report value, or return None.

    The unit suffix and any "<" / ">" prefix are removed by
    `_strip_unit_suffix` before conversion, so "0.2119 in/s" → 0.2119.

    Returns None when:
      * the token is not numeric (e.g. "N/A" or an empty value);
      * the token converts to a non-finite float ("nan", "inf", "1e999") —
        `float()` accepts those spellings, but a NaN/Infinity is not a
        measurement and must not end up in a typed numeric field;
      * `value` is not a string (e.g. None).
    """
    try:
        number = float(_strip_unit_suffix(value))
    except (ValueError, TypeError, AttributeError):
        # AttributeError: non-str input has no .strip(); without it the
        # TypeError guard alone never protects against None.
        return None
    return number if math.isfinite(number) else None


def _parse_int(value: str) -> Optional[int]:
    """Parse the leading number of a report value as an int, or return None.

    The token is converted through `float` first so that values written
    with a decimal point still parse; the fractional part is truncated
    toward zero ("1024 sps" → 1024, "1024.9" → 1024).

    Returns None when the token is not numeric, is NaN or infinite, or
    when `value` is not a string (e.g. None).
    """
    try:
        return int(float(_strip_unit_suffix(value)))
    except (ValueError, TypeError, AttributeError, OverflowError):
        # OverflowError: int(float("inf")) — also reached via "1e999".
        # ValueError additionally covers int(float("nan")).
        # AttributeError: non-str input has no .strip().
        return None


def _decode_report_text(text: Union[str, bytes]) -> str:
    """Return *text* as ``str`` with any leading byte-order mark removed."""
    if isinstance(text, bytes):
        try:
            # "utf-8-sig" decodes BOM-less UTF-8 as well; it only differs
            # from "utf-8" in swallowing a leading BOM when one is present.
            text = text.decode("utf-8-sig")
        except UnicodeDecodeError:
            # latin-1 maps every byte value, so this fallback cannot fail.
            text = text.decode("latin-1", errors="replace")
    # Also covers str input that still carries a BOM.  Left in place, the
    # BOM would stop the first metadata line from matching _LINE_RE and
    # that field would be silently lost.
    return text.lstrip("\ufeff")


def _read_raw_fields(text: str) -> Dict[str, str]:
    """Collect the ``Key : Value`` lines that precede the waveform block.

    Keys are normalised with `_normalize_key`; values are kept as strings.
    Lines that do not match `_LINE_RE` are skipped.  A key that occurs more
    than once has its values joined with "; " in order of appearance.
    """
    fields: Dict[str, str] = {}
    for line in text.splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        # Accept the marker with or without the surrounding double quotes
        # that the metadata lines themselves carry.
        if stripped.lstrip('" ').lower().startswith(_WAVEFORM_BLOCK_MARKER):
            break
        match = _LINE_RE.match(stripped)
        if match is None:
            continue
        key = _normalize_key(match.group(1))
        value = match.group(2).strip()
        # Multi-value lines (Channel, Units, etc.) — coalesce by appending.
        fields[key] = f"{fields[key]}; {value}" if key in fields else value
    return fields


def parse_idf_report(text: Union[str, bytes]) -> Dict[str, Any]:
    """
    Parse a Thor IDFW.txt / IDFH.txt sidecar.

    Returns a flat dict with two kinds of entries:

    - **Raw fields** — every `Key : Value` line, keyed by snake_case
      of the original key, value as a string (unit suffix preserved).
      Lets callers grab any field we haven't explicitly normalised.

    - **Derived fields** — a curated set with parsed types:
        * `serial_number`    str
        * `event_type`       str  ("Full Waveform" / "Full Histogram")
        * `event_datetime`   ISO-8601 string ("YYYY-MM-DDTHH:MM:SS") when
                             both a date and a time are present and parse;
                             EventDate/EventTime are preferred, with
                             HistogramStartDate/HistogramStartTime as
                             fallback
        * `sample_rate`      int  (samples/sec)
        * `tran_ppv`,`vert_ppv`,`long_ppv`  float (in/s)
        * `mic_ppv`          float (dB or psi — same units as MicPSPL)
        * `peak_vector_sum`  float (in/s)
        * `tran_zc_freq`,`vert_zc_freq`,`long_zc_freq`  float (Hz)
        * `record_time_sec`  float (seconds)
        * `pre_trigger_sec`  float (seconds)
        * `project`          str  (from TitleString1 — Thor's location)
        * `client`           str  (TitleString2)
        * `operator`         str  (TitleString3 — company/operator)
        * `notes`            str  (TitleString4)
        * `setup`            str
        * `version`          str  (firmware)
        * `battery_volts`    float
        * `calibration_text` str  (e.g. "November 22, 2023 by Instantel")
        * `tran_test_passed`, `vert_test_passed`, `long_test_passed`,
          `mic_test_passed`  bool ("Passed" → True; anything else → False)
        * `filename`         str  (FileName line — useful sanity check)

      `project`, `client`, `operator` and `notes` are always present and
      are None when the report has no source for them.  Every other
      derived field is present only when its source line exists (and, for
      numeric fields, parses).  The peak acceleration / displacement,
      time-of-peak and `mic_zc_freq` fields are converted to float under
      their own snake_case names as well.

    Stops parsing at the literal "Waveform Data Channels" line (quoted or
    not); the raw-samples block is left to whoever wants to decode the
    binary.

    Input may be `str` or `bytes` (`utf-8`/`latin-1` tolerant); a leading
    byte-order mark is ignored.
    """
    raw = _read_raw_fields(_decode_report_text(text))
    out: Dict[str, Any] = dict(raw)  # keep all raw fields

    # ── Derived fields ───────────────────────────────────────────────────

    def _take(*candidates: str) -> Optional[str]:
        """Return the value of the first candidate key present in `raw`."""
        return next((raw[c] for c in candidates if c in raw), None)

    def _store_number(dest: str, source: str, parse: Any) -> None:
        """Parse raw[source] with `parse` and store the result at out[dest].

        Nothing happens when the source line is absent.  When parsing
        fails and the destination IS the raw key, the raw string is
        removed so an unparsed value never sits under a typed key.
        """
        text_value = raw.get(source)
        if text_value is None:
            return
        number = parse(text_value)
        if number is not None:
            out[dest] = number
        elif dest == source:
            out.pop(dest, None)

    # Event identity.  serial_number and event_type already sit in `out`
    # under the right names via the raw copy; only FileName is re-keyed.
    if "file_name" in raw:
        out["filename"] = raw["file_name"]

    # Combined date+time.  Waveform sidecars use "EventDate" / "EventTime";
    # histogram sidecars use "HistogramStartDate" / "HistogramStartTime".
    # Prefer the event_* names when both are present.
    ed = raw.get("event_date") or raw.get("histogram_start_date")
    et = raw.get("event_time") or raw.get("histogram_start_time")
    if ed and et:
        try:
            dt = datetime.datetime.strptime(f"{ed} {et}", "%Y-%m-%d %H:%M:%S")
        except ValueError:
            # Unrecognised layout: deliberately best-effort, so the field
            # is left unset; the raw date/time strings remain available.
            pass
        else:
            out["event_datetime"] = dt.isoformat()

    # Numeric scalars.  For every field we typify here, we MUST drop the
    # raw string copy from `out` when parsing fails — Thor writes things
    # like "<0.005 in/s" (below threshold) and "N/A" (not measured) that
    # would otherwise linger in `out` as strings, sneak into SQLite REAL
    # columns via permissive type affinity, and then crash the JS
    # frontend on `.toFixed(...)`.
    _store_number("sample_rate", "sample_rate", _parse_int)

    float_fields = (
        "tran_ppv", "vert_ppv", "long_ppv", "peak_vector_sum",
        "tran_zc_freq", "vert_zc_freq", "long_zc_freq",
        "tran_peak_acceleration", "vert_peak_acceleration",
        "long_peak_acceleration",
        "tran_peak_displacement", "vert_peak_displacement",
        "long_peak_displacement",
        "tran_time_of_peak", "vert_time_of_peak", "long_time_of_peak",
        "mic_time_of_peak", "mic_zc_freq",
    )
    for key in float_fields:
        _store_number(key, key, _parse_float)

    # Microphone — Thor reports MicPSPL (dB(L)) which is the closest
    # analogue to BW's mic_ppv.  The raw "99.4 dB(L)" string stays in
    # `out` under the original `mic_pspl` key for display; the parsed
    # float goes in `mic_ppv`.
    _store_number("mic_ppv", "mic_pspl", _parse_float)

    # Record / pre-trigger duration: parsed floats go under new *_sec keys,
    # the raw strings stay under their original keys.
    _store_number("record_time_sec", "record_time", _parse_float)
    _store_number("pre_trigger_sec", "pre_trigger_length", _parse_float)

    # Project / client / operator / location strings.  Thor's title
    # strings are operator-defined; conventional mapping (per Thor's
    # default TitleNote labels in the example data):
    #   TitleString1 = Location → project  (sensor location identifier)
    #   TitleString2 = Client   → client
    #   TitleString3 = Company  → operator (the monitoring company)
    #   TitleString4 = Notes    → notes
    # Each target falls back to a raw field of the same name, so that a
    # report carrying e.g. a literal "Notes : ..." line does not have that
    # value overwritten with None when the title string is missing.
    out["project"] = _take("title_string1", "project")
    out["client"] = _take("title_string2", "client")
    out["operator"] = _take("title_string3", "operator")
    out["notes"] = _take("title_string4", "post_event_note", "notes")

    # setup and version are already in `out` under those names (raw copy).

    # Battery (e.g. "3.8 volts" → 3.8)
    _store_number("battery_volts", "battery_level", _parse_float)

    # Calibration line is free-form (e.g. "November 22, 2023 by Instantel").
    if "calibration" in raw:
        out["calibration_text"] = raw["calibration"]

    # Sensor self-check results — bool flags
    for channel in ("tran", "vert", "long", "mic"):
        result = raw.get(f"{channel}_test_results")
        if result is not None:
            out[f"{channel}_test_passed"] = result.strip().lower() == "passed"

    return out


def serial_from_filename(name: str) -> Optional[str]:
    """Convenience: pull the serial prefix from a Thor event filename.

    Thor uses the literal serial as the filename prefix:
        UM11719_20231219163444.IDFW      → "UM11719"
        BE9439_20200713124251.IDFH       → "BE9439"
        UM11719_20231219163444.IDFW.txt  → "UM11719"  (ASCII sidecar)

    Matching is case-insensitive and the serial is returned upper-cased.
    The whole of `name` must match — a directory prefix or trailing
    characters (including a trailing newline) give None.
    """
    # fullmatch rather than match + "$": "$" also matches just before a
    # final "\n", which would let an unstripped line pass as a filename.
    found = re.fullmatch(
        r"([A-Z]{2}\d+)_\d{14}\.(IDFH|IDFW)(?:\.txt)?",
        name,
        re.IGNORECASE,
    )
    return found.group(1).upper() if found else None


def parse_event_filename(name: str) -> Optional[Tuple[str, datetime.datetime, str]]:
    """Parse `<SERIAL>_<YYYYMMDDHHMMSS>.<KIND>` → (serial, datetime, kind).

    `kind` is "IDFH" or "IDFW" (upper-case); the serial is upper-cased too
    and matching is case-insensitive.  The datetime is naive — no timezone
    is attached.

    Returns None when the name does not match the pattern in full (this
    includes `.txt` sidecar names and names with a trailing newline) or
    when the 14-digit stamp is not a valid calendar date/time.
    """
    # fullmatch rather than match + "$": "$" also matches just before a
    # final "\n", which would let an unstripped line pass as a filename.
    found = re.fullmatch(
        r"([A-Z]{2}\d+)_(\d{14})\.(IDFH|IDFW)",
        name,
        re.IGNORECASE,
    )
    if found is None:
        return None
    serial, stamp, kind = found.groups()
    try:
        ts = datetime.datetime.strptime(stamp, "%Y%m%d%H%M%S")
    except ValueError:
        # Fourteen digits that do not form a real date, e.g. February 30th.
        return None
    return serial.upper(), ts, kind.upper()