# ── Operator-field label normalisation ──────────────────────────────────────
#
# BW has used different label spellings across versions and recording
# modes for the same operator-supplied fields:
#
#   project:          "Project:"   / "Project"
#   client:           "Client:"    / "Client"
#   operator:         "User Name:" / "User Name"
#   sensor_location:  "Seis Loc:"  / "Seis. Location" / "Seis Location"
#                     / "Sensor Location"
#
# Per user feedback ("the tags themselves dont matter a ton, what
# matters is the field"), we normalise labels at lookup time so the
# value-extraction works regardless of which spelling BW happens to
# emit on a given machine.
#
# To add a new variant: edit `_OPERATOR_LABEL_MAP` — single source of
# truth.  Keys are normalised forms (lowercase, trailing colons and
# periods stripped, internal whitespace collapsed); values are
# attribute names on `BwAsciiReport`.
#
# `parse_report` consumes the table in its key-dispatch chain:
#
#     elif (slot := _OPERATOR_LABEL_MAP.get(_normalise_label_for_lookup(key))):
#         setattr(report, slot, value)

_OPERATOR_LABEL_MAP = {
    # project
    "project": "project",
    # client
    "client": "client",
    # operator
    "user name": "operator",
    # sensor location — most variants of "Seis*" + "Sensor Location".
    # Internal periods ("seis. loc") survive normalisation, so they need
    # their own entries; only *trailing* punctuation is stripped.
    "seis loc": "sensor_location",
    "seis. loc": "sensor_location",
    "seis. location": "sensor_location",
    "seis location": "sensor_location",
    "sensor location": "sensor_location",
}


def _normalise_label_for_lookup(key: str) -> str:
    """Normalise a label for the operator-field lookup.

    Strips every trailing colon and period (in any order, with or
    without whitespace between them), collapses internal whitespace
    runs, and lowercases.  So all of:

        "Seis Loc:"
        "Seis Loc. :"
        "Seis. Location"
        "seis   location"
        "Sensor Location"

    map to canonical forms in `_OPERATOR_LABEL_MAP`.  Punctuation that
    is not at the end of the label (the period in "Seis. Location") is
    kept.

    Args:
        key: The raw label text as it appeared in the report line.

    Returns:
        The normalised label, suitable as a key into
        `_OPERATOR_LABEL_MAP`.  An input made only of whitespace,
        colons and periods yields the empty string.
    """
    s = key.strip()
    # Peel trailing punctuation one character at a time, dropping any
    # whitespace exposed by each removal.  A chained
    # rstrip(":").rstrip(".") would miss "Loc:." and "Loc. :".
    while s.endswith((":", ".")):
        s = s[:-1].rstrip()
    # `_KEY_NORMALISE_RE` is the module-level pattern defined elsewhere
    # in this file — presumably it matches whitespace runs; verify
    # against its definition.
    s = _KEY_NORMALISE_RE.sub(" ", s)
    return s.lower()
# ── Operator-field label normalisation ──────────────────────────────────────


@pytest.mark.parametrize(
    "label,expected_field,expected_value",
    [
        # project — both with-colon and bare
        ("Project:", "project", "Test4-21-26"),
        ("Project", "project", "Test4-21-26"),
        # client
        ("Client:", "client", "Acme Inc"),
        ("Client", "client", "Acme Inc"),
        # operator (User Name)
        ("User Name:", "operator", "Brian"),
        ("User Name", "operator", "Brian"),
        # sensor_location — every Seis*/Sensor Location variant we know about
        ("Seis Loc:", "sensor_location", "Catbed"),
        ("Seis Loc", "sensor_location", "Catbed"),
        ("Seis. Location", "sensor_location", "Catbed"),
        ("Seis Location", "sensor_location", "Catbed"),
        ("Sensor Location", "sensor_location", "Catbed"),
    ],
)
def test_operator_label_variants_route_to_correct_field(label, expected_field, expected_value):
    """All known label spellings of the operator-supplied fields route
    to the same dataclass attribute."""
    # One quoted report line of the form `"<label> : <value>"`.
    text = f'"{label} : {expected_value}"\n'
    r = parse_report(text)
    assert getattr(r, expected_field) == expected_value, (label, expected_field)


@pytest.mark.parametrize("label", ["seis loc", "SEIS LOC", "SeIs LoC"])
def test_label_normalisation_is_case_insensitive(label):
    """Lowercase / uppercase / mixed-case labels all hit the same slot.

    Parametrised so each casing is reported on its own instead of the
    first failure hiding the remaining cases.
    """
    text = f'"{label} : Catbed"\n'
    r = parse_report(text)
    assert r.sensor_location == "Catbed", label


def test_label_normalisation_collapses_extra_whitespace():
    """Internal whitespace runs in labels are collapsed before lookup."""
    # Build the two-space gap explicitly so the intent survives editors
    # and tools that squeeze repeated spaces inside literals.
    gap = " " * 2
    text = f'"Seis{gap}Loc : Catbed"\n'
    r = parse_report(text)
    assert r.sensor_location == "Catbed"


def test_real_histogram_fixture_populates_sensor_location():
    """End-to-end: the histogram fixture from example-events/histogram/
    uses 'Seis. Location' (with period) and must successfully populate
    sensor_location after the label-normalisation fix."""
    fixture_dir = Path(__file__).parent.parent / "example-events" / "histogram"
    if not fixture_dir.exists():
        pytest.skip("histogram fixtures not present")
    # Directory iteration order is filesystem-dependent; sort so the same
    # fixture is exercised on every machine.
    candidates = sorted(fixture_dir.glob("*_ASCII.TXT"))
    if not candidates:
        pytest.skip("no histogram TXT in fixture dir")
    txt = candidates[0]

    r = parse_report_file(txt)
    # The histogram TXTs verified to use "Seis. Location" — should now
    # populate sensor_location instead of being silently dropped.
    assert r.sensor_location is not None
    assert len(r.sensor_location) > 0
    # Sanity: other shared fields still parse correctly
    assert r.serial is not None
    assert r.serial.startswith("BE")
    assert r.geo_range_ips is not None