feat(bw-report): normalise operator-field label variants

Blastware writes the operator-supplied fields with different label
spellings across firmware versions and recording modes — most
notably "Seis. Location" on histogram exports vs "Seis Loc:" on
waveform exports.  The previous parser only matched the latter, so
every histogram event silently lost its sensor_location field.

Replace the four hardcoded `key.rstrip(":") == "X"` branches with
a single `_OPERATOR_LABEL_MAP` dispatch table keyed by normalised
label (lowercase, trailing colon/period stripped, internal
whitespace collapsed).  Adds these variants on day 1:

  project:         "Project:" / "Project"
  client:          "Client:"  / "Client"
  operator:        "User Name:" / "User Name"
  sensor_location: "Seis Loc:" / "Seis. Location" / "Seis Location"
                 / "Sensor Location" / "Seis Loc"

To absorb future BW label drift, add a one-line dict entry — no
new elif branch.

14 new tests cover:
  - Each label variant routes to the correct field (parametrised)
  - Case-insensitive matching ("seis loc" / "SEIS LOC" / "SeIs LoC")
  - Whitespace-collapse ("Seis  Loc" with double-space)
  - End-to-end parse of a real histogram fixture from
    example-events/histogram/ — sensor_location ('Loc #1 - 2652 Hepner...')
    populates correctly even though the file uses "Seis. Location"

Total bw_ascii_report tests: 19 → 33.  Full SFM suite still green
(69 passed, 44 skipped — pre-existing skips for h5py-dep tests).

Pairs with series3-watcher v1.5.4 (which fixes the filename pairing
so histograms actually reach this parser in the first place).
This commit is contained in:
2026-05-10 20:13:44 +00:00
parent cdfe4ad3c8
commit 6a7e8c6e86
2 changed files with 130 additions and 6 deletions
+67
View File
@@ -257,3 +257,70 @@ def test_parse_handles_micl_double_space_in_key():
r = parse_report(text)
assert r.mic.time_of_peak_s == pytest.approx(0.012)
assert r.mic.zc_freq_hz == pytest.approx(51.0)
# ── Operator-field label normalisation ──────────────────────────────────────
@pytest.mark.parametrize(
    ("label", "expected_field", "expected_value"),
    [
        # project: colon-terminated and bare spellings
        ("Project:", "project", "Test4-21-26"),
        ("Project", "project", "Test4-21-26"),
        # client
        ("Client:", "client", "Acme Inc"),
        ("Client", "client", "Acme Inc"),
        # operator is written under the "User Name" label
        ("User Name:", "operator", "Brian"),
        ("User Name", "operator", "Brian"),
        # sensor_location: all Seis*/Sensor Location spellings seen so far
        ("Seis Loc:", "sensor_location", "Catbed"),
        ("Seis Loc", "sensor_location", "Catbed"),
        ("Seis. Location", "sensor_location", "Catbed"),
        ("Seis Location", "sensor_location", "Catbed"),
        ("Sensor Location", "sensor_location", "Catbed"),
    ],
)
def test_operator_label_variants_route_to_correct_field(label, expected_field, expected_value):
    """Every known spelling of an operator-supplied label must land on
    the same attribute of the parsed report.

    Each case feeds a single quoted ``"<label> : <value>"`` line to
    ``parse_report`` and checks that ``expected_field`` on the result
    holds ``expected_value``.
    """
    report_line = '"%s : %s"\n' % (label, expected_value)
    parsed = parse_report(report_line)
    actual = getattr(parsed, expected_field)
    # The tuple message identifies the failing label/field pair.
    assert actual == expected_value, (label, expected_field)
def test_label_normalisation_is_case_insensitive():
    """The label lookup ignores letter case: all-lower, all-upper and
    mixed-case spellings of "Seis Loc" fill sensor_location alike."""
    spellings = ["seis loc", "SEIS LOC", "SeIs LoC"]
    for spelling in spellings:
        parsed = parse_report('"' + spelling + ' : Catbed"\n')
        # The spelling is the assertion message so a failure names its variant.
        assert parsed.sensor_location == "Catbed", spelling
def test_label_normalisation_collapses_extra_whitespace():
    """Internal whitespace runs in labels are collapsed before lookup.

    The label below deliberately contains TWO spaces between "Seis" and
    "Loc".  With only a single space the input would be identical to the
    plain "Seis Loc" case and the whitespace-collapsing step would never
    be exercised, so keep the double space when editing this literal.
    """
    text = '"Seis  Loc : Catbed"\n'  # two spaces between "Seis" and "Loc"
    r = parse_report(text)
    assert r.sensor_location == "Catbed"
def test_real_histogram_fixture_populates_sensor_location():
    """End-to-end: the histogram fixture from example-events/histogram/
    uses 'Seis. Location' (with period) and must successfully populate
    sensor_location after the label-normalisation fix.

    Skips (rather than fails) when the fixture directory or a matching
    ``*_ASCII.TXT`` file is not present in the checkout.
    """
    fixture_dir = (
        Path(__file__).parent.parent / "example-events" / "histogram"
    )
    if not fixture_dir.exists():
        pytest.skip("histogram fixtures not present")

    # glob() yields entries in whatever order the filesystem lists them;
    # sort so the same fixture is exercised on every machine and run.
    candidates = sorted(fixture_dir.glob("*_ASCII.TXT"))
    if not candidates:
        pytest.skip("no histogram TXT in fixture dir")
    txt = candidates[0]

    r = parse_report_file(txt)

    # The histogram TXTs verified to use "Seis. Location" — should now
    # populate sensor_location instead of being silently dropped.
    assert r.sensor_location is not None
    assert len(r.sensor_location) > 0

    # Sanity: other shared fields still parse correctly
    assert r.serial is not None
    assert r.serial.startswith("BE")
    assert r.geo_range_ips is not None