feat(bw-report): normalise operator-field label variants
Blastware writes the operator-supplied fields with different label
spellings across firmware versions and recording modes — most
notably "Seis. Location" on histogram exports vs "Seis Loc:" on
waveform exports. Previous parser only matched the latter, so
every histogram event silently lost its sensor_location field.
Replace the four hardcoded `key.rstrip(":") == "X"` branches with
a single `_OPERATOR_LABEL_MAP` dispatch table keyed by normalised
label (lowercase, trailing colon/period stripped, internal
whitespace collapsed). Adds these variants on day 1:
project: "Project:" / "Project"
client: "Client:" / "Client"
operator: "User Name:" / "User Name"
sensor_location: "Seis Loc:" / "Seis. Location" / "Seis Location"
/ "Sensor Location" / "Seis Loc"
To absorb future BW label drift, add a one-line dict entry — no
new elif branch.
14 new tests cover:
- Each label variant routes to the correct field (parametrised)
- Case-insensitive matching ("seis loc" / "SEIS LOC" / "SeIs LoC")
- Whitespace-collapse ("Seis  Loc" with a double space between the words)
- End-to-end parse of a real histogram fixture from
example-events/histogram/ — sensor_location ('Loc #1 - 2652 Hepner...')
populates correctly even though the file uses "Seis. Location"
Total bw_ascii_report tests: 19 → 33. Full SFM suite still green
(69 passed, 44 skipped — pre-existing skips for h5py-dep tests).
Pairs with series3-watcher v1.5.4 (which fixes the filename pairing
so histograms actually reach this parser in the first place).
This commit is contained in:
@@ -257,3 +257,70 @@ def test_parse_handles_micl_double_space_in_key():
|
||||
r = parse_report(text)
|
||||
assert r.mic.time_of_peak_s == pytest.approx(0.012)
|
||||
assert r.mic.zc_freq_hz == pytest.approx(51.0)
|
||||
|
||||
|
||||
# ── Operator-field label normalisation ──────────────────────────────────────
|
||||
|
||||
|
||||
# Expected dataclass attribute -> (sample value, every known label spelling).
# Insertion order is preserved, so the generated cases keep a stable order.
_OPERATOR_LABEL_CASES = {
    # project — both with-colon and bare
    "project": ("Test4-21-26", ("Project:", "Project")),
    # client
    "client": ("Acme Inc", ("Client:", "Client")),
    # operator (User Name)
    "operator": ("Brian", ("User Name:", "User Name")),
    # sensor_location — every Seis*/Sensor Location variant we know about
    "sensor_location": (
        "Catbed",
        (
            "Seis Loc:",
            "Seis Loc",
            "Seis. Location",
            "Seis Location",
            "Sensor Location",
        ),
    ),
}


@pytest.mark.parametrize(
    "label,expected_field,expected_value",
    [
        (spelling, field_name, sample)
        for field_name, (sample, spellings) in _OPERATOR_LABEL_CASES.items()
        for spelling in spellings
    ],
)
def test_operator_label_variants_route_to_correct_field(label, expected_field, expected_value):
    """Every known spelling of an operator-supplied label must populate
    the same attribute on the parsed report."""
    # One quoted "<label> : <value>" line is the whole report text.
    report_text = f'"{label} : {expected_value}"\n'
    parsed = parse_report(report_text)
    actual = getattr(parsed, expected_field)
    assert actual == expected_value, (label, expected_field)
|
||||
|
||||
|
||||
def test_label_normalisation_is_case_insensitive():
    """Label lookup ignores case: lower, upper and mixed-case spellings of
    "Seis Loc" must all populate sensor_location."""
    spellings = ("seis loc", "SEIS LOC", "SeIs LoC")
    for spelling in spellings:
        parsed = parse_report(f'"{spelling} : Catbed"\n')
        # The spelling is the assertion message so a failure names the culprit.
        assert parsed.sensor_location == "Catbed", spelling
|
||||
|
||||
|
||||
def test_label_normalisation_collapses_extra_whitespace():
    """Internal whitespace runs in labels are collapsed before lookup."""
    # Build the double space explicitly so the run between "Seis" and "Loc"
    # cannot be silently reduced to one space by whitespace-normalising
    # tools, which would turn this into a duplicate of the plain
    # "Seis Loc" case.
    label = "Seis" + " " * 2 + "Loc"
    text = '"{} : Catbed"\n'.format(label)
    r = parse_report(text)
    assert r.sensor_location == "Catbed"
|
||||
|
||||
|
||||
def test_real_histogram_fixture_populates_sensor_location():
    """End-to-end: the histogram fixture from example-events/histogram/
    uses 'Seis. Location' (with period) and must successfully populate
    sensor_location after the label-normalisation fix.

    Skipped when the fixture directory or a matching TXT file is absent.
    """
    fixture_dir = (
        Path(__file__).parent.parent / "example-events" / "histogram"
    )
    if not fixture_dir.exists():
        pytest.skip("histogram fixtures not present")

    # Path.glob yields matches in arbitrary order; sort so the same fixture
    # is exercised on every machine and every run.
    candidates = sorted(fixture_dir.glob("*_ASCII.TXT"))
    if not candidates:
        pytest.skip("no histogram TXT in fixture dir")
    txt = candidates[0]

    r = parse_report_file(txt)

    # The histogram TXTs were verified to use "Seis. Location", so the field
    # should now be populated instead of being silently dropped.
    assert r.sensor_location is not None, txt.name
    assert len(r.sensor_location) > 0, txt.name

    # Sanity: other shared fields still parse correctly.
    assert r.serial is not None, txt.name
    assert r.serial.startswith("BE"), txt.name
    assert r.geo_range_ips is not None, txt.name
|
||||
|
||||
Reference in New Issue
Block a user