refactor(bw-report): parse user notes by POSITION, not by label
The four operator-supplied note fields in BW's Compliance Setup →
Notes tab (Project / Client / User Name / Seis Loc) have
USER-EDITABLE LABELS — an operator can rename them in BW's UI to
"Building:", "Site Address:", "Inspector:", or anything else, and
the ASCII export writes those literal labels verbatim. The
previous label-normalisation map approach (just added in commit
6a7e8c6) was fragile: it could only match label spellings we'd
enumerated in advance. An operator using "Site:" instead of
"Seis Loc:" would have their sensor location silently dropped.
What IS reliable: BW always writes the 4 user-notes lines
contiguously, in the same order, between the "Units :" line and
the "Geo Range :" line of the export. So parse them by POSITION:
position 1 → project
position 2 → client
position 3 → operator
position 4 → sensor_location
The original labels BW wrote are preserved in a new
`BwAsciiReport.user_note_labels` dict (canonical slot → literal
label string) so terra-view can render them as the operator named
them.
Removes the `_OPERATOR_LABEL_MAP` / `_normalise_label_for_lookup`
helpers and the elif-by-normalised-label branch in `parse_report`.
Replaces with a small state machine that flips on the "Units" line
and flips off on the "Geo Range" line.
Tests:
- Default-label fixtures (waveform + histogram) still populate
correctly, with operator's labels captured.
- Synthetic custom-labelled exports ("Building" / "Site Address" /
etc., written without a trailing colon) populate the right slots by position.
- Histogram-specific "Seis. Location:" works.
- Lines outside the Units→Geo Range block are ignored even if
they look like user notes (defensive against malformed exports).
- Partial blocks (fewer than 4 lines) leave later slots None.
- Extra lines beyond 4 are dropped (5th slot doesn't exist).
26 tests in test_bw_ascii_report.py (was 33; net drop reflects
parametrised label tests collapsed into 6 focused position tests).
Full SFM suite: 62 passed, 44 skipped.
Pairs with series3-watcher v1.5.0 which fixes the filename pairing
so the report reaches this parser in the first place.
This commit is contained in:
+120
-39
@@ -259,53 +259,135 @@ def test_parse_handles_micl_double_space_in_key():
|
||||
assert r.mic.zc_freq_hz == pytest.approx(51.0)
|
||||
|
||||
|
||||
# ── Operator-field label normalisation ──────────────────────────────────────
|
||||
# ── Position-based user-notes parsing ───────────────────────────────────────
|
||||
#
|
||||
# The 4 user-supplied note slots (Project / Client / User Name / Seis Loc
|
||||
# by default) have OPERATOR-EDITABLE labels in BW's Compliance Setup →
|
||||
# Notes tab. An operator could rename them to "Building:", "Site:",
|
||||
# "Address:", etc. and the ASCII export would write those labels
|
||||
# verbatim. We parse by POSITION between the `Units :` and `Geo Range :`
|
||||
# anchors, NOT by matching the label text.
|
||||
|
||||
|
||||
@pytest.mark.parametrize("label,expected_field,expected_value", [
|
||||
# project — both with-colon and bare
|
||||
("Project:", "project", "Test4-21-26"),
|
||||
("Project", "project", "Test4-21-26"),
|
||||
# client
|
||||
("Client:", "client", "Acme Inc"),
|
||||
("Client", "client", "Acme Inc"),
|
||||
# operator (User Name)
|
||||
("User Name:", "operator", "Brian"),
|
||||
("User Name", "operator", "Brian"),
|
||||
# sensor_location — every Seis*/Sensor Location variant we know about
|
||||
("Seis Loc:", "sensor_location", "Catbed"),
|
||||
("Seis Loc", "sensor_location", "Catbed"),
|
||||
("Seis. Location", "sensor_location", "Catbed"),
|
||||
("Seis Location", "sensor_location", "Catbed"),
|
||||
("Sensor Location", "sensor_location", "Catbed"),
|
||||
])
|
||||
def test_operator_label_variants_route_to_correct_field(label, expected_field, expected_value):
|
||||
"""All known label spellings of the operator-supplied fields route
|
||||
to the same dataclass attribute."""
|
||||
text = '"{} : {}"\n'.format(label, expected_value)
|
||||
def _wrap_user_notes(*lines: str) -> str:
|
||||
"""Helper: wrap N user-note lines in the minimal context the parser
|
||||
needs (`Units :` opens the block, `Geo Range :` closes it)."""
|
||||
body = ['"Units : in/s and dB(L)"']
|
||||
body.extend('"' + l + '"' for l in lines)
|
||||
body.append('"Geo Range : 10.000 in/s"')
|
||||
return "\n".join(body) + "\n"
|
||||
|
||||
|
||||
def test_user_notes_default_labels_populate_by_position() -> None:
    """The BW-default labels (Project / Client / User Name / Seis Loc)
    populate the four canonical slots in order.

    Also checks that the literal labels written in the export are
    recorded in `user_note_labels`, keyed by canonical slot name.
    """
    r = parse_report(_wrap_user_notes(
        "Project: : Test4-21-26",
        "Client: : Acme Inc",
        "User Name: : Brian",
        "Seis Loc: : Catbed",
    ))
    assert r.project == "Test4-21-26"
    assert r.client == "Acme Inc"
    assert r.operator == "Brian"
    assert r.sensor_location == "Catbed"
    # Labels keep their trailing colon exactly as written in the export.
    assert r.user_note_labels == {
        "project": "Project:",
        "client": "Client:",
        "operator": "User Name:",
        "sensor_location": "Seis Loc:",
    }
|
||||
|
||||
|
||||
def test_user_notes_operator_renamed_labels_still_populate() -> None:
    """If the operator renames the labels in BW's UI (e.g. "Project:"
    → "Building"), the values STILL populate the canonical slots by
    position — and the operator's labels are preserved in
    `user_note_labels` for terra-view to display."""
    r = parse_report(_wrap_user_notes(
        "Building : Main Office",
        "Project Manager : Brian",
        "Inspector : Claude",
        "Site Address : 123 Main St",
    ))
    # Slot assignment follows line order, not the label text: a label
    # containing "Project" in position 2 still lands in `client`.
    assert r.project == "Main Office"
    assert r.client == "Brian"
    assert r.operator == "Claude"
    assert r.sensor_location == "123 Main St"
    assert r.user_note_labels == {
        "project": "Building",
        "client": "Project Manager",
        "operator": "Inspector",
        "sensor_location": "Site Address",
    }
|
||||
|
||||
|
||||
def test_user_notes_with_histogram_label_spelling() -> None:
    """Histogram exports use 'Seis. Location:' (with period and colon)
    instead of 'Seis Loc:'. Position-based parsing handles both."""
    r = parse_report(_wrap_user_notes(
        "Project: : Plum Cont.- Rainbow Run",
        "Client: : Plum Contracting In.c",
        "User Name: : Terra-Mechanics Inc.",
        "Seis. Location: : Loc #1 - 2652 Hepner",
    ))
    assert r.project == "Plum Cont.- Rainbow Run"
    assert r.client == "Plum Contracting In.c"
    assert r.operator == "Terra-Mechanics Inc."
    assert r.sensor_location == "Loc #1 - 2652 Hepner"
    # And the histogram's specific label spelling is preserved
    assert r.user_note_labels["sensor_location"] == "Seis. Location:"
|
||||
|
||||
|
||||
def test_user_notes_outside_block_are_ignored() -> None:
    """Lines that look like user-notes but appear OUTSIDE the
    Units→Geo Range range don't get assigned to user-note slots."""
    # No Units anchor — these lines shouldn't populate user-note slots
    text = (
        '"Serial Number : BE11529"\n'
        '"Project: : SHOULD NOT POPULATE"\n'
    )
    r = parse_report(text)
    # Ordinary key/value lines are still parsed outside the block...
    assert r.serial == "BE11529"
    # ...but a user-note look-alike is not routed to a slot.
    assert r.project is None
|
||||
|
||||
|
||||
def test_label_normalisation_is_case_insensitive():
|
||||
"""Lowercase / uppercase / mixed-case labels all hit the same slot."""
|
||||
for label in ("seis loc", "SEIS LOC", "SeIs LoC"):
|
||||
text = '"{} : Catbed"\n'.format(label)
|
||||
r = parse_report(text)
|
||||
assert r.sensor_location == "Catbed", label
|
||||
def test_user_notes_partial_block_only_fills_present_slots() -> None:
    """If BW writes fewer than 4 user-notes (e.g. operator disabled
    Extended Notes mid-block), only the present positions populate;
    later slots stay None."""
    r = parse_report(_wrap_user_notes(
        "Project: : Just-a-project",
        "Client: : Just-a-client",
    ))
    assert r.project == "Just-a-project"
    assert r.client == "Just-a-client"
    # Positions 3 and 4 never appeared before the closing anchor.
    assert r.operator is None
    assert r.sensor_location is None
|
||||
|
||||
|
||||
def test_label_normalisation_collapses_extra_whitespace():
|
||||
"""Internal whitespace runs in labels are collapsed before lookup."""
|
||||
text = '"Seis Loc : Catbed"\n' # two spaces between "Seis" and "Loc"
|
||||
r = parse_report(text)
|
||||
assert r.sensor_location == "Catbed"
|
||||
def test_user_notes_extra_lines_beyond_four_are_dropped() -> None:
    """If somehow more than 4 lines appear in the user-notes block
    (e.g. BW adds an Extended Notes line), only the first 4 are
    captured — slots 5+ have nowhere to go."""
    r = parse_report(_wrap_user_notes(
        "L1 : v1",
        "L2 : v2",
        "L3 : v3",
        "L4 : v4",
        "L5 : v5",  # ignored — no fifth slot
    ))
    assert r.project == "v1"
    assert r.client == "v2"
    assert r.operator == "v3"
    assert r.sensor_location == "v4"
    # 5th label not captured
    assert "L5" not in r.user_note_labels.values()
|
||||
|
||||
|
||||
def test_real_histogram_fixture_populates_sensor_location():
|
||||
"""End-to-end: the histogram fixture from example-events/histogram/
|
||||
uses 'Seis. Location' (with period) and must successfully populate
|
||||
sensor_location after the label-normalisation fix."""
|
||||
"""End-to-end: the histogram fixture uses 'Seis. Location:' — must
|
||||
successfully populate sensor_location via position-based parsing."""
|
||||
fixture_dir = (
|
||||
Path(__file__).parent.parent / "example-events" / "histogram"
|
||||
)
|
||||
@@ -316,10 +398,9 @@ def test_real_histogram_fixture_populates_sensor_location():
|
||||
pytest.skip("no histogram TXT in fixture dir")
|
||||
|
||||
r = parse_report_file(txt)
|
||||
# The histogram TXTs verified to use "Seis. Location" — should now
|
||||
# populate sensor_location instead of being silently dropped.
|
||||
assert r.sensor_location is not None
|
||||
assert len(r.sensor_location) > 0
|
||||
assert r.user_note_labels.get("sensor_location") is not None
|
||||
# Sanity: other shared fields still parse correctly
|
||||
assert r.serial is not None
|
||||
assert r.serial.startswith("BE")
|
||||
|
||||
Reference in New Issue
Block a user