refactor(bw-report): parse user notes by POSITION, not by label

The four operator-supplied note fields in BW's Compliance Setup →
Notes tab (Project / Client / User Name / Seis Loc) have
USER-EDITABLE LABELS — an operator can rename them in BW's UI to
"Building:", "Site Address:", "Inspector:", or anything else, and
the ASCII export writes those literal labels verbatim.  The
previous label-normalisation map approach (just added in commit
6a7e8c6) was fragile: it could only match label spellings we'd
enumerated in advance.  An operator using "Site:" instead of
"Seis Loc:" would have their sensor location silently dropped.

What IS reliable: BW always writes the 4 user-notes lines
contiguously, in the same order, between the "Units :" line and
the "Geo Range :" line of the export.  So parse them by POSITION:

  position 1 → project
  position 2 → client
  position 3 → operator
  position 4 → sensor_location

The original labels BW wrote are preserved in a new
`BwAsciiReport.user_note_labels` dict (canonical slot → literal
label string) so terra-view can render them as the operator named
them.

Removes the `_OPERATOR_LABEL_MAP` / `_normalise_label_for_lookup`
helpers and the elif-by-normalised-label branch in `parse_report`.
Replaces them with a small state machine that switches on at the
"Units" line and switches off at the "Geo Range" line.

Tests:
  - Default-label fixtures (waveform + histogram) still populate
    correctly, with the labels BW wrote captured in `user_note_labels`.
  - Synthetic custom-labelled exports ("Building:" / "Site Address:" /
    etc.) populate the right slots by position.
  - Histogram-specific "Seis. Location:" works.
  - Lines outside the Units→Geo Range span are ignored even if
    they look like user notes (defensive against malformed exports).
  - Partial blocks (fewer than 4 lines) leave later slots None.
  - Extra lines beyond 4 are dropped (5th slot doesn't exist).

26 tests in test_bw_ascii_report.py (was 33; net drop reflects
parametrised label tests collapsed into 6 focused position tests).
Full SFM suite: 62 passed, 44 skipped.

Pairs with series3-watcher v1.5.0 which fixes the filename pairing
so the report reaches this parser in the first place.
This commit is contained in:
2026-05-10 22:28:31 +00:00
parent 6a7e8c6e86
commit a032fa5451
2 changed files with 181 additions and 103 deletions
+61 -64
View File
@@ -115,10 +115,24 @@ class BwAsciiReport:
units: Optional[str] = None # e.g. "in/s and dB(L)" units: Optional[str] = None # e.g. "in/s and dB(L)"
# ── Operator-supplied metadata ────────────────────────────────────────── # ── Operator-supplied metadata ──────────────────────────────────────────
project: Optional[str] = None # Parsed by POSITION from the 4-line "User Notes" block BW writes
client: Optional[str] = None # between the `Units :` and `Geo Range :` lines. Position-based so
operator: Optional[str] = None # User Name # the values populate correctly even when an operator renames the
sensor_location: Optional[str] = None # Seis Loc # labels in Blastware's Compliance Setup → Notes tab (the 4 labels
# are user-editable, e.g. "Seis Loc:" → "Building:" → "Site Address:").
# The original labels BW wrote are preserved in `user_note_labels`
# so terra-view can render them as the operator named them.
project: Optional[str] = None # position 1 (BW default label "Project:")
client: Optional[str] = None # position 2 (BW default label "Client:")
operator: Optional[str] = None # position 3 (BW default label "User Name:")
sensor_location: Optional[str] = None # position 4 (BW default label "Seis Loc:")
# Maps canonical slot name → the literal label BW wrote in the ASCII
# export. Empty if the User Notes block wasn't present. Example
# when the operator renamed slot 4 to "Building:":
# {"project": "Project:", "client": "Client:",
# "operator": "User Name:", "sensor_location": "Building:"}
user_note_labels: Dict[str, str] = field(default_factory=dict)
# ── Geo channel scaling ───────────────────────────────────────────────── # ── Geo channel scaling ─────────────────────────────────────────────────
geo_range_ips: Optional[float] = None # 10.000 / 1.250 geo_range_ips: Optional[float] = None # 10.000 / 1.250
@@ -265,59 +279,23 @@ def _parse_monitor_ts(s: str) -> Optional[datetime.datetime]:
return None return None
# ── Operator-field label normalisation ────────────────────────────────────── # ── User-notes positional slot map ──────────────────────────────────────────
# #
# BW has used different label spellings across versions and recording # Blastware's Compliance Setup → Notes tab shows four operator-supplied
# modes for the same operator-supplied fields: # fields whose LABELS the operator can rename (see screenshot in
# project archive). Defaults are "Project:" / "Client:" /
# "User Name:" / "Seis Loc:", but an operator using a different
# convention can rename them to anything ("Building:", "Site:",
# "Address:", etc.). The ASCII export reflects whatever the operator
# typed, so label-based matching is fragile.
# #
# project: "Project:" / "Project" # What IS reliable: BW always writes the 4 user-notes lines in the
# client: "Client:" / "Client" # same order, contiguously between the `Units :` line and the
# operator: "User Name:" / "User Name" # `Geo Range :` line. We parse them by POSITION and preserve the
# sensor_location: "Seis Loc:" / "Seis. Location" / "Seis Location" # operator's labels in `report.user_note_labels` so terra-view can
# / "Sensor Location" # render them as the operator intended.
#
# Per user feedback ("the tags themselves dont matter a ton, what
# matters is the field"), we normalise labels at lookup time so the
# value-extraction works regardless of which spelling BW happens to
# emit on a given machine.
#
# To add a new variant: edit `_OPERATOR_LABEL_MAP` — single source of
# truth. Keys are normalised forms (lowercase, trailing colon and
# period stripped, internal whitespace collapsed); values are
# attribute names on `BwAsciiReport`.
_OPERATOR_LABEL_MAP = { _USER_NOTE_SLOTS = ("project", "client", "operator", "sensor_location")
# project
"project": "project",
# client
"client": "client",
# operator
"user name": "operator",
# sensor location — most variants of "Seis*" + "Sensor Location"
"seis loc": "sensor_location",
"seis. loc": "sensor_location",
"seis. location": "sensor_location",
"seis location": "sensor_location",
"sensor location": "sensor_location",
}
def _normalise_label_for_lookup(key: str) -> str:
"""Normalise a label for the operator-field lookup.
Strips a trailing colon and/or period, collapses internal
whitespace runs, and lowercases. So all of:
"Seis Loc:"
"Seis. Location"
"seis location"
"Sensor Location"
map to canonical forms in `_OPERATOR_LABEL_MAP`.
"""
s = key.strip().rstrip(":").rstrip(".").strip()
s = _KEY_NORMALISE_RE.sub(" ", s)
return s.lower()
# ───────────────────────────────────────────────────────────────────────────── # ─────────────────────────────────────────────────────────────────────────────
@@ -349,6 +327,15 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
event_time_str: Optional[str] = None event_time_str: Optional[str] = None
event_date: Optional[datetime.date] = None event_date: Optional[datetime.date] = None
# User-notes block detection. We enter the block after parsing
# the "Units :" line and exit on the "Geo Range :" line. Inside,
# the first 4 `<label> : <value>` lines whose key is not claimed by
# an earlier key-specific branch are assigned to the 4 canonical
# operator-supplied slots by POSITION (project, client, operator,
# sensor_location) regardless of what the operator named the labels
# in BW's Compliance Setup → Notes tab.
in_user_notes_block = False
user_note_position = 0
while i < n: while i < n:
raw_line = lines[i] raw_line = lines[i]
i += 1 i += 1
@@ -399,18 +386,28 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
elif key == "Battery Level": report.battery_volts = _parse_number(value) elif key == "Battery Level": report.battery_volts = _parse_number(value)
elif key == "Calibration": elif key == "Calibration":
report.calibration_date, report.calibration_by = _parse_calibration(value) report.calibration_date, report.calibration_by = _parse_calibration(value)
elif key == "Units": report.units = value elif key == "Units":
report.units = value
# Entering the user-notes block. Next ~4 lines until
# "Geo Range :" are the operator-supplied notes.
in_user_notes_block = True
user_note_position = 0
# Operator-supplied labels (Project / Client / User Name / elif key == "Geo Range":
# Seis Loc) — BW writes these with assorted spellings across # Exiting the user-notes block.
# firmware versions and recording modes (e.g. "Seis Loc:" on in_user_notes_block = False
# waveform exports vs "Seis. Location" on histogram exports). report.geo_range_ips = _parse_number(value)
# The label normaliser absorbs all known variants; see
# `_OPERATOR_LABEL_MAP` above for the dispatch table. # User-notes block: assign by position (operator may have
elif (slot := _OPERATOR_LABEL_MAP.get(_normalise_label_for_lookup(key))): # renamed the labels, so we don't trust them). Preserve the
# original labels in `user_note_labels` for downstream UIs
# (terra-view) that want to display them as the operator
# named them.
elif in_user_notes_block and user_note_position < len(_USER_NOTE_SLOTS):
slot = _USER_NOTE_SLOTS[user_note_position]
setattr(report, slot, value) setattr(report, slot, value)
report.user_note_labels[slot] = key
elif key == "Geo Range": report.geo_range_ips = _parse_number(value) user_note_position += 1
# ── Per-channel stats ──────────────────────────────────────────────── # ── Per-channel stats ────────────────────────────────────────────────
# All match the pattern "{Channel} <stat-name>" # All match the pattern "{Channel} <stat-name>"
+120 -39
View File
@@ -259,53 +259,135 @@ def test_parse_handles_micl_double_space_in_key():
assert r.mic.zc_freq_hz == pytest.approx(51.0) assert r.mic.zc_freq_hz == pytest.approx(51.0)
# ── Operator-field label normalisation ────────────────────────────────────── # ── Position-based user-notes parsing ───────────────────────────────────────
#
# The 4 user-supplied note slots (Project / Client / User Name / Seis Loc
# by default) have OPERATOR-EDITABLE labels in BW's Compliance Setup →
# Notes tab. An operator could rename them to "Building:", "Site:",
# "Address:", etc. and the ASCII export would write those labels
# verbatim. We parse by POSITION between the `Units :` and `Geo Range :`
# anchors, NOT by matching the label text.
@pytest.mark.parametrize("label,expected_field,expected_value", [ def _wrap_user_notes(*lines: str) -> str:
# project — both with-colon and bare """Helper: wrap N user-note lines in the minimal context the parser
("Project:", "project", "Test4-21-26"), needs (`Units :` opens the block, `Geo Range :` closes it)."""
("Project", "project", "Test4-21-26"), body = ['"Units : in/s and dB(L)"']
# client body.extend('"' + l + '"' for l in lines)
("Client:", "client", "Acme Inc"), body.append('"Geo Range : 10.000 in/s"')
("Client", "client", "Acme Inc"), return "\n".join(body) + "\n"
# operator (User Name)
("User Name:", "operator", "Brian"),
("User Name", "operator", "Brian"), def test_user_notes_default_labels_populate_by_position():
# sensor_location — every Seis*/Sensor Location variant we know about """The BW-default labels (Project / Client / User Name / Seis Loc)
("Seis Loc:", "sensor_location", "Catbed"), populate the four canonical slots in order."""
("Seis Loc", "sensor_location", "Catbed"), r = parse_report(_wrap_user_notes(
("Seis. Location", "sensor_location", "Catbed"), "Project: : Test4-21-26",
("Seis Location", "sensor_location", "Catbed"), "Client: : Acme Inc",
("Sensor Location", "sensor_location", "Catbed"), "User Name: : Brian",
]) "Seis Loc: : Catbed",
def test_operator_label_variants_route_to_correct_field(label, expected_field, expected_value): ))
"""All known label spellings of the operator-supplied fields route assert r.project == "Test4-21-26"
to the same dataclass attribute.""" assert r.client == "Acme Inc"
text = '"{} : {}"\n'.format(label, expected_value) assert r.operator == "Brian"
assert r.sensor_location == "Catbed"
assert r.user_note_labels == {
"project": "Project:",
"client": "Client:",
"operator": "User Name:",
"sensor_location": "Seis Loc:",
}
def test_user_notes_operator_renamed_labels_still_populate():
    """Values land in slots by position even when every label is custom.

    None of the four labels used here is a Blastware default (e.g. the
    operator renamed "Seis Loc:" → "Site Address").  Each value must
    still arrive in the canonical slot for its position, and the label
    text exactly as written must come back from `user_note_labels`.
    """
    export_text = _wrap_user_notes(
        "Building : Main Office",
        "Project Manager : Brian",
        "Inspector : Claude",
        "Site Address : 123 Main St",
    )
    report = parse_report(export_text)

    # canonical slot -> (label as written, value as written), in block order
    expected = {
        "project": ("Building", "Main Office"),
        "client": ("Project Manager", "Brian"),
        "operator": ("Inspector", "Claude"),
        "sensor_location": ("Site Address", "123 Main St"),
    }

    for slot, (_label, value) in expected.items():
        assert getattr(report, slot) == value, slot

    assert report.user_note_labels == {
        slot: label for slot, (label, _value) in expected.items()
    }
def test_user_notes_with_histogram_label_spelling():
    """A 'Seis. Location:' fourth label still fills `sensor_location`.

    The fourth line uses the 'Seis. Location:' spelling (period and
    colon) rather than 'Seis Loc:'.  Position-based parsing must fill
    all four slots regardless and keep that spelling verbatim in
    `user_note_labels`.
    """
    note_lines = (
        "Project: : Plum Cont.- Rainbow Run",
        "Client: : Plum Contracting In.c",
        "User Name: : Terra-Mechanics Inc.",
        "Seis. Location: : Loc #1 - 2652 Hepner",
    )
    report = parse_report(_wrap_user_notes(*note_lines))

    parsed_values = (
        report.project,
        report.client,
        report.operator,
        report.sensor_location,
    )
    assert parsed_values == (
        "Plum Cont.- Rainbow Run",
        "Plum Contracting In.c",
        "Terra-Mechanics Inc.",
        "Loc #1 - 2652 Hepner",
    )

    # The label is stored as written, not rewritten to the default spelling.
    assert report.user_note_labels["sensor_location"] == "Seis. Location:"
def test_user_notes_outside_block_are_ignored():
"""Lines that look like user-notes but appear OUTSIDE the
Units→Geo Range range don't get assigned to user-note slots."""
# No Units anchor — these lines shouldn't populate user-note slots
text = (
'"Serial Number : BE11529"\n'
'"Project: : SHOULD NOT POPULATE"\n'
)
r = parse_report(text) r = parse_report(text)
assert getattr(r, expected_field) == expected_value, (label, expected_field) assert r.serial == "BE11529"
assert r.project is None
def test_label_normalisation_is_case_insensitive(): def test_user_notes_partial_block_only_fills_present_slots():
"""Lowercase / uppercase / mixed-case labels all hit the same slot.""" """If BW writes fewer than 4 user-notes (e.g. operator disabled
for label in ("seis loc", "SEIS LOC", "SeIs LoC"): Extended Notes mid-block), only the present positions populate;
text = '"{} : Catbed"\n'.format(label) later slots stay None."""
r = parse_report(text) r = parse_report(_wrap_user_notes(
assert r.sensor_location == "Catbed", label "Project: : Just-a-project",
"Client: : Just-a-client",
))
assert r.project == "Just-a-project"
assert r.client == "Just-a-client"
assert r.operator is None
assert r.sensor_location is None
def test_label_normalisation_collapses_extra_whitespace(): def test_user_notes_extra_lines_beyond_four_are_dropped():
"""Internal whitespace runs in labels are collapsed before lookup.""" """If somehow more than 4 lines appear in the user-notes block
text = '"Seis Loc : Catbed"\n' # two spaces between "Seis" and "Loc" (e.g. BW adds an Extended Notes line), only the first 4 are
r = parse_report(text) captured — slots 5+ have nowhere to go."""
assert r.sensor_location == "Catbed" r = parse_report(_wrap_user_notes(
"L1 : v1",
"L2 : v2",
"L3 : v3",
"L4 : v4",
"L5 : v5", # ignored — no fifth slot
))
assert r.project == "v1"
assert r.client == "v2"
assert r.operator == "v3"
assert r.sensor_location == "v4"
# 5th label not captured
assert "L5" not in r.user_note_labels.values()
def test_real_histogram_fixture_populates_sensor_location(): def test_real_histogram_fixture_populates_sensor_location():
"""End-to-end: the histogram fixture from example-events/histogram/ """End-to-end: the histogram fixture uses 'Seis. Location:' — must
uses 'Seis. Location' (with period) and must successfully populate successfully populate sensor_location via position-based parsing."""
sensor_location after the label-normalisation fix."""
fixture_dir = ( fixture_dir = (
Path(__file__).parent.parent / "example-events" / "histogram" Path(__file__).parent.parent / "example-events" / "histogram"
) )
@@ -316,10 +398,9 @@ def test_real_histogram_fixture_populates_sensor_location():
pytest.skip("no histogram TXT in fixture dir") pytest.skip("no histogram TXT in fixture dir")
r = parse_report_file(txt) r = parse_report_file(txt)
# The histogram TXTs verified to use "Seis. Location" — should now
# populate sensor_location instead of being silently dropped.
assert r.sensor_location is not None assert r.sensor_location is not None
assert len(r.sensor_location) > 0 assert len(r.sensor_location) > 0
assert r.user_note_labels.get("sensor_location") is not None
# Sanity: other shared fields still parse correctly # Sanity: other shared fields still parse correctly
assert r.serial is not None assert r.serial is not None
assert r.serial.startswith("BE") assert r.serial.startswith("BE")