feat(import): v0.16.0 - Fully implemented series 3 BW-ACH pipeline stabilized. #19

Merged
serversdown merged 9 commits from ach-report-ingestion into main 2026-05-11 15:55:24 -04:00
2 changed files with 181 additions and 103 deletions
Showing only changes of commit a032fa5451 - Show all commits
+61 -64
View File
@@ -115,10 +115,24 @@ class BwAsciiReport:
units: Optional[str] = None # e.g. "in/s and dB(L)"
# ── Operator-supplied metadata ──────────────────────────────────────────
project: Optional[str] = None
client: Optional[str] = None
operator: Optional[str] = None # User Name
sensor_location: Optional[str] = None # Seis Loc
# Parsed by POSITION from the 4-line "User Notes" block BW writes
# between the `Units :` and `Geo Range :` lines. Position-based so
# the values populate correctly even when an operator renames the
# labels in Blastware's Compliance Setup → Notes tab (the 4 labels
# are user-editable, e.g. "Seis Loc:" → "Building:" → "Site Address:").
# The original labels BW wrote are preserved in `user_note_labels`
# so terra-view can render them as the operator named them.
project: Optional[str] = None # position 1 (BW default label "Project:")
client: Optional[str] = None # position 2 (BW default label "Client:")
operator: Optional[str] = None # position 3 (BW default label "User Name:")
sensor_location: Optional[str] = None # position 4 (BW default label "Seis Loc:")
# Maps canonical slot name → the literal label BW wrote in the ASCII
# export. Empty if the User Notes block wasn't present. Example
# when the operator renamed slot 4 to "Building:":
# {"project": "Project:", "client": "Client:",
# "operator": "User Name:", "sensor_location": "Building:"}
user_note_labels: Dict[str, str] = field(default_factory=dict)
# ── Geo channel scaling ─────────────────────────────────────────────────
geo_range_ips: Optional[float] = None # 10.000 / 1.250
@@ -265,59 +279,23 @@ def _parse_monitor_ts(s: str) -> Optional[datetime.datetime]:
return None
# ── Operator-field label normalisation ──────────────────────────────────────
# ── User-notes positional slot map ──────────────────────────────────────────
#
# BW has used different label spellings across versions and recording
# modes for the same operator-supplied fields:
# Blastware's Compliance Setup → Notes tab shows four operator-supplied
# fields whose LABELS the operator can rename (see screenshot in
# project archive). Defaults are "Project:" / "Client:" /
# "User Name:" / "Seis Loc:", but an operator using a different
# convention can rename them to anything ("Building:", "Site:",
# "Address:", etc.). The ASCII export reflects whatever the operator
# typed, so label-based matching is fragile.
#
# project: "Project:" / "Project"
# client: "Client:" / "Client"
# operator: "User Name:" / "User Name"
# sensor_location: "Seis Loc:" / "Seis. Location" / "Seis Location"
# / "Sensor Location"
#
# Per user feedback ("the tags themselves dont matter a ton, what
# matters is the field"), we normalise labels at lookup time so the
# value-extraction works regardless of which spelling BW happens to
# emit on a given machine.
#
# To add a new variant: edit `_OPERATOR_LABEL_MAP` — single source of
# truth. Keys are normalised forms (lowercase, trailing colon and
# period stripped, internal whitespace collapsed); values are
# attribute names on `BwAsciiReport`.
# What IS reliable: BW always writes the 4 user-notes lines in the
# same order, contiguously between the `Units :` line and the
# `Geo Range :` line. We parse them by POSITION and preserve the
# operator's labels in `report.user_note_labels` so terra-view can
# render them as the operator intended.
# Dispatch table for the operator-supplied fields: maps a label that
# has already been passed through `_normalise_label_for_lookup` to the
# name of the report attribute that receives the line's value.
_OPERATOR_LABEL_MAP = {
# project
"project": "project",
# client
"client": "client",
# operator (written by BW under the "User Name" label)
"user name": "operator",
# sensor location — most variants of "Seis*" + "Sensor Location"
"seis loc": "sensor_location",
"seis. loc": "sensor_location",
"seis. location": "sensor_location",
"seis location": "sensor_location",
"sensor location": "sensor_location",
}
def _normalise_label_for_lookup(key: str) -> str:
    """Normalise a label for the operator-field lookup.

    Strips any trailing run of colons and periods (in either order,
    with or without whitespace between them), collapses internal
    whitespace runs, and lowercases.  So all of:

        "Seis Loc:"
        "Seis. Location"
        "seis location"
        "Sensor Location"

    map to canonical forms in `_OPERATOR_LABEL_MAP`.

    Args:
        key: Raw label text as it appears in the export.

    Returns:
        The normalised, lowercased label.
    """
    s = key.strip()
    # Peel trailing punctuation one character at a time so mixed
    # orders such as "Label:." or "Label. :" are fully removed; a
    # chained rstrip(":").rstrip(".") only handles one ordering.
    while s.endswith((":", ".")):
        s = s[:-1].rstrip()
    s = _KEY_NORMALISE_RE.sub(" ", s)
    return s.lower()
# Report attribute names for the user-notes lines, in slot order: the
# parser indexes this tuple with the running position of the line
# inside the block, so the ORDER here is the contract.
_USER_NOTE_SLOTS = ("project", "client", "operator", "sensor_location")
# ─────────────────────────────────────────────────────────────────────────────
@@ -349,6 +327,15 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
event_time_str: Optional[str] = None
event_date: Optional[datetime.date] = None
# User-notes block detection. We enter the block after parsing
# the "Units :" line and exit on the "Geo Range :" line. Inside,
# the first 4 unmatched `<label> : <value>` lines are assigned to
# the 4 canonical operator-supplied slots by POSITION (project,
# client, operator, sensor_location) regardless of what the
# operator named the labels in BW's Compliance Setup → Notes tab.
in_user_notes_block = False
user_note_position = 0
while i < n:
raw_line = lines[i]
i += 1
@@ -399,18 +386,28 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
elif key == "Battery Level": report.battery_volts = _parse_number(value)
elif key == "Calibration":
report.calibration_date, report.calibration_by = _parse_calibration(value)
elif key == "Units": report.units = value
elif key == "Units":
report.units = value
# Entering the user-notes block. Next ~4 lines until
# "Geo Range :" are the operator-supplied notes.
in_user_notes_block = True
user_note_position = 0
# Operator-supplied labels (Project / Client / User Name /
# Seis Loc) — BW writes these with assorted spellings across
# firmware versions and recording modes (e.g. "Seis Loc:" on
# waveform exports vs "Seis. Location" on histogram exports).
# The label normaliser absorbs all known variants; see
# `_OPERATOR_LABEL_MAP` above for the dispatch table.
elif (slot := _OPERATOR_LABEL_MAP.get(_normalise_label_for_lookup(key))):
elif key == "Geo Range":
# Exiting the user-notes block.
in_user_notes_block = False
report.geo_range_ips = _parse_number(value)
# User-notes block: assign by position (operator may have
# renamed the labels, so we don't trust them). Preserve the
# original labels in `user_note_labels` for downstream UIs
# (terra-view) that want to display them as the operator
# named them.
elif in_user_notes_block and user_note_position < len(_USER_NOTE_SLOTS):
slot = _USER_NOTE_SLOTS[user_note_position]
setattr(report, slot, value)
elif key == "Geo Range": report.geo_range_ips = _parse_number(value)
report.user_note_labels[slot] = key
user_note_position += 1
# ── Per-channel stats ────────────────────────────────────────────────
# All match the pattern "{Channel} <stat-name>"
+122 -41
View File
@@ -259,53 +259,135 @@ def test_parse_handles_micl_double_space_in_key():
assert r.mic.zc_freq_hz == pytest.approx(51.0)
# ── Operator-field label normalisation ──────────────────────────────────────
# ── Position-based user-notes parsing ───────────────────────────────────────
#
# The 4 user-supplied note slots (Project / Client / User Name / Seis Loc
# by default) have OPERATOR-EDITABLE labels in BW's Compliance Setup →
# Notes tab. An operator could rename them to "Building:", "Site:",
# "Address:", etc. and the ASCII export would write those labels
# verbatim. We parse by POSITION between the `Units :` and `Geo Range :`
# anchors, NOT by matching the label text.
@pytest.mark.parametrize("label,expected_field,expected_value", [
    # project: colon-terminated and bare spellings
    ("Project:", "project", "Test4-21-26"),
    ("Project", "project", "Test4-21-26"),
    # client
    ("Client:", "client", "Acme Inc"),
    ("Client", "client", "Acme Inc"),
    # operator, which BW labels "User Name"
    ("User Name:", "operator", "Brian"),
    ("User Name", "operator", "Brian"),
    # sensor_location: all known Seis*/Sensor Location spellings
    ("Seis Loc:", "sensor_location", "Catbed"),
    ("Seis Loc", "sensor_location", "Catbed"),
    ("Seis. Location", "sensor_location", "Catbed"),
    ("Seis Location", "sensor_location", "Catbed"),
    ("Sensor Location", "sensor_location", "Catbed"),
])
def test_operator_label_variants_route_to_correct_field(label, expected_field, expected_value):
    """Every known spelling of an operator-supplied label must land in
    the same dataclass attribute."""
    report_text = '"{} : {}"\n'.format(label, expected_value)
    parsed = parse_report(report_text)
    actual = getattr(parsed, expected_field)
    assert actual == expected_value, (label, expected_field)
def _wrap_user_notes(*lines: str) -> str:
    """Wrap user-note lines in the minimal context the parser needs.

    A `Units :` line opens the user-notes block and a `Geo Range :`
    line closes it; each note is emitted between the two as its own
    double-quoted line.

    Args:
        *lines: Note lines in `<label> : <value>` form, without the
            surrounding double quotes.

    Returns:
        The newline-joined report text, ending with a trailing newline.
    """
    body = ['"Units : in/s and dB(L)"']
    body.extend(f'"{note}"' for note in lines)
    body.append('"Geo Range : 10.000 in/s"')
    return "\n".join(body) + "\n"
def test_label_normalisation_is_case_insensitive():
    """Labels that differ only in letter case resolve to the same slot."""
    case_variants = ("seis loc", "SEIS LOC", "SeIs LoC")
    for label in case_variants:
        parsed = parse_report('"{} : Catbed"\n'.format(label))
        assert parsed.sensor_location == "Catbed", label
def test_label_normalisation_collapses_extra_whitespace():
"""Internal whitespace runs in labels are collapsed before lookup."""
text = '"Seis Loc : Catbed"\n' # two spaces between "Seis" and "Loc"
r = parse_report(text)
def test_user_notes_default_labels_populate_by_position():
    """With BW's stock labels (Project / Client / User Name / Seis Loc)
    the four canonical slots are filled in order and the labels are
    recorded verbatim."""
    report_text = _wrap_user_notes(
        "Project: : Test4-21-26",
        "Client: : Acme Inc",
        "User Name: : Brian",
        "Seis Loc: : Catbed",
    )
    expected_labels = {
        "project": "Project:",
        "client": "Client:",
        "operator": "User Name:",
        "sensor_location": "Seis Loc:",
    }

    parsed = parse_report(report_text)

    assert parsed.project == "Test4-21-26"
    assert parsed.client == "Acme Inc"
    assert parsed.operator == "Brian"
    assert parsed.sensor_location == "Catbed"
    assert parsed.user_note_labels == expected_labels
def test_user_notes_operator_renamed_labels_still_populate():
    """When the operator renames the labels in BW's UI (e.g.
    "Seis Loc:" → "Building:"), the values must still fill the
    canonical slots by position, and the operator's own labels must be
    kept in `user_note_labels` so terra-view can display them."""
    report_text = _wrap_user_notes(
        "Building : Main Office",
        "Project Manager : Brian",
        "Inspector : Claude",
        "Site Address : 123 Main St",
    )
    expected_labels = {
        "project": "Building",
        "client": "Project Manager",
        "operator": "Inspector",
        "sensor_location": "Site Address",
    }

    parsed = parse_report(report_text)

    assert parsed.project == "Main Office"
    assert parsed.client == "Brian"
    assert parsed.operator == "Claude"
    assert parsed.sensor_location == "123 Main St"
    assert parsed.user_note_labels == expected_labels
def test_user_notes_with_histogram_label_spelling():
    """Histogram exports spell the fourth label 'Seis. Location:'
    (period and colon) rather than 'Seis Loc:'; positional parsing
    must cope with either spelling."""
    report_text = _wrap_user_notes(
        "Project: : Plum Cont.- Rainbow Run",
        "Client: : Plum Contracting In.c",
        "User Name: : Terra-Mechanics Inc.",
        "Seis. Location: : Loc #1 - 2652 Hepner",
    )

    parsed = parse_report(report_text)

    assert parsed.project == "Plum Cont.- Rainbow Run"
    assert parsed.client == "Plum Contracting In.c"
    assert parsed.operator == "Terra-Mechanics Inc."
    assert parsed.sensor_location == "Loc #1 - 2652 Hepner"
    # The histogram-specific spelling must be stored untouched.
    assert parsed.user_note_labels["sensor_location"] == "Seis. Location:"
def test_user_notes_outside_block_are_ignored():
    """Lines that look like user-notes but appear OUTSIDE the
    Units→Geo Range range must not be assigned to any user-note slot,
    and must not leave an entry in `user_note_labels`."""
    # No Units anchor, so the block is never entered.
    text = (
        '"Serial Number : BE11529"\n'
        '"Project: : SHOULD NOT POPULATE"\n'
    )
    r = parse_report(text)
    assert r.serial == "BE11529"
    # Check every slot, not just `project`: a positional parser could
    # misroute the stray line into any of them.
    assert r.project is None
    assert r.client is None
    assert r.operator is None
    assert r.sensor_location is None
    assert r.user_note_labels == {}
def test_user_notes_partial_block_only_fills_present_slots():
    """If BW writes fewer than 4 user-notes (e.g. operator disabled
    Extended Notes mid-block), only the present positions populate;
    later slots stay None and get no entry in `user_note_labels`."""
    r = parse_report(_wrap_user_notes(
        "Project: : Just-a-project",
        "Client: : Just-a-client",
    ))
    assert r.project == "Just-a-project"
    assert r.client == "Just-a-client"
    assert r.operator is None
    assert r.sensor_location is None
    # Only the two labels that were actually present may be recorded.
    assert r.user_note_labels == {
        "project": "Project:",
        "client": "Client:",
    }
def test_user_notes_extra_lines_beyond_four_are_dropped():
    """If somehow more than 4 lines appear in the user-notes block
    (e.g. BW adds an Extended Notes line), only the first 4 are
    captured — slots 5+ have nowhere to go."""
    r = parse_report(_wrap_user_notes(
        "L1 : v1",
        "L2 : v2",
        "L3 : v3",
        "L4 : v4",
        "L5 : v5",  # ignored — no fifth slot
    ))
    assert r.project == "v1"
    assert r.client == "v2"
    assert r.operator == "v3"
    assert r.sensor_location == "v4"
    # Pin the exact label map: the 5th line must neither add an entry
    # nor overwrite one of the first four.
    assert r.user_note_labels == {
        "project": "L1",
        "client": "L2",
        "operator": "L3",
        "sensor_location": "L4",
    }
    assert "L5" not in r.user_note_labels.values()
def test_real_histogram_fixture_populates_sensor_location():
"""End-to-end: the histogram fixture from example-events/histogram/
uses 'Seis. Location' (with period) and must successfully populate
sensor_location after the label-normalisation fix."""
"""End-to-end: the histogram fixture uses 'Seis. Location:' — must
successfully populate sensor_location via position-based parsing."""
fixture_dir = (
Path(__file__).parent.parent / "example-events" / "histogram"
)
@@ -316,10 +398,9 @@ def test_real_histogram_fixture_populates_sensor_location():
pytest.skip("no histogram TXT in fixture dir")
r = parse_report_file(txt)
# The histogram TXTs verified to use "Seis. Location" — should now
# populate sensor_location instead of being silently dropped.
assert r.sensor_location is not None
assert len(r.sensor_location) > 0
assert r.user_note_labels.get("sensor_location") is not None
# Sanity: other shared fields still parse correctly
assert r.serial is not None
assert r.serial.startswith("BE")