feat(import): v0.16.0 - Fully implemented series 3 BW-ACH pipeline stabilized. #19
@@ -265,6 +265,61 @@ def _parse_monitor_ts(s: str) -> Optional[datetime.datetime]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Operator-field label normalisation ──────────────────────────────────────
|
||||||
|
#
|
||||||
|
# BW has used different label spellings across versions and recording
|
||||||
|
# modes for the same operator-supplied fields:
|
||||||
|
#
|
||||||
|
# project: "Project:" / "Project"
|
||||||
|
# client: "Client:" / "Client"
|
||||||
|
# operator: "User Name:" / "User Name"
|
||||||
|
# sensor_location: "Seis Loc:" / "Seis. Location" / "Seis Location"
|
||||||
|
# / "Sensor Location"
|
||||||
|
#
|
||||||
|
# Per user feedback ("the tags themselves dont matter a ton, what
|
||||||
|
# matters is the field"), we normalise labels at lookup time so the
|
||||||
|
# value-extraction works regardless of which spelling BW happens to
|
||||||
|
# emit on a given machine.
|
||||||
|
#
|
||||||
|
# To add a new variant: edit `_OPERATOR_LABEL_MAP` — single source of
|
||||||
|
# truth. Keys are normalised forms (lowercase, trailing colon and
|
||||||
|
# period stripped, internal whitespace collapsed); values are
|
||||||
|
# attribute names on `BwAsciiReport`.
|
||||||
|
|
||||||
|
_OPERATOR_LABEL_MAP = {
|
||||||
|
# project
|
||||||
|
"project": "project",
|
||||||
|
# client
|
||||||
|
"client": "client",
|
||||||
|
# operator
|
||||||
|
"user name": "operator",
|
||||||
|
# sensor location — most variants of "Seis*" + "Sensor Location"
|
||||||
|
"seis loc": "sensor_location",
|
||||||
|
"seis. loc": "sensor_location",
|
||||||
|
"seis. location": "sensor_location",
|
||||||
|
"seis location": "sensor_location",
|
||||||
|
"sensor location": "sensor_location",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _normalise_label_for_lookup(key: str) -> str:
    """Normalise a label for the operator-field lookup.

    Removes trailing colons and periods (in any order, with or without
    whitespace between them), applies `_KEY_NORMALISE_RE` to collapse
    internal whitespace runs, and lowercases. So all of:

        "Seis Loc:"
        "Seis Loc. :"
        "Seis. Location"
        "seis location"
        "Sensor Location"

    map to canonical forms in `_OPERATOR_LABEL_MAP`.

    Periods and colons that are not at the end of the label (e.g. the
    one in "Seis. Location") are left untouched.

    Args:
        key: The raw label text as it appears in the report.

    Returns:
        The normalised, lowercased label suitable for use as a key into
        `_OPERATOR_LABEL_MAP`.
    """
    s = key.strip()
    # Peel trailing punctuation one character at a time, re-trimming
    # whitespace after each step.  A fixed `rstrip(":").rstrip(".")`
    # chain only copes with "<label>.:" and leaves residue for spellings
    # such as "<label>. :" or "<label>:.".
    while s.endswith((":", ".")):
        s = s[:-1].rstrip()
    s = _KEY_NORMALISE_RE.sub(" ", s)
    return s.lower()
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
# Top-level parser
|
# Top-level parser
|
||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
@@ -346,12 +401,14 @@ def parse_report(text: Union[str, bytes], *, parse_samples: bool = False) -> BwA
|
|||||||
report.calibration_date, report.calibration_by = _parse_calibration(value)
|
report.calibration_date, report.calibration_by = _parse_calibration(value)
|
||||||
elif key == "Units": report.units = value
|
elif key == "Units": report.units = value
|
||||||
|
|
||||||
# Project labels in BW carry their own trailing colon — after
|
# Operator-supplied labels (Project / Client / User Name /
|
||||||
# _normalise_key we just strip it for matching.
|
# Seis Loc) — BW writes these with assorted spellings across
|
||||||
elif key.rstrip(":") == "Project": report.project = value
|
# firmware versions and recording modes (e.g. "Seis Loc:" on
|
||||||
elif key.rstrip(":") == "Client": report.client = value
|
# waveform exports vs "Seis. Location" on histogram exports).
|
||||||
elif key.rstrip(":") == "User Name":report.operator = value
|
# The label normaliser absorbs all known variants; see
|
||||||
elif key.rstrip(":") == "Seis Loc": report.sensor_location = value
|
# `_OPERATOR_LABEL_MAP` above for the dispatch table.
|
||||||
|
elif (slot := _OPERATOR_LABEL_MAP.get(_normalise_label_for_lookup(key))):
|
||||||
|
setattr(report, slot, value)
|
||||||
|
|
||||||
elif key == "Geo Range": report.geo_range_ips = _parse_number(value)
|
elif key == "Geo Range": report.geo_range_ips = _parse_number(value)
|
||||||
|
|
||||||
|
|||||||
@@ -257,3 +257,70 @@ def test_parse_handles_micl_double_space_in_key():
|
|||||||
r = parse_report(text)
|
r = parse_report(text)
|
||||||
assert r.mic.time_of_peak_s == pytest.approx(0.012)
|
assert r.mic.time_of_peak_s == pytest.approx(0.012)
|
||||||
assert r.mic.zc_freq_hz == pytest.approx(51.0)
|
assert r.mic.zc_freq_hz == pytest.approx(51.0)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Operator-field label normalisation ──────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "label,expected_field,expected_value",
    [
        # project: colon-suffixed and bare spellings
        ("Project:", "project", "Test4-21-26"),
        ("Project", "project", "Test4-21-26"),
        # client
        ("Client:", "client", "Acme Inc"),
        ("Client", "client", "Acme Inc"),
        # operator, written as "User Name" in the report
        ("User Name:", "operator", "Brian"),
        ("User Name", "operator", "Brian"),
        # sensor_location: every known Seis*/Sensor Location spelling
        ("Seis Loc:", "sensor_location", "Catbed"),
        ("Seis Loc", "sensor_location", "Catbed"),
        ("Seis. Location", "sensor_location", "Catbed"),
        ("Seis Location", "sensor_location", "Catbed"),
        ("Sensor Location", "sensor_location", "Catbed"),
    ],
)
def test_operator_label_variants_route_to_correct_field(label, expected_field, expected_value):
    """Every known spelling of an operator-supplied label must land on
    the same report attribute."""
    # One quoted "<label> : <value>" line is the whole report under test.
    report = parse_report(f'"{label} : {expected_value}"\n')
    actual = getattr(report, expected_field)
    assert actual == expected_value, (label, expected_field)
|
||||||
|
|
||||||
|
|
||||||
|
def test_label_normalisation_is_case_insensitive():
    """Label casing must not affect which report attribute is filled."""
    spellings = ("seis loc", "SEIS LOC", "SeIs LoC")
    for spelling in spellings:
        report = parse_report(f'"{spelling} : Catbed"\n')
        # The spelling is attached so a failure names the offending case.
        assert report.sensor_location == "Catbed", spelling
|
||||||
|
|
||||||
|
|
||||||
|
def test_label_normalisation_collapses_extra_whitespace():
    """Internal whitespace runs in labels are collapsed before lookup."""
    # Build the label programmatically so the double space between
    # "Seis" and "Loc" cannot be silently reduced to a single space by an
    # editor, formatter or copy/paste -- which would leave this test
    # passing without exercising the whitespace-collapsing path at all.
    label = "Seis" + " " * 2 + "Loc"
    assert label.count(" ") == 2, "test input must contain a whitespace run"

    text = '"{} : Catbed"\n'.format(label)
    r = parse_report(text)
    assert r.sensor_location == "Catbed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_real_histogram_fixture_populates_sensor_location():
    """End-to-end: the histogram fixture from example-events/histogram/
    uses 'Seis. Location' (with period) and must successfully populate
    sensor_location after the label-normalisation fix.

    Skipped when the fixture directory, or any ``*_ASCII.TXT`` file in
    it, is not present.
    """
    fixture_dir = (
        Path(__file__).parent.parent / "example-events" / "histogram"
    )
    if not fixture_dir.exists():
        pytest.skip("histogram fixtures not present")

    # Directory iteration order is filesystem-dependent; sort so the same
    # fixture is exercised on every machine and failures are reproducible.
    candidates = sorted(fixture_dir.glob("*_ASCII.TXT"))
    if not candidates:
        pytest.skip("no histogram TXT in fixture dir")
    txt = candidates[0]

    r = parse_report_file(txt)
    # The histogram TXTs verified to use "Seis. Location" — should now
    # populate sensor_location instead of being silently dropped.
    assert r.sensor_location is not None, txt.name
    assert len(r.sensor_location) > 0, txt.name
    # Sanity: other shared fields still parse correctly
    assert r.serial is not None, txt.name
    assert r.serial.startswith("BE"), txt.name
    assert r.geo_range_ips is not None, txt.name
|
||||||
|
|||||||
Reference in New Issue
Block a user