# seismo-relay/analysis/load_bundle.py

"""
load_bundle.py — extract body bytes from BW binary + parse sample columns from TXT.

Used by the codec reverse-engineering scripts in this directory.
"""
from __future__ import annotations

import os
import re
from dataclasses import dataclass


# Directory containing the fixture bundles, resolved relative to this module's
# own directory; load_bundle() looks each bundle up as a sub-folder of it.
BUNDLE_ROOT = os.path.join(
    os.path.dirname(__file__),
    os.pardir,
    "tests",
    "fixtures",
    "decode-re-5-8-26",
)


@dataclass
class Bundle:
    """One fixture bundle: the raw binary file plus values parsed from its TXT file."""

    # Bundle name (the sub-folder under BUNDLE_ROOT) and the two files found in it.
    name: str
    bin_path: str
    txt_path: str

    # Complete contents of the binary file.
    bin: bytes
    # Bytes between STRT (offset 43) and the footer (last 26 bytes).
    body: bytes
    # The 21-byte STRT record.
    strt: bytes

    # Sample columns from the TXT file:
    # {"Tran": [...], "Vert": [...], "Long": [...], "MicL": [...]}
    samples: dict

    # Numeric metadata read from the TXT header fields.
    sample_rate: int
    rectime_sec: float
    pretrig_sec: float
    geo_range_ips: float  # presumably inches per second — TODO confirm units

    # Per-axis peak values: {"Tran": float, "Vert": float, "Long": float}
    ppv: dict
    mic_pspl: float
    serial: str


# Matches a quoted metadata line of the form `"Field : value"`.  The key is not
# allowed to contain a colon, so the line is split at the FIRST colon and a
# value that itself contains colons (e.g. a time of day) stays intact.
_META_LINE_RE = re.compile(r'^"([^":]+):\s*([^"]*)"')


def _parse_txt(path: str) -> tuple[dict, dict]:
    """Parse a TXT export into its header metadata and sample columns.

    The file is split at the first line that mentions all four channel names
    (Tran, Vert, Long, MicL).  Lines before it are scanned for quoted
    ``"Field : value"`` entries; lines after it are read as sample rows.

    Args:
        path: Path of the TXT file.  It is decoded as UTF-8 with undecodable
            bytes replaced rather than raising.

    Returns:
        A ``(meta, samples)`` tuple.  ``meta`` maps each stripped field name
        to its stripped value string.  ``samples`` maps each of "Tran",
        "Vert", "Long" and "MicL" to a list of floats, one per sample row.

    Raises:
        ValueError: If no line names all four channels.
    """
    channels = ("Tran", "Vert", "Long", "MicL")

    with open(path, "r", encoding="utf-8", errors="replace") as f:
        lines = f.read().splitlines()

    # The column header is the first line naming every channel.
    header_idx = next(
        (i for i, line in enumerate(lines) if all(ch in line for ch in channels)),
        None,
    )
    if header_idx is None:
        raise ValueError(f"no Tran/Vert/Long/MicL header in {path}")

    # Metadata: quoted "Field : value" lines above the column header.
    meta: dict = {}
    for line in lines[:header_idx]:
        match = _META_LINE_RE.match(line.strip())
        if match:
            meta[match.group(1).strip()] = match.group(2).strip()

    # Samples: whitespace-separated rows below the column header.  Rows with
    # fewer than four fields, or whose first four fields are not all numeric
    # (e.g. a units row), are skipped; fields beyond the fourth are ignored.
    samples: dict = {ch: [] for ch in channels}
    for line in lines[header_idx + 1:]:
        fields = line.split()
        if len(fields) < 4:
            continue
        try:
            row = [float(field) for field in fields[:4]]
        except ValueError:
            continue
        for ch, value in zip(channels, row):
            samples[ch].append(value)

    return meta, samples


def load_bundle(name: str) -> Bundle:
    """Load the fixture bundle *name* from ``BUNDLE_ROOT``.

    The bundle folder is expected to hold one ``.TXT`` file (matched
    case-insensitively) and one binary file (the first other regular file in
    sorted order).  Hidden files and sub-directories are ignored.

    The binary is sliced using a fixed layout: a 22-byte header, a 21-byte
    STRT record, the body, and a 26-byte footer.  Numeric metadata is taken
    from the TXT header fields; a field that is absent falls back to a
    built-in default (sample rate 1024, record time 3.0, geo range 10.0,
    everything else 0).

    Args:
        name: Name of the bundle sub-folder under ``BUNDLE_ROOT``.

    Returns:
        A populated ``Bundle``.

    Raises:
        FileNotFoundError: If the folder has no TXT file or no binary file.
        ValueError: If a metadata field is present but contains no number, or
            if the TXT file has no column header.
    """
    folder = os.path.join(BUNDLE_ROOT, name)

    # os.listdir order is arbitrary, so sort for a deterministic choice, and
    # drop entries that can never be bundle files (dotfiles, directories).
    files = sorted(
        entry
        for entry in os.listdir(folder)
        if not entry.startswith(".") and os.path.isfile(os.path.join(folder, entry))
    )
    txt_name = next((f for f in files if f.upper().endswith(".TXT")), None)
    bin_name = next((f for f in files if not f.upper().endswith(".TXT")), None)
    if txt_name is None:
        raise FileNotFoundError(f"no TXT file found in bundle folder {folder}")
    if bin_name is None:
        raise FileNotFoundError(f"no binary file found in bundle folder {folder}")

    bin_path = os.path.join(folder, bin_name)
    txt_path = os.path.join(folder, txt_name)

    with open(bin_path, "rb") as f:
        binary = f.read()

    # Header is 22 bytes; STRT at [22:43]; footer at last 26 bytes.
    header_len, strt_len, footer_len = 22, 21, 26
    body_start = header_len + strt_len
    strt = binary[header_len:body_start]
    body = binary[body_start:-footer_len]

    meta, samples = _parse_txt(txt_path)

    def _number(key: str, default: str, pattern: str = r"-?[\d.]+") -> str:
        """Return the first match of *pattern* in the metadata field *key*."""
        raw = meta.get(key, default)
        match = re.search(pattern, raw)
        if match is None:
            # Fail with the field name rather than an AttributeError on None.
            raise ValueError(
                f"no numeric value in field {key!r} of {txt_path}: {raw!r}"
            )
        return match.group(0)

    sample_rate = int(_number("Sample Rate", "1024", r"\d+"))
    rectime_sec = float(_number("Record Time", "3.0", r"[\d.]+"))
    pretrig_sec = float(_number("Pre-trigger Length", "0"))
    geo_range_ips = float(_number("Geo Range", "10.0", r"[\d.]+"))
    serial = meta.get("Serial Number", "").strip()

    ppv = {
        axis: float(_number(f"{axis} PPV", "0"))
        for axis in ("Tran", "Vert", "Long")
    }
    mic_pspl = float(_number("MicL PSPL", "0"))

    return Bundle(
        name=name,
        bin_path=bin_path,
        txt_path=txt_path,
        bin=binary,
        body=body,
        strt=strt,
        samples=samples,
        sample_rate=sample_rate,
        rectime_sec=rectime_sec,
        pretrig_sec=pretrig_sec,
        geo_range_ips=geo_range_ips,
        ppv=ppv,
        mic_pspl=mic_pspl,
        serial=serial,
    )


if __name__ == "__main__":
    # Manual check: print a one-line summary for each hard-coded bundle name.
    for name in ("event-a", "event-b", "event-c", "event-d"):
        bundle = load_bundle(name)
        sample_count = len(bundle.samples["Tran"])
        ppv_text = ",".join(
            f"{bundle.ppv[axis]:.3f}" for axis in ("Tran", "Vert", "Long")
        )
        summary = [
            f"{name}: body={len(bundle.body):>6}",
            f"N_samples={sample_count}",
            f"rate={bundle.sample_rate}",
            f"rectime={bundle.rectime_sec}",
            f"pretrig={bundle.pretrig_sec}",
            f"range={bundle.geo_range_ips}",
            f"PPV(T,V,L)={ppv_text}",
            f"MicL={bundle.mic_pspl}",
        ]
        # Fields are separated by two spaces, matching the original layout.
        print("  ".join(summary))