feat: implement raw ADC waveform decoding and download functionality

- Added `_decode_a5_waveform()` to parse SUB 5A frames into per-channel time-series data.
- Introduced `download_waveform(event)` method in `MiniMateClient` to fetch full waveform data.
- Updated `Event` model to include new fields: `total_samples`, `pretrig_samples`, `rectime_seconds`, and `_waveform_key`.
- Enhanced documentation in `CHANGELOG.md` and `instantel_protocol_reference.md` to reflect new features and confirmed protocol details.
2026-04-03 13:53:09 -04:00
parent 5d0f0855f2
commit 790e442a7a
5 changed files with 671 additions and 9 deletions
@@ -212,6 +212,7 @@ class MiniMateClient:
        while key4 != b"\x00\x00\x00\x00":
            log.info("get_events: record %d  key=%s", idx, key4.hex())
            ev = Event(index=idx)
+            ev._waveform_key = key4   # stored so download_waveform() can re-use it

            # First event: call 0A to verify it's a full record (0x30 length).
            # Subsequent keys come from 1F(0xFE) which guarantees full records,
@@ -280,6 +281,66 @@ class MiniMateClient:
        log.info("get_events: downloaded %d event(s)", len(events))
        return events

+    def download_waveform(self, event: Event) -> None:
+        """
+        Download the full raw ADC waveform for a previously-retrieved event.
+
+        Performs a complete SUB 5A (BULK_WAVEFORM_STREAM) download with
+        stop_after_metadata=False and passes every returned A5 frame to
+        _decode_a5_waveform(), which populates event.raw_samples,
+        event.total_samples, event.pretrig_samples, and
+        event.rectime_seconds in place.
+
+        The full download fetches all waveform frames (typically 9 large A5
+        frames for a standard blast record), so it is a separate, explicit
+        step rather than part of the normal event listing.
+
+        Args:
+            event:  An Event object returned by get_events().  Its
+                    _waveform_key (the 4-byte key stored on the event by
+                    get_events()) is sent unchanged as the 5A request key;
+                    no key is re-derived here.
+
+        Raises:
+            ValueError:    if event._waveform_key is None.
+            RuntimeError:  if the client is not connected (raised by
+                           _require_proto(), which runs before the key check).
+            ProtocolError: on communication failure.
+
+        Confirmed format (4-2-26 blast capture, ✅):
+            4-channel interleaved signed 16-bit LE, 8 bytes per sample-set.
+            Total samples: 9306 (≈9.1 s at 1024 Hz), pretrig: 298 (≈0.29 s).
+            Channel order: Tran, Vert, Long, Mic  (Blastware convention).
+        """
+        proto = self._require_proto()
+
+        key = event._waveform_key
+        if key is None:
+            raise ValueError(
+                f"Event#{event.index} has no waveform key — "
+                "was it retrieved via get_events()?"
+            )
+
+        log.info(
+            "download_waveform: starting full 5A download for event#%d (key=%s)",
+            event.index, key.hex(),
+        )
+
+        a5_frames = proto.read_bulk_waveform_stream(
+            key, stop_after_metadata=False
+        )
+
+        log.info(
+            "download_waveform: received %d A5 frames; decoding waveform",
+            len(a5_frames),
+        )
+
+        # The decoder only assigns event.raw_samples when it succeeds, and it
+        # always assigns a brand-new dict.  Remember what was there before so a
+        # failed re-download is not reported as a success using stale samples
+        # left over from an earlier call.
+        samples_before = event.raw_samples
+        _decode_a5_waveform(a5_frames, event)
+        decoded = event.raw_samples
+
+        if decoded is not None and decoded is not samples_before:
+            n = len(decoded.get("Tran", []))
+            log.info(
+                "download_waveform: decoded %d sample-sets across 4 channels",
+                n,
+            )
+        else:
+            log.warning("download_waveform: waveform decode produced no samples")
+
    # ── Internal helpers ──────────────────────────────────────────────────────

    def _require_proto(self) -> MiniMateProtocol:
@@ -543,6 +604,203 @@ def _decode_a5_metadata_into(frames_data: list[bytes], event: Event) -> None:
    )


+def _decode_a5_waveform(
+    frames_data: list[bytes],
+    event: Event,
+) -> None:
+    """
+    Decode the raw 4-channel ADC waveform from a complete set of SUB 5A
+    (BULK_WAVEFORM_STREAM) frame payloads and populate event.raw_samples,
+    event.total_samples, event.pretrig_samples, and event.rectime_seconds.
+
+    This requires ALL A5 frames (stop_after_metadata=False), not just the
+    metadata-bearing subset.
+
+    Args:
+        frames_data:  A5 frame payloads in arrival order.  Index 0 must be the
+                      probe response carrying the STRT record.
+        event:        Event to update in place.
+
+    Returns:
+        None.  On any failure (no frames, STRT missing/truncated, or fewer
+        than 8 waveform bytes) a message is logged and event.raw_samples is
+        left untouched.  The three STRT fields are written as soon as the
+        STRT record parses, even if no samples can be decoded afterwards.
+
+    ── Waveform format (confirmed from 4-2-26 blast capture) ───────────────────
+    The blast waveform is 4-channel interleaved signed 16-bit little-endian,
+    8 bytes per sample-set:
+
+        [T_lo T_hi V_lo V_hi L_lo L_hi M_lo M_hi] × N
+
+    where T=Tran, V=Vert, L=Long, M=Mic.  Channel ordering follows the
+    Blastware convention [Tran, Vert, Long, Mic] = [ch0, ch1, ch2, ch3].
+
+    ⚠️  Channel ordering is a confirmed CONVENTION — the physical ordering on
+        the ADC mux is not independently verifiable from the saturating blast
+        captures we have.  The convention is consistent with Blastware labeling
+        (Tran is always the first channel field in the A5 STRT+waveform stream).
+
+    ── Frame structure ──────────────────────────────────────────────────────────
+    A5[0] (probe response):
+        db[7:]  = [11-byte header] [21-byte STRT record] [6-byte preamble] [waveform ...]
+        STRT:     located by searching db[7:] for b'STRT' (offset 11 in the
+                  4-2-26 capture), total 21 bytes
+                  +8  uint16 BE: total_samples (expected full-record sample-sets)
+                  +16 uint16 BE: pretrig_samples (pre-trigger sample count)
+                  +18 uint8:     rectime_seconds (record duration)
+        Preamble: 6 bytes after the STRT record (confirmed from 4-2-26 blast capture):
+                  bytes 21-22: 0x00 0x00  (null padding)
+                  bytes 23-26: 0xFF × 4   (sync sentinel / alignment marker)
+        Waveform starts at strt_pos + 27 within db[7:].
+
+    A5[1..N] (chunk responses):
+        db[7:]  = [8-byte per-frame header] [waveform bytes ...]
+        Header:   [ctr LE uint16, 0x00 × 6] — frame sequence counter
+        Waveform starts at byte 8 of db[7:].
+
+    ── Cross-frame reassembly ───────────────────────────────────────────────────
+    Frame waveform chunk sizes are NOT multiples of 8, so a sample-set
+    regularly straddles a frame boundary: the tail of one chunk and the head
+    of the next together form one 8-byte set.  The channel position of any
+    byte is (global byte offset % 8), which is exactly what decoding the
+    concatenated chunks at an 8-byte stride produces.  The chunks are
+    therefore joined into one stream before decoding, so no boundary
+    sample-set is lost and sample index stays equal to time index (which
+    pretrig_samples relies on).
+
+    NOTE(review): an earlier revision decoded each frame separately and
+    discarded the partial set on both sides of every boundary, silently
+    dropping one sample-set per frame.  Under the same offset model those
+    bytes are valid data; they are now kept.
+
+    Confirmed sizes from 4-2-26 (A5[0..8], skipping A5[7] metadata frame
+    and A5[9] terminator):
+        Frame 0: 934B  Frame 1: 963B  Frame 2: 946B  Frame 3: 960B
+        Frame 4: 952B  Frame 5: 946B  Frame 6: 941B  Frame 8: 992B
+    — none are multiples of 8.
+
+    ── Modifies event in-place. ─────────────────────────────────────────────────
+    """
+    if not frames_data:
+        log.debug("_decode_a5_waveform: no frames provided")
+        return
+
+    # ── Layout constants (see docstring) ─────────────────────────────────────
+    payload_start    = 7        # frame payload of interest begins at db[7:]
+    strt_len         = 21       # STRT record length
+    preamble_len     = 6        # 2 null bytes + 4 × 0xFF after the STRT record
+    chunk_header_len = 8        # per-frame header on A5[1..N]
+    set_size         = 8        # 4 channels × int16
+    # Frame 7 carries event-time metadata strings ("Project:", "Client:", …)
+    # and A5[9] is the device terminator frame (page_key=0x0000); neither
+    # holds ADC data.
+    # NOTE(review): these indices come from the 4-2-26 capture — confirm they
+    # hold for records with a different frame count.
+    non_waveform_frames = (7, 9)
+
+    # ── Parse STRT record from A5[0] ────────────────────────────────────────
+    w0 = frames_data[0][payload_start:]
+    strt_pos = w0.find(b"STRT")
+    if strt_pos < 0:
+        log.warning("_decode_a5_waveform: STRT record not found in A5[0]")
+        return
+
+    # STRT record layout (21 bytes, offsets relative to b'STRT'):
+    #   +0..3   magic  b'STRT'
+    #   +8..9   uint16 BE  total_samples  (full-record expected sample-set count)
+    #   +16..17 uint16 BE  pretrig_samples
+    #   +18     uint8      rectime_seconds
+    strt = w0[strt_pos : strt_pos + strt_len]
+    if len(strt) < strt_len:
+        log.warning("_decode_a5_waveform: STRT record truncated (%dB)", len(strt))
+        return
+
+    total_samples   = struct.unpack_from(">H", strt, 8)[0]
+    pretrig_samples = struct.unpack_from(">H", strt, 16)[0]
+    rectime_seconds = strt[18]
+
+    event.total_samples   = total_samples
+    event.pretrig_samples = pretrig_samples
+    event.rectime_seconds = rectime_seconds
+
+    log.debug(
+        "_decode_a5_waveform: STRT total_samples=%d pretrig=%d rectime=%ds",
+        total_samples, pretrig_samples, rectime_seconds,
+    )
+
+    # ── Collect the waveform bytes of every data-bearing frame ──────────────
+    chunks: list[bytes] = []
+
+    for fi, db in enumerate(frames_data):
+        if fi == 0:
+            # A5[0]: waveform begins after the STRT record and its preamble.
+            # The STRT position found above is reused; w0 is this same frame.
+            wave = w0[strt_pos + strt_len + preamble_len :]
+        elif fi in non_waveform_frames:
+            continue
+        else:
+            w = db[payload_start:]
+            # Strip the 8-byte per-frame header (ctr + 6 zero bytes)
+            if len(w) < chunk_header_len:
+                continue
+            wave = w[chunk_header_len:]
+
+        # Chunks shorter than one int16 are ignored and do not advance the
+        # stream offset.
+        if len(wave) < 2:
+            continue
+
+        chunks.append(wave)
+
+    # ── Join into one stream and decode at an 8-byte stride ──────────────────
+    stream = b"".join(chunks)
+    total_bytes = len(stream)
+    n_sets = total_bytes // set_size
+    log.debug(
+        "_decode_a5_waveform: %d chunks, %dB total → %d complete sample-sets "
+        "(%d of %d expected; %.0f%%)",
+        len(chunks), total_bytes, n_sets, n_sets, total_samples,
+        100.0 * n_sets / total_samples if total_samples else 0,
+    )
+
+    if n_sets == 0:
+        log.warning("_decode_a5_waveform: no complete sample-sets found")
+        return
+
+    # iter_unpack needs a buffer that is an exact multiple of the struct size,
+    # so any trailing partial sample-set at the very end is cut off first.
+    sample_sets = struct.iter_unpack("<4h", stream[: n_sets * set_size])
+
+    # Transpose [(T, V, L, M), ...] into one list per channel.
+    tran, vert, long_, mic = (list(channel) for channel in zip(*sample_sets))
+
+    log.debug(
+        "_decode_a5_waveform: decoded %d sample-sets "
+        "(%d trailing byte(s) left over)",
+        len(tran), total_bytes - n_sets * set_size,
+    )
+
+    event.raw_samples = {
+        "Tran": tran,
+        "Vert": vert,
+        "Long": long_,
+        "Mic":  mic,
+    }
+
+
 def _extract_record_type(data: bytes) -> Optional[str]:
    """
    Decode the recording mode from byte[1] of the 210-byte waveform record.
@@ -327,12 +327,19 @@ class Event:
    # Raw ADC samples keyed by channel label.  Not fetched unless explicitly
    # requested (large data transfer — up to several MB per event).
    raw_samples: Optional[dict] = None      # {"Tran": [...], "Vert": [...], ...}
+    total_samples: Optional[int] = None     # from STRT record: expected total sample-sets
+    pretrig_samples: Optional[int] = None   # from STRT record: pre-trigger sample count
+    rectime_seconds: Optional[int] = None   # from STRT record: record duration (seconds)

    # ── Debug / introspection ─────────────────────────────────────────────────
    # Raw 210-byte waveform record bytes, set when debug mode is active.
    # Exposed by the SFM server via ?debug=true so field layouts can be verified.
    _raw_record: Optional[bytes] = field(default=None, repr=False)

+    # 4-byte waveform key used to request this event via SUB 5A.
+    # Set by get_events(); required by download_waveform().
+    _waveform_key: Optional[bytes] = field(default=None, repr=False)
+
    def __str__(self) -> str:
        ts = str(self.timestamp) if self.timestamp else "no timestamp"
        ppv = ""