feat: s3_session.bin now works as an index, allowing A/B comparison within the same capture

This commit is contained in:
serversdwn
2026-03-11 16:16:04 -04:00
parent 5eb5499034
commit 6d99f86502
2 changed files with 179 additions and 15 deletions

View File

@@ -12,6 +12,7 @@ Usage:
from __future__ import annotations
import argparse
import struct
import sys
import time
from dataclasses import dataclass
@@ -294,6 +295,115 @@ def split_into_sessions(
return sessions
# ──────────────────────────────────────────────────────────────────────────────
# Mark-based session splitting (using structured .bin log)
# ──────────────────────────────────────────────────────────────────────────────
# Structured .bin record types (from s3_bridge.py)
_REC_BW = 0x01
_REC_S3 = 0x02
_REC_MARK = 0x03
_REC_INFO = 0x04


@dataclass
class MarkSplit:
    """A session boundary derived from a MARK record in the structured .bin log."""
    label: str
    bw_byte_offset: int  # byte position in the flat raw_bw stream at mark time
    s3_byte_offset: int  # byte position in the flat raw_s3 stream at mark time


def parse_structured_bin(bin_blob: bytes) -> list[MarkSplit]:
    """
    Scan a structured s3_session_*.bin blob and return one MarkSplit per MARK
    record, capturing the cumulative BW and S3 payload byte counts seen up to
    that point in the stream.

    Record layout: [type:1][ts_us:8 LE][len:4 LE][payload:len]

    A record whose payload would run past the end of the blob is treated as
    truncated and terminates the scan.
    """
    # Pre-compiled header: record type, timestamp in µs (unused), payload length.
    header = struct.Struct("<BQI")
    splits: list[MarkSplit] = []
    counts = {_REC_BW: 0, _REC_S3: 0}  # cumulative payload bytes per stream
    offset = 0
    while offset + header.size <= len(bin_blob):
        rec_type, _ts_us, payload_len = header.unpack_from(bin_blob, offset)
        body_start = offset + header.size
        body_end = body_start + payload_len
        if body_end > len(bin_blob):
            break  # truncated trailing record
        if rec_type in counts:
            counts[rec_type] += payload_len
        elif rec_type == _REC_MARK:
            splits.append(MarkSplit(
                label=bin_blob[body_start:body_end].decode("utf-8", errors="replace"),
                bw_byte_offset=counts[_REC_BW],
                s3_byte_offset=counts[_REC_S3],
            ))
        offset = body_end
    return splits
def split_sessions_at_marks(
    bw_blob: bytes,
    s3_blob: bytes,
    marks: list[MarkSplit],
) -> list[Session]:
    """
    Split raw byte streams into sessions using mark byte offsets, then apply
    the standard 0x74-based sub-splitting within each mark segment.

    Each mark creates a new session boundary: session 0 = bytes before mark 0,
    session 1 = bytes between mark 0 and mark 1, etc.

    Parameters
    ----------
    bw_blob, s3_blob:
        Flat raw capture streams (the byte streams the MarkSplit offsets
        index into).
    marks:
        Boundaries produced by parse_structured_bin(); may be empty.

    Returns
    -------
    list[Session] with globally unique, consecutive session indices.
    """
    if not marks:
        # No marks — fall back to standard session detection on the full streams.
        bw_frames = annotate_frames(parse_bw(bw_blob, trailer_len=0,
                                             validate_checksum=True), "BW")
        s3_frames = annotate_frames(parse_s3(s3_blob, trailer_len=0), "S3")
        return split_into_sessions(bw_frames, s3_frames)

    # Segment end positions per stream: [mark0, mark1, ..., end-of-stream].
    bw_cuts = [m.bw_byte_offset for m in marks] + [len(bw_blob)]
    s3_cuts = [m.s3_byte_offset for m in marks] + [len(s3_blob)]

    all_sessions: list[Session] = []
    session_offset = 0
    bw_prev = s3_prev = 0
    # was: `for seg_i, (...) in enumerate(zip(...))` — seg_i was never used
    for bw_end, s3_end in zip(bw_cuts, s3_cuts):
        bw_chunk = bw_blob[bw_prev:bw_end]
        s3_chunk = s3_blob[s3_prev:s3_end]
        bw_frames = annotate_frames(parse_bw(bw_chunk, trailer_len=0,
                                             validate_checksum=True), "BW")
        s3_frames = annotate_frames(parse_s3(s3_chunk, trailer_len=0), "S3")
        seg_sessions = split_into_sessions(bw_frames, s3_frames)
        # Re-index sessions (and their frames) so indices are globally
        # unique and consecutive across all mark segments.
        for sess in seg_sessions:
            sess.index = session_offset
            for f in sess.all_frames:
                f.session_idx = session_offset
            session_offset += 1
            all_sessions.append(sess)
        bw_prev = bw_end
        s3_prev = s3_end
    return all_sessions
# ──────────────────────────────────────────────────────────────────────────────
# Diff engine
# ──────────────────────────────────────────────────────────────────────────────