feat: s3_session.bin now works as an index, allowing A/B comparison within the same capture

This commit is contained in:
serversdwn
2026-03-11 16:16:04 -04:00
parent 5eb5499034
commit 6d99f86502
2 changed files with 179 additions and 15 deletions

View File

@@ -12,6 +12,7 @@ Usage:
from __future__ import annotations
import argparse
import struct
import sys
import time
from dataclasses import dataclass
@@ -294,6 +295,115 @@ def split_into_sessions(
return sessions
# ──────────────────────────────────────────────────────────────────────────────
# Mark-based session splitting (using structured .bin log)
# ──────────────────────────────────────────────────────────────────────────────
# Structured .bin record types (from s3_bridge.py)
_REC_BW = 0x01
_REC_S3 = 0x02
_REC_MARK = 0x03
_REC_INFO = 0x04


@dataclass
class MarkSplit:
    """A session boundary derived from a MARK record in the structured .bin log."""
    label: str
    bw_byte_offset: int  # byte position in the flat raw_bw stream at mark time
    s3_byte_offset: int  # byte position in the flat raw_s3 stream at mark time


def parse_structured_bin(bin_blob: bytes) -> list[MarkSplit]:
    """
    Scan a structured s3_session_*.bin blob and return one MarkSplit per MARK
    record, capturing the cumulative BW and S3 payload byte counts seen up to
    that point in the stream.

    Record layout: [type:1][ts_us:8 LE][len:4 LE][payload:len]

    A record whose payload would run past the end of the blob is treated as
    truncated and terminates the scan.
    """
    # Pre-compiled header: record type, timestamp in µs (unused), payload length.
    header = struct.Struct("<BQI")
    splits: list[MarkSplit] = []
    counts = {_REC_BW: 0, _REC_S3: 0}  # cumulative payload bytes per stream
    offset = 0
    while offset + header.size <= len(bin_blob):
        rec_type, _ts_us, payload_len = header.unpack_from(bin_blob, offset)
        body_start = offset + header.size
        body_end = body_start + payload_len
        if body_end > len(bin_blob):
            break  # truncated trailing record
        if rec_type in counts:
            counts[rec_type] += payload_len
        elif rec_type == _REC_MARK:
            splits.append(MarkSplit(
                label=bin_blob[body_start:body_end].decode("utf-8", errors="replace"),
                bw_byte_offset=counts[_REC_BW],
                s3_byte_offset=counts[_REC_S3],
            ))
        offset = body_end
    return splits
def split_sessions_at_marks(
    bw_blob: bytes,
    s3_blob: bytes,
    marks: list[MarkSplit],
) -> list[Session]:
    """
    Split raw byte streams into sessions using mark byte offsets, then apply
    the standard 0x74-based sub-splitting within each mark segment.

    Each mark creates a new session boundary: session 0 = bytes before mark 0,
    session 1 = bytes between mark 0 and mark 1, etc.

    Parameters
    ----------
    bw_blob, s3_blob:
        Flat raw capture streams (the byte streams the MarkSplit offsets
        index into).
    marks:
        Boundaries produced by parse_structured_bin(); may be empty.

    Returns
    -------
    list[Session] with globally unique, consecutive session indices.
    """
    if not marks:
        # No marks — fall back to standard session detection on the full streams.
        bw_frames = annotate_frames(parse_bw(bw_blob, trailer_len=0,
                                             validate_checksum=True), "BW")
        s3_frames = annotate_frames(parse_s3(s3_blob, trailer_len=0), "S3")
        return split_into_sessions(bw_frames, s3_frames)

    # Segment end positions per stream: [mark0, mark1, ..., end-of-stream].
    bw_cuts = [m.bw_byte_offset for m in marks] + [len(bw_blob)]
    s3_cuts = [m.s3_byte_offset for m in marks] + [len(s3_blob)]

    all_sessions: list[Session] = []
    session_offset = 0
    bw_prev = s3_prev = 0
    # was: `for seg_i, (...) in enumerate(zip(...))` — seg_i was never used
    for bw_end, s3_end in zip(bw_cuts, s3_cuts):
        bw_chunk = bw_blob[bw_prev:bw_end]
        s3_chunk = s3_blob[s3_prev:s3_end]
        bw_frames = annotate_frames(parse_bw(bw_chunk, trailer_len=0,
                                             validate_checksum=True), "BW")
        s3_frames = annotate_frames(parse_s3(s3_chunk, trailer_len=0), "S3")
        seg_sessions = split_into_sessions(bw_frames, s3_frames)
        # Re-index sessions (and their frames) so indices are globally
        # unique and consecutive across all mark segments.
        for sess in seg_sessions:
            sess.index = session_offset
            for f in sess.all_frames:
                f.session_idx = session_offset
            session_offset += 1
            all_sessions.append(sess)
        bw_prev = bw_end
        s3_prev = s3_end
    return all_sessions
# ──────────────────────────────────────────────────────────────────────────────
# Diff engine
# ──────────────────────────────────────────────────────────────────────────────