diff --git a/parsers/s3_analyzer.py b/parsers/s3_analyzer.py index fb8b264..ee396dc 100644 --- a/parsers/s3_analyzer.py +++ b/parsers/s3_analyzer.py @@ -12,6 +12,7 @@ Usage: from __future__ import annotations import argparse +import struct import sys import time from dataclasses import dataclass @@ -294,6 +295,115 @@ def split_into_sessions( return sessions +# ────────────────────────────────────────────────────────────────────────────── +# Mark-based session splitting (using structured .bin log) +# ────────────────────────────────────────────────────────────────────────────── + +# Structured .bin record types (from s3_bridge.py) +_REC_BW = 0x01 +_REC_S3 = 0x02 +_REC_MARK = 0x03 +_REC_INFO = 0x04 + + +@dataclass +class MarkSplit: + """A session boundary derived from a MARK record in the structured .bin log.""" + label: str + bw_byte_offset: int # byte position in the flat raw_bw stream at mark time + s3_byte_offset: int # byte position in the flat raw_s3 stream at mark time + + +def parse_structured_bin(bin_blob: bytes) -> list[MarkSplit]: + """ + Read a structured s3_session_*.bin file and return one MarkSplit per MARK + record, containing the cumulative BW and S3 byte counts at that point. 
+
+    Record format: [type:1][ts_us:8 LE][len:4 LE][payload:len]
+    """
+    marks: list[MarkSplit] = []
+    bw_bytes = 0
+    s3_bytes = 0
+    pos = 0
+
+    while pos + 13 <= len(bin_blob):
+        rec_type = bin_blob[pos]
+        # ts_us: 8 bytes LE (we don't need it, just skip)
+        length = struct.unpack_from("<I", bin_blob, pos + 9)[0]
+        payload_start = pos + 13
+        payload_end = payload_start + length
+        if payload_end > len(bin_blob):
+            break  # truncated record
+
+        payload = bin_blob[payload_start:payload_end]
+
+        if rec_type == _REC_BW:
+            bw_bytes += length
+        elif rec_type == _REC_S3:
+            s3_bytes += length
+        elif rec_type == _REC_MARK:
+            label = payload.decode("utf-8", errors="replace")
+            marks.append(MarkSplit(label=label,
+                                   bw_byte_offset=bw_bytes,
+                                   s3_byte_offset=s3_bytes))
+
+        pos = payload_end
+
+    return marks
+
+
+def split_sessions_at_marks(
+    bw_blob: bytes,
+    s3_blob: bytes,
+    marks: list[MarkSplit],
+) -> list[Session]:
+    """
+    Split raw byte streams into sessions using mark byte offsets, then apply
+    the standard 0x74-based sub-splitting within each mark segment.
+
+    Each mark creates a new session boundary: session 0 = bytes before mark 0,
+    session 1 = bytes between mark 0 and mark 1, etc.
+    """
+    if not marks:
+        # No marks — fall back to standard session detection
+        bw_frames = annotate_frames(parse_bw(bw_blob, trailer_len=0,
+                                             validate_checksum=True), "BW")
+        s3_frames = annotate_frames(parse_s3(s3_blob, trailer_len=0), "S3")
+        return split_into_sessions(bw_frames, s3_frames)
+
+    # Build slice boundaries: [0 .. mark0.bw, mark0.bw .. mark1.bw, ...]
+ bw_cuts = [m.bw_byte_offset for m in marks] + [len(bw_blob)] + s3_cuts = [m.s3_byte_offset for m in marks] + [len(s3_blob)] + + all_sessions: list[Session] = [] + session_offset = 0 + bw_prev = s3_prev = 0 + + for seg_i, (bw_end, s3_end) in enumerate(zip(bw_cuts, s3_cuts)): + bw_chunk = bw_blob[bw_prev:bw_end] + s3_chunk = s3_blob[s3_prev:s3_end] + + bw_frames = annotate_frames(parse_bw(bw_chunk, trailer_len=0, + validate_checksum=True), "BW") + s3_frames = annotate_frames(parse_s3(s3_chunk, trailer_len=0), "S3") + + seg_sessions = split_into_sessions(bw_frames, s3_frames) + + # Re-index sessions so they are globally unique + for sess in seg_sessions: + sess.index = session_offset + for f in sess.all_frames: + f.session_idx = session_offset + session_offset += 1 + all_sessions.append(sess) + + bw_prev = bw_end + s3_prev = s3_end + + return all_sessions + + # ────────────────────────────────────────────────────────────────────────────── # Diff engine # ────────────────────────────────────────────────────────────────────────────── diff --git a/seismo_lab.py b/seismo_lab.py index e73167f..984d6d6 100644 --- a/seismo_lab.py +++ b/seismo_lab.py @@ -42,8 +42,10 @@ from s3_analyzer import ( # noqa: E402 format_hex_dump, parse_bw, parse_s3, + parse_structured_bin, render_session_report, split_into_sessions, + split_sessions_at_marks, write_claude_export, ) from frame_db import FrameDB # noqa: E402 @@ -97,7 +99,7 @@ class BridgePanel(tk.Frame): def __init__(self, parent: tk.Widget, on_bridge_started, on_bridge_stopped, **kw): super().__init__(parent, bg=BG2, **kw) - self._on_started = on_bridge_started + self._on_started = on_bridge_started # signature: (raw_bw, raw_s3, struct_bin) self._on_stopped = on_bridge_stopped self.process: Optional[subprocess.Popen] = None self._stdout_q: queue.Queue[str] = queue.Queue() @@ -224,6 +226,9 @@ class BridgePanel(tk.Frame): raw_s3_path = os.path.join(logdir, f"raw_s3_{ts}.bin") args += ["--raw-s3", raw_s3_path] + # Structured bin path 
— written by bridge automatically, named by ts + struct_bin_path = os.path.join(logdir, f"s3_session_{ts}.bin") + try: self.process = subprocess.Popen( args, @@ -245,7 +250,7 @@ class BridgePanel(tk.Frame): self._append_log(f"== Bridge started [{ts}] ==\n") # Notify parent so Analyzer can wire up live mode - self._on_started(raw_bw_path, raw_s3_path) + self._on_started(raw_bw_path, raw_s3_path, struct_bin_path) def stop_bridge(self) -> None: if self.process and self.process.poll() is None: @@ -318,12 +323,15 @@ class AnalyzerPanel(tk.Frame): # ── external API (called by parent when bridge starts/stops) ────────── - def set_live_files(self, raw_bw: Optional[str], raw_s3: Optional[str]) -> None: + def set_live_files(self, raw_bw: Optional[str], raw_s3: Optional[str], + struct_bin: Optional[str] = None) -> None: """Called when the bridge starts — inject file paths and start live mode.""" if raw_s3: self.s3_var.set(raw_s3) if raw_bw: self.bw_var.set(raw_bw) + if struct_bin: + self.bin_var.set(struct_bin) if raw_s3 and raw_bw: self._start_live() @@ -345,22 +353,44 @@ class AnalyzerPanel(tk.Frame): bar.pack(side=tk.TOP, fill=tk.X) pad = {"padx": 5, "pady": 2} - tk.Label(bar, text="S3 raw:", bg=BG2, fg=FG, font=MONO).pack(side=tk.LEFT, **pad) + # Row 1: raw files + row1 = tk.Frame(bar, bg=BG2) + row1.pack(side=tk.TOP, fill=tk.X) + + tk.Label(row1, text="S3 raw:", bg=BG2, fg=FG, font=MONO).pack(side=tk.LEFT, **pad) self.s3_var = tk.StringVar() - tk.Entry(bar, textvariable=self.s3_var, width=30, bg=BG3, fg=FG, + tk.Entry(row1, textvariable=self.s3_var, width=30, bg=BG3, fg=FG, insertbackground=FG, relief="flat", font=MONO).pack(side=tk.LEFT, **pad) - tk.Button(bar, text="Browse", bg=BG3, fg=FG, relief="flat", cursor="hand2", + tk.Button(row1, text="Browse", bg=BG3, fg=FG, relief="flat", cursor="hand2", font=MONO, command=lambda: self._browse(self.s3_var, "raw_s3.bin") ).pack(side=tk.LEFT, **pad) - tk.Label(bar, text=" BW raw:", bg=BG2, fg=FG, font=MONO).pack(side=tk.LEFT, 
**pad) + tk.Label(row1, text=" BW raw:", bg=BG2, fg=FG, font=MONO).pack(side=tk.LEFT, **pad) self.bw_var = tk.StringVar() - tk.Entry(bar, textvariable=self.bw_var, width=30, bg=BG3, fg=FG, + tk.Entry(row1, textvariable=self.bw_var, width=30, bg=BG3, fg=FG, insertbackground=FG, relief="flat", font=MONO).pack(side=tk.LEFT, **pad) - tk.Button(bar, text="Browse", bg=BG3, fg=FG, relief="flat", cursor="hand2", + tk.Button(row1, text="Browse", bg=BG3, fg=FG, relief="flat", cursor="hand2", font=MONO, command=lambda: self._browse(self.bw_var, "raw_bw.bin") ).pack(side=tk.LEFT, **pad) + # Row 2: structured bin (optional — enables mark-based session splitting) + row2 = tk.Frame(bar, bg=BG2) + row2.pack(side=tk.TOP, fill=tk.X) + + tk.Label(row2, text="Session .bin:", bg=BG2, fg=FG, font=MONO).pack(side=tk.LEFT, **pad) + self.bin_var = tk.StringVar() + tk.Entry(row2, textvariable=self.bin_var, width=46, bg=BG3, fg=FG, + insertbackground=FG, relief="flat", font=MONO).pack(side=tk.LEFT, **pad) + tk.Button(row2, text="Browse", bg=BG3, fg=FG, relief="flat", cursor="hand2", + font=MONO, command=lambda: self._browse(self.bin_var, "s3_session.bin") + ).pack(side=tk.LEFT, **pad) + tk.Label(row2, text="(optional — splits sessions at marks)", bg=BG2, fg=FG_DIM, + font=MONO_SM).pack(side=tk.LEFT, padx=6) + + # Row 3: buttons + bar = tk.Frame(bar, bg=BG2) + bar.pack(side=tk.TOP, fill=tk.X) + tk.Frame(bar, bg=BG2, width=10).pack(side=tk.LEFT) self.analyze_btn = tk.Button(bar, text="Analyze", bg=ACCENT, fg="#ffffff", relief="flat", padx=10, cursor="hand2", font=MONO_B, @@ -598,14 +628,37 @@ class AnalyzerPanel(tk.Frame): self.state.bw_path = bwp self._do_analyze(s3p, bwp) + def _browse_bin(self) -> None: + path = filedialog.askopenfilename( + title="Select session .bin file", + filetypes=[("Binary", "*.bin"), ("All files", "*.*")], + ) + if path: + self.bin_var.set(path) + def _do_analyze(self, s3_path: Path, bw_path: Path) -> None: self.status_var.set("Parsing...") self.update_idletasks() - 
s3_frames = annotate_frames(parse_s3(s3_path.read_bytes(), trailer_len=0), "S3") - bw_frames = annotate_frames(parse_bw(bw_path.read_bytes(), trailer_len=0, - validate_checksum=True), "BW") - sessions = split_into_sessions(bw_frames, s3_frames) + s3_blob = s3_path.read_bytes() + bw_blob = bw_path.read_bytes() + + # Use mark-based session splitting if a structured .bin is provided + bin_str = self.bin_var.get().strip() + bin_path = Path(bin_str) if bin_str else None + marks = [] + if bin_path and bin_path.exists(): + marks = parse_structured_bin(bin_path.read_bytes()) + + if marks: + sessions = split_sessions_at_marks(bw_blob, s3_blob, marks) + self.status_var.set(f"Parsing... ({len(marks)} mark(s) found)") + self.update_idletasks() + else: + s3_frames = annotate_frames(parse_s3(s3_blob, trailer_len=0), "S3") + bw_frames = annotate_frames(parse_bw(bw_blob, trailer_len=0, + validate_checksum=True), "BW") + sessions = split_into_sessions(bw_frames, s3_frames) diffs: list[Optional[list[FrameDiff]]] = [None] for i in range(1, len(sessions)): @@ -1012,9 +1065,10 @@ class SeismoLab(tk.Tk): self._nb = nb self.protocol("WM_DELETE_WINDOW", self._on_close) - def _on_bridge_started(self, raw_bw: Optional[str], raw_s3: Optional[str]) -> None: + def _on_bridge_started(self, raw_bw: Optional[str], raw_s3: Optional[str], + struct_bin: Optional[str] = None) -> None: """Bridge started — inject paths into analyzer and start live mode.""" - self._analyzer_panel.set_live_files(raw_bw, raw_s3) + self._analyzer_panel.set_live_files(raw_bw, raw_s3, struct_bin) # Switch to Analyzer tab so the user can watch it update self._nb.select(1)