feat: added raw binary data tracking for accurate format parser.

This commit is contained in:
serversdwn
2026-03-02 15:47:52 -05:00
parent 413fc53a39
commit 43c9c8b3a3
2 changed files with 246 additions and 140 deletions

View File

@@ -1,29 +1,23 @@
#!/usr/bin/env python3
"""
s3_bridge.py — S3 <-> Blastware serial bridge with frame-aware session logging
Version: v0.4.0
s3_bridge.py — S3 <-> Blastware serial bridge with raw binary capture + DLE-aware text framing
Version: v0.5.0
Key features:
- Low CPU: avoids per-byte console printing
- Forwards bytes immediately (true bridge)
- Frame-aware logging: buffers per direction until ETX (0x03), then logs full frame on one line
- Also logs plain ASCII bursts (e.g., "Operating System") cleanly
- Dual log output: hex text log (.log) AND raw binary log (.bin) written simultaneously
- Interactive annotation: type 'm' + Enter to stamp a [MARK] into both logs mid-capture
- Binary sentinel markers: out-of-band FF FF FF FF <len> <label> in .bin for programmatic correlation
- Auto-marks on session start and end
Whats new vs v0.4.0:
- .bin is now a TRUE raw capture stream with direction + timestamps (record container format).
- .log remains human-friendly and frame-oriented, but frame detection is now DLE-aware:
- frame start = 0x10 0x02 (DLE STX)
- frame end = 0x10 0x03 (DLE ETX)
(No longer splits on bare 0x03.)
- Marks/Info are stored as proper record types in .bin (no unsafe sentinel bytes).
Usage examples:
python s3_bridge.py
python s3_bridge.py --bw COM5 --s3 COM4 --baud 38400
python s3_bridge.py --quiet
Annotation:
While running, type 'm' and press Enter. You will be prompted for a label.
The mark is written to the .log as:
[HH:MM:SS.mmm] >>> MARK: your label here
And to the .bin as an out-of-band sentinel (never valid frame data):
FF FF FF FF <1-byte length> <label bytes>
BIN record format (little-endian):
[type:1][ts_us:8][len:4][payload:len]
Types:
0x01 BW->S3 bytes
0x02 S3->BW bytes
0x03 MARK (utf-8)
0x04 INFO (utf-8)
"""
from __future__ import annotations
@@ -39,38 +33,56 @@ from typing import Optional
import serial
VERSION = "v0.5.0"
VERSION = "v0.4.0"
DLE = 0x10
STX = 0x02
ETX = 0x03
ACK = 0x41
# Sentinel prefix for binary markers. Four 0xFF bytes can never appear in
# valid Instantel DLE-framed data (0xFF is not a legal protocol byte in any
# framing position), so this sequence is unambiguously out-of-band.
BIN_MARK_SENTINEL = b"\xFF\xFF\xFF\xFF"
REC_BW = 0x01
REC_S3 = 0x02
REC_MARK = 0x03
REC_INFO = 0x04
def now_ts() -> str:
# Local time with milliseconds, like [13:37:06.239]
t = _dt.datetime.now()
return t.strftime("%H:%M:%S.") + f"{int(t.microsecond/1000):03d}"
def now_us() -> int:
# Wall-clock microseconds (fine for correlation). If you want monotonic, we can switch.
return int(time.time() * 1_000_000)
def bytes_to_hex(b: bytes) -> str:
return " ".join(f"{x:02X}" for x in b)
def looks_like_text(b: bytes) -> bool:
# Heuristic: mostly printable ASCII plus spaces
if not b:
return False
printable = 0
for x in b:
if x in (9, 10, 13): # \t \n \r
if x in (9, 10, 13):
printable += 1
elif 32 <= x <= 126:
printable += 1
return (printable / len(b)) >= 0.90
def pack_u32_le(n: int) -> bytes:
return bytes((n & 0xFF, (n >> 8) & 0xFF, (n >> 16) & 0xFF, (n >> 24) & 0xFF))
def pack_u64_le(n: int) -> bytes:
out = []
for i in range(8):
out.append((n >> (8 * i)) & 0xFF)
return bytes(out)
class SessionLogger:
def __init__(self, path: str, bin_path: str):
self.path = path
@@ -83,24 +95,24 @@ class SessionLogger:
with self._lock:
self._fh.write(line + "\n")
def log_raw(self, data: bytes) -> None:
"""Write raw bytes directly to the binary log."""
def bin_write_record(self, rec_type: int, payload: bytes, ts_us: Optional[int] = None) -> None:
if ts_us is None:
ts_us = now_us()
header = bytes([rec_type]) + pack_u64_le(ts_us) + pack_u32_le(len(payload))
with self._lock:
self._bin_fh.write(data)
self._bin_fh.write(header)
if payload:
self._bin_fh.write(payload)
def log_mark(self, label: str) -> None:
"""
Write an annotation mark to both logs simultaneously.
.log — visually distinct line: [TS] >>> MARK: label
.bin — out-of-band sentinel: FF FF FF FF <len> <label utf-8, max 255 bytes>
"""
ts = now_ts()
label_bytes = label.encode("utf-8", errors="replace")[:255]
sentinel = BIN_MARK_SENTINEL + bytes([len(label_bytes)]) + label_bytes
with self._lock:
self._fh.write(f"[{ts}] >>> MARK: {label}\n")
self._bin_fh.write(sentinel)
self.log_line(f"[{ts}] >>> MARK: {label}")
self.bin_write_record(REC_MARK, label.encode("utf-8", errors="replace"))
def log_info(self, msg: str) -> None:
ts = now_ts()
self.log_line(f"[{ts}] [INFO] {msg}")
self.bin_write_record(REC_INFO, msg.encode("utf-8", errors="replace"))
def close(self) -> None:
with self._lock:
@@ -112,51 +124,87 @@ class SessionLogger:
self._bin_fh.close()
class FrameAssembler:
class DLEFrameSniffer:
"""
Maintains a rolling buffer of bytes for one direction and emits complete frames.
We treat ETX=0x03 as an end-of-frame marker.
DLE-aware sniffer for logging only.
Extracts:
- ACK bytes (0x41) as single-byte events
- DLE-framed blocks starting at 10 02 and ending at 10 03
- Occasional ASCII bursts (e.g. "Operating System") outside framing
It does NOT modify bytes; it just segments them for the .log.
"""
def __init__(self):
self.buf = bytearray()
def push(self, chunk: bytes) -> list[bytes]:
def push(self, chunk: bytes) -> list[tuple[str, bytes]]:
if chunk:
self.buf.extend(chunk)
frames: list[bytes] = []
events: list[tuple[str, bytes]] = []
# Opportunistically peel off leading ACK(s) when idle-ish.
# We do this only when an ACK is not inside a frame (frames start with DLE).
while self.buf and self.buf[0] == ACK:
events.append(("ACK", bytes([ACK])))
del self.buf[0]
# Try to parse frames: find DLE STX then scan for DLE ETX
while True:
try:
etx_i = self.buf.index(0x03)
except ValueError:
# Find start of frame
start = self._find_dle_stx(self.buf)
if start is None:
# No frame start. Maybe text?
txt = bytes(self.buf)
if looks_like_text(txt):
events.append(("TEXT", txt))
self.buf.clear()
break
# include ETX byte
frame = bytes(self.buf[: etx_i + 1])
del self.buf[: etx_i + 1]
# Emit any leading text before the frame
if start > 0:
leading = bytes(self.buf[:start])
if looks_like_text(leading):
events.append(("TEXT", leading))
else:
# Unknown junk; still preserve in log as RAW so you can see it
events.append(("RAW", leading))
del self.buf[:start]
# ignore empty noise
if frame:
frames.append(frame)
# Now buf starts with DLE STX
end = self._find_dle_etx(self.buf)
if end is None:
break # need more bytes
return frames
frame = bytes(self.buf[:end])
del self.buf[:end]
def drain_as_text_if_any(self) -> Optional[bytes]:
"""
If buffer contains non-framed data (no ETX) and looks like text, emit it.
Useful for things like "Operating System" that come as raw ASCII.
"""
if not self.buf:
return None
b = bytes(self.buf)
if looks_like_text(b):
self.buf.clear()
return b
events.append(("FRAME", frame))
# peel off any ACKs that may immediately follow
while self.buf and self.buf[0] == ACK:
events.append(("ACK", bytes([ACK])))
del self.buf[0]
return events
@staticmethod
def _find_dle_stx(b: bytearray) -> Optional[int]:
for i in range(len(b) - 1):
if b[i] == DLE and b[i + 1] == STX:
return i
return None
@staticmethod
def _find_dle_etx(b: bytearray) -> Optional[int]:
# Find first occurrence of DLE ETX after the initial DLE STX.
# Return index *after* ETX (slice end).
for i in range(2, len(b) - 1):
if b[i] == DLE and b[i + 1] == ETX:
return i + 2
return None
def open_serial(port: str, baud: int) -> serial.Serial:
# timeout keeps read() from blocking forever, enabling clean Ctrl+C shutdown
return serial.Serial(
port=port,
baudrate=baud,
@@ -170,6 +218,7 @@ def open_serial(port: str, baud: int) -> serial.Serial:
def forward_loop(
name: str,
rec_type: int,
src: serial.Serial,
dst: serial.Serial,
logger: SessionLogger,
@@ -177,22 +226,24 @@ def forward_loop(
quiet: bool,
status_every_s: float,
) -> None:
assembler = FrameAssembler()
sniffer = DLEFrameSniffer()
last_status = time.monotonic()
while not stop.is_set():
try:
n = src.in_waiting
if n:
chunk = src.read(n if n < 4096 else 4096)
else:
chunk = src.read(1) # will return b"" after timeout
chunk = src.read(n if n and n < 4096 else (4096 if n else 1))
except serial.SerialException as e:
logger.log_line(f"[{now_ts()}] [ERROR] {name} serial exception: {e!r}")
break
if chunk:
# forward immediately
ts = now_us()
# 1) RAW BIN CAPTURE (absolute truth)
logger.bin_write_record(rec_type, chunk, ts_us=ts)
# 2) Forward immediately (bridge behavior)
try:
dst.write(chunk)
except serial.SerialTimeoutException:
@@ -201,55 +252,42 @@ def forward_loop(
logger.log_line(f"[{now_ts()}] [ERROR] {name} dst write exception: {e!r}")
break
# frame-aware logging
frames = assembler.push(chunk)
for frame in frames:
# Some devices send leading STX separately; we still log as-is.
logger.log_line(f"[{now_ts()}] [{name}] {bytes_to_hex(frame)}")
logger.log_raw(frame)
# 3) Human-friendly .log segmentation (DLE-aware)
for kind, payload in sniffer.push(chunk):
if kind == "ACK":
logger.log_line(f"[{now_ts()}] [{name}] [ACK] 41")
elif kind == "FRAME":
logger.log_line(f"[{now_ts()}] [{name}] {bytes_to_hex(payload)}")
elif kind == "TEXT":
try:
s = payload.decode("ascii", errors="replace").strip("\r\n")
except Exception:
s = repr(payload)
logger.log_line(f"[{now_ts()}] [{name}] [TEXT] {s}")
else: # RAW
logger.log_line(f"[{now_ts()}] [{name}] [RAW] {bytes_to_hex(payload)}")
# If we have non-ETX data that looks like text, flush it as TEXT
text = assembler.drain_as_text_if_any()
if text is not None:
try:
s = text.decode("ascii", errors="replace").strip("\r\n")
except Exception:
s = repr(text)
logger.log_line(f"[{now_ts()}] [{name}] [TEXT] {s}")
logger.log_raw(text)
# minimal console heartbeat (cheap)
if not quiet and status_every_s > 0:
now = time.monotonic()
if (now - last_status) >= status_every_s:
print(f"[{now_ts()}] {name} alive")
last_status = now
# tiny sleep only when idle to avoid spin
if not chunk:
time.sleep(0.002)
def annotation_loop(logger: SessionLogger, stop: threading.Event) -> None:
"""
Runs on the main thread (or a dedicated thread) reading stdin.
Type 'm' + Enter to trigger an annotation prompt.
Any other non-empty input is ignored with a hint.
Bare Enter (empty line) is silently ignored to prevent accidental marks.
"""
print("[MARK] Type 'm' + Enter to annotate the capture. Ctrl+C to stop.")
while not stop.is_set():
try:
line = input()
except EOFError:
# stdin closed (e.g. piped input exhausted)
break
except KeyboardInterrupt:
except (EOFError, KeyboardInterrupt):
break
line = line.strip()
if not line:
continue # bare Enter — ignore silently
continue
if line.lower() == "m":
try:
@@ -292,11 +330,12 @@ def main() -> int:
log_path = os.path.join(args.logdir, f"s3_session_{ts}.log")
bin_path = os.path.join(args.logdir, f"s3_session_{ts}.bin")
logger = SessionLogger(log_path, bin_path)
print(f"[LOG] Writing hex log to {log_path}")
print(f"[LOG] Writing binary log to {bin_path}")
logger.log_line(f"[{now_ts()}] [INFO] s3_bridge {VERSION} start")
logger.log_line(f"[{now_ts()}] [INFO] BW={args.bw} S3={args.s3} baud={args.baud}")
logger.log_info(f"s3_bridge {VERSION} start")
logger.log_info(f"BW={args.bw} S3={args.s3} baud={args.baud}")
logger.log_mark(f"SESSION START — BW={args.bw} S3={args.s3} baud={args.baud}")
stop = threading.Event()
@@ -309,16 +348,15 @@ def main() -> int:
t1 = threading.Thread(
target=forward_loop,
name="BW_to_S3",
args=("BW->S3", bw, s3, logger, stop, args.quiet, args.status_every),
args=("BW->S3", REC_BW, bw, s3, logger, stop, args.quiet, args.status_every),
daemon=True,
)
t2 = threading.Thread(
target=forward_loop,
name="S3_to_BW",
args=("S3->BW", s3, bw, logger, stop, args.quiet, args.status_every),
args=("S3->BW", REC_S3, s3, bw, logger, stop, args.quiet, args.status_every),
daemon=True,
)
# Annotation loop runs in its own daemon thread so it doesn't block shutdown
t_ann = threading.Thread(
target=annotation_loop,
name="Annotator",
@@ -335,12 +373,11 @@ def main() -> int:
time.sleep(0.05)
finally:
print("\n[INFO] Ctrl+C detected, shutting down...")
logger.log_line(f"[{now_ts()}] [INFO] shutdown requested")
logger.log_info("shutdown requested")
stop.set()
t1.join(timeout=1.0)
t2.join(timeout=1.0)
# t_ann is daemon — don't join, it may be blocked on input()
try:
bw.close()
@@ -352,12 +389,11 @@ def main() -> int:
pass
logger.log_mark("SESSION END")
logger.log_line(f"[{now_ts()}] [INFO] ports closed, session end")
print("[LOG] Closing session log")
logger.log_info("ports closed, session end")
logger.close()
return 0
if __name__ == "__main__":
raise SystemExit(main())
raise SystemExit(main())