feat: added raw binary data tracking for accurate format parser.
This commit is contained in:
@@ -1,29 +1,23 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
s3_bridge.py — S3 <-> Blastware serial bridge with frame-aware session logging
|
||||
Version: v0.4.0
|
||||
s3_bridge.py — S3 <-> Blastware serial bridge with raw binary capture + DLE-aware text framing
|
||||
Version: v0.5.0
|
||||
|
||||
Key features:
|
||||
- Low CPU: avoids per-byte console printing
|
||||
- Forwards bytes immediately (true bridge)
|
||||
- Frame-aware logging: buffers per direction until ETX (0x03), then logs full frame on one line
|
||||
- Also logs plain ASCII bursts (e.g., "Operating System") cleanly
|
||||
- Dual log output: hex text log (.log) AND raw binary log (.bin) written simultaneously
|
||||
- Interactive annotation: type 'm' + Enter to stamp a [MARK] into both logs mid-capture
|
||||
- Binary sentinel markers: out-of-band FF FF FF FF <len> <label> in .bin for programmatic correlation
|
||||
- Auto-marks on session start and end
|
||||
What’s new vs v0.4.0:
|
||||
- .bin is now a TRUE raw capture stream with direction + timestamps (record container format).
|
||||
- .log remains human-friendly and frame-oriented, but frame detection is now DLE-aware:
|
||||
- frame start = 0x10 0x02 (DLE STX)
|
||||
- frame end = 0x10 0x03 (DLE ETX)
|
||||
(No longer splits on bare 0x03.)
|
||||
- Marks/Info are stored as proper record types in .bin (no unsafe sentinel bytes).
|
||||
|
||||
Usage examples:
|
||||
python s3_bridge.py
|
||||
python s3_bridge.py --bw COM5 --s3 COM4 --baud 38400
|
||||
python s3_bridge.py --quiet
|
||||
|
||||
Annotation:
|
||||
While running, type 'm' and press Enter. You will be prompted for a label.
|
||||
The mark is written to the .log as:
|
||||
[HH:MM:SS.mmm] >>> MARK: your label here
|
||||
And to the .bin as an out-of-band sentinel (never valid frame data):
|
||||
FF FF FF FF <1-byte length> <label bytes>
|
||||
BIN record format (little-endian):
|
||||
[type:1][ts_us:8][len:4][payload:len]
|
||||
Types:
|
||||
0x01 BW->S3 bytes
|
||||
0x02 S3->BW bytes
|
||||
0x03 MARK (utf-8)
|
||||
0x04 INFO (utf-8)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -39,38 +33,56 @@ from typing import Optional
|
||||
|
||||
import serial
|
||||
|
||||
VERSION = "v0.5.0"
|
||||
|
||||
VERSION = "v0.4.0"
|
||||
DLE = 0x10
|
||||
STX = 0x02
|
||||
ETX = 0x03
|
||||
ACK = 0x41
|
||||
|
||||
# Sentinel prefix for binary markers. Four 0xFF bytes can never appear in
|
||||
# valid Instantel DLE-framed data (0xFF is not a legal protocol byte in any
|
||||
# framing position), so this sequence is unambiguously out-of-band.
|
||||
BIN_MARK_SENTINEL = b"\xFF\xFF\xFF\xFF"
|
||||
REC_BW = 0x01
|
||||
REC_S3 = 0x02
|
||||
REC_MARK = 0x03
|
||||
REC_INFO = 0x04
|
||||
|
||||
|
||||
def now_ts() -> str:
|
||||
# Local time with milliseconds, like [13:37:06.239]
|
||||
t = _dt.datetime.now()
|
||||
return t.strftime("%H:%M:%S.") + f"{int(t.microsecond/1000):03d}"
|
||||
|
||||
|
||||
def now_us() -> int:
|
||||
# Wall-clock microseconds (fine for correlation). If you want monotonic, we can switch.
|
||||
return int(time.time() * 1_000_000)
|
||||
|
||||
|
||||
def bytes_to_hex(b: bytes) -> str:
|
||||
return " ".join(f"{x:02X}" for x in b)
|
||||
|
||||
|
||||
def looks_like_text(b: bytes) -> bool:
|
||||
# Heuristic: mostly printable ASCII plus spaces
|
||||
if not b:
|
||||
return False
|
||||
printable = 0
|
||||
for x in b:
|
||||
if x in (9, 10, 13): # \t \n \r
|
||||
if x in (9, 10, 13):
|
||||
printable += 1
|
||||
elif 32 <= x <= 126:
|
||||
printable += 1
|
||||
return (printable / len(b)) >= 0.90
|
||||
|
||||
|
||||
def pack_u32_le(n: int) -> bytes:
|
||||
return bytes((n & 0xFF, (n >> 8) & 0xFF, (n >> 16) & 0xFF, (n >> 24) & 0xFF))
|
||||
|
||||
|
||||
def pack_u64_le(n: int) -> bytes:
|
||||
out = []
|
||||
for i in range(8):
|
||||
out.append((n >> (8 * i)) & 0xFF)
|
||||
return bytes(out)
|
||||
|
||||
|
||||
class SessionLogger:
|
||||
def __init__(self, path: str, bin_path: str):
|
||||
self.path = path
|
||||
@@ -83,24 +95,24 @@ class SessionLogger:
|
||||
with self._lock:
|
||||
self._fh.write(line + "\n")
|
||||
|
||||
def log_raw(self, data: bytes) -> None:
|
||||
"""Write raw bytes directly to the binary log."""
|
||||
def bin_write_record(self, rec_type: int, payload: bytes, ts_us: Optional[int] = None) -> None:
|
||||
if ts_us is None:
|
||||
ts_us = now_us()
|
||||
header = bytes([rec_type]) + pack_u64_le(ts_us) + pack_u32_le(len(payload))
|
||||
with self._lock:
|
||||
self._bin_fh.write(data)
|
||||
self._bin_fh.write(header)
|
||||
if payload:
|
||||
self._bin_fh.write(payload)
|
||||
|
||||
def log_mark(self, label: str) -> None:
|
||||
"""
|
||||
Write an annotation mark to both logs simultaneously.
|
||||
|
||||
.log — visually distinct line: [TS] >>> MARK: label
|
||||
.bin — out-of-band sentinel: FF FF FF FF <len> <label utf-8, max 255 bytes>
|
||||
"""
|
||||
ts = now_ts()
|
||||
label_bytes = label.encode("utf-8", errors="replace")[:255]
|
||||
sentinel = BIN_MARK_SENTINEL + bytes([len(label_bytes)]) + label_bytes
|
||||
with self._lock:
|
||||
self._fh.write(f"[{ts}] >>> MARK: {label}\n")
|
||||
self._bin_fh.write(sentinel)
|
||||
self.log_line(f"[{ts}] >>> MARK: {label}")
|
||||
self.bin_write_record(REC_MARK, label.encode("utf-8", errors="replace"))
|
||||
|
||||
def log_info(self, msg: str) -> None:
|
||||
ts = now_ts()
|
||||
self.log_line(f"[{ts}] [INFO] {msg}")
|
||||
self.bin_write_record(REC_INFO, msg.encode("utf-8", errors="replace"))
|
||||
|
||||
def close(self) -> None:
|
||||
with self._lock:
|
||||
@@ -112,51 +124,87 @@ class SessionLogger:
|
||||
self._bin_fh.close()
|
||||
|
||||
|
||||
class FrameAssembler:
|
||||
class DLEFrameSniffer:
|
||||
"""
|
||||
Maintains a rolling buffer of bytes for one direction and emits complete frames.
|
||||
We treat ETX=0x03 as an end-of-frame marker.
|
||||
DLE-aware sniffer for logging only.
|
||||
Extracts:
|
||||
- ACK bytes (0x41) as single-byte events
|
||||
- DLE-framed blocks starting at 10 02 and ending at 10 03
|
||||
- Occasional ASCII bursts (e.g. "Operating System") outside framing
|
||||
It does NOT modify bytes; it just segments them for the .log.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.buf = bytearray()
|
||||
|
||||
def push(self, chunk: bytes) -> list[bytes]:
|
||||
def push(self, chunk: bytes) -> list[tuple[str, bytes]]:
|
||||
if chunk:
|
||||
self.buf.extend(chunk)
|
||||
|
||||
frames: list[bytes] = []
|
||||
events: list[tuple[str, bytes]] = []
|
||||
|
||||
# Opportunistically peel off leading ACK(s) when idle-ish.
|
||||
# We do this only when an ACK is not inside a frame (frames start with DLE).
|
||||
while self.buf and self.buf[0] == ACK:
|
||||
events.append(("ACK", bytes([ACK])))
|
||||
del self.buf[0]
|
||||
|
||||
# Try to parse frames: find DLE STX then scan for DLE ETX
|
||||
while True:
|
||||
try:
|
||||
etx_i = self.buf.index(0x03)
|
||||
except ValueError:
|
||||
# Find start of frame
|
||||
start = self._find_dle_stx(self.buf)
|
||||
if start is None:
|
||||
# No frame start. Maybe text?
|
||||
txt = bytes(self.buf)
|
||||
if looks_like_text(txt):
|
||||
events.append(("TEXT", txt))
|
||||
self.buf.clear()
|
||||
break
|
||||
|
||||
# include ETX byte
|
||||
frame = bytes(self.buf[: etx_i + 1])
|
||||
del self.buf[: etx_i + 1]
|
||||
# Emit any leading text before the frame
|
||||
if start > 0:
|
||||
leading = bytes(self.buf[:start])
|
||||
if looks_like_text(leading):
|
||||
events.append(("TEXT", leading))
|
||||
else:
|
||||
# Unknown junk; still preserve in log as RAW so you can see it
|
||||
events.append(("RAW", leading))
|
||||
del self.buf[:start]
|
||||
|
||||
# ignore empty noise
|
||||
if frame:
|
||||
frames.append(frame)
|
||||
# Now buf starts with DLE STX
|
||||
end = self._find_dle_etx(self.buf)
|
||||
if end is None:
|
||||
break # need more bytes
|
||||
|
||||
return frames
|
||||
frame = bytes(self.buf[:end])
|
||||
del self.buf[:end]
|
||||
|
||||
def drain_as_text_if_any(self) -> Optional[bytes]:
|
||||
"""
|
||||
If buffer contains non-framed data (no ETX) and looks like text, emit it.
|
||||
Useful for things like "Operating System" that come as raw ASCII.
|
||||
"""
|
||||
if not self.buf:
|
||||
return None
|
||||
b = bytes(self.buf)
|
||||
if looks_like_text(b):
|
||||
self.buf.clear()
|
||||
return b
|
||||
events.append(("FRAME", frame))
|
||||
|
||||
# peel off any ACKs that may immediately follow
|
||||
while self.buf and self.buf[0] == ACK:
|
||||
events.append(("ACK", bytes([ACK])))
|
||||
del self.buf[0]
|
||||
|
||||
return events
|
||||
|
||||
@staticmethod
|
||||
def _find_dle_stx(b: bytearray) -> Optional[int]:
|
||||
for i in range(len(b) - 1):
|
||||
if b[i] == DLE and b[i + 1] == STX:
|
||||
return i
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _find_dle_etx(b: bytearray) -> Optional[int]:
|
||||
# Find first occurrence of DLE ETX after the initial DLE STX.
|
||||
# Return index *after* ETX (slice end).
|
||||
for i in range(2, len(b) - 1):
|
||||
if b[i] == DLE and b[i + 1] == ETX:
|
||||
return i + 2
|
||||
return None
|
||||
|
||||
|
||||
def open_serial(port: str, baud: int) -> serial.Serial:
|
||||
# timeout keeps read() from blocking forever, enabling clean Ctrl+C shutdown
|
||||
return serial.Serial(
|
||||
port=port,
|
||||
baudrate=baud,
|
||||
@@ -170,6 +218,7 @@ def open_serial(port: str, baud: int) -> serial.Serial:
|
||||
|
||||
def forward_loop(
|
||||
name: str,
|
||||
rec_type: int,
|
||||
src: serial.Serial,
|
||||
dst: serial.Serial,
|
||||
logger: SessionLogger,
|
||||
@@ -177,22 +226,24 @@ def forward_loop(
|
||||
quiet: bool,
|
||||
status_every_s: float,
|
||||
) -> None:
|
||||
assembler = FrameAssembler()
|
||||
sniffer = DLEFrameSniffer()
|
||||
last_status = time.monotonic()
|
||||
|
||||
while not stop.is_set():
|
||||
try:
|
||||
n = src.in_waiting
|
||||
if n:
|
||||
chunk = src.read(n if n < 4096 else 4096)
|
||||
else:
|
||||
chunk = src.read(1) # will return b"" after timeout
|
||||
chunk = src.read(n if n and n < 4096 else (4096 if n else 1))
|
||||
except serial.SerialException as e:
|
||||
logger.log_line(f"[{now_ts()}] [ERROR] {name} serial exception: {e!r}")
|
||||
break
|
||||
|
||||
if chunk:
|
||||
# forward immediately
|
||||
ts = now_us()
|
||||
|
||||
# 1) RAW BIN CAPTURE (absolute truth)
|
||||
logger.bin_write_record(rec_type, chunk, ts_us=ts)
|
||||
|
||||
# 2) Forward immediately (bridge behavior)
|
||||
try:
|
||||
dst.write(chunk)
|
||||
except serial.SerialTimeoutException:
|
||||
@@ -201,55 +252,42 @@ def forward_loop(
|
||||
logger.log_line(f"[{now_ts()}] [ERROR] {name} dst write exception: {e!r}")
|
||||
break
|
||||
|
||||
# frame-aware logging
|
||||
frames = assembler.push(chunk)
|
||||
for frame in frames:
|
||||
# Some devices send leading STX separately; we still log as-is.
|
||||
logger.log_line(f"[{now_ts()}] [{name}] {bytes_to_hex(frame)}")
|
||||
logger.log_raw(frame)
|
||||
# 3) Human-friendly .log segmentation (DLE-aware)
|
||||
for kind, payload in sniffer.push(chunk):
|
||||
if kind == "ACK":
|
||||
logger.log_line(f"[{now_ts()}] [{name}] [ACK] 41")
|
||||
elif kind == "FRAME":
|
||||
logger.log_line(f"[{now_ts()}] [{name}] {bytes_to_hex(payload)}")
|
||||
elif kind == "TEXT":
|
||||
try:
|
||||
s = payload.decode("ascii", errors="replace").strip("\r\n")
|
||||
except Exception:
|
||||
s = repr(payload)
|
||||
logger.log_line(f"[{now_ts()}] [{name}] [TEXT] {s}")
|
||||
else: # RAW
|
||||
logger.log_line(f"[{now_ts()}] [{name}] [RAW] {bytes_to_hex(payload)}")
|
||||
|
||||
# If we have non-ETX data that looks like text, flush it as TEXT
|
||||
text = assembler.drain_as_text_if_any()
|
||||
if text is not None:
|
||||
try:
|
||||
s = text.decode("ascii", errors="replace").strip("\r\n")
|
||||
except Exception:
|
||||
s = repr(text)
|
||||
logger.log_line(f"[{now_ts()}] [{name}] [TEXT] {s}")
|
||||
logger.log_raw(text)
|
||||
|
||||
# minimal console heartbeat (cheap)
|
||||
if not quiet and status_every_s > 0:
|
||||
now = time.monotonic()
|
||||
if (now - last_status) >= status_every_s:
|
||||
print(f"[{now_ts()}] {name} alive")
|
||||
last_status = now
|
||||
|
||||
# tiny sleep only when idle to avoid spin
|
||||
if not chunk:
|
||||
time.sleep(0.002)
|
||||
|
||||
|
||||
def annotation_loop(logger: SessionLogger, stop: threading.Event) -> None:
|
||||
"""
|
||||
Runs on the main thread (or a dedicated thread) reading stdin.
|
||||
Type 'm' + Enter to trigger an annotation prompt.
|
||||
Any other non-empty input is ignored with a hint.
|
||||
Bare Enter (empty line) is silently ignored to prevent accidental marks.
|
||||
"""
|
||||
print("[MARK] Type 'm' + Enter to annotate the capture. Ctrl+C to stop.")
|
||||
while not stop.is_set():
|
||||
try:
|
||||
line = input()
|
||||
except EOFError:
|
||||
# stdin closed (e.g. piped input exhausted)
|
||||
break
|
||||
except KeyboardInterrupt:
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
break
|
||||
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue # bare Enter — ignore silently
|
||||
continue
|
||||
|
||||
if line.lower() == "m":
|
||||
try:
|
||||
@@ -292,11 +330,12 @@ def main() -> int:
|
||||
log_path = os.path.join(args.logdir, f"s3_session_{ts}.log")
|
||||
bin_path = os.path.join(args.logdir, f"s3_session_{ts}.bin")
|
||||
logger = SessionLogger(log_path, bin_path)
|
||||
|
||||
print(f"[LOG] Writing hex log to {log_path}")
|
||||
print(f"[LOG] Writing binary log to {bin_path}")
|
||||
|
||||
logger.log_line(f"[{now_ts()}] [INFO] s3_bridge {VERSION} start")
|
||||
logger.log_line(f"[{now_ts()}] [INFO] BW={args.bw} S3={args.s3} baud={args.baud}")
|
||||
logger.log_info(f"s3_bridge {VERSION} start")
|
||||
logger.log_info(f"BW={args.bw} S3={args.s3} baud={args.baud}")
|
||||
logger.log_mark(f"SESSION START — BW={args.bw} S3={args.s3} baud={args.baud}")
|
||||
|
||||
stop = threading.Event()
|
||||
@@ -309,16 +348,15 @@ def main() -> int:
|
||||
t1 = threading.Thread(
|
||||
target=forward_loop,
|
||||
name="BW_to_S3",
|
||||
args=("BW->S3", bw, s3, logger, stop, args.quiet, args.status_every),
|
||||
args=("BW->S3", REC_BW, bw, s3, logger, stop, args.quiet, args.status_every),
|
||||
daemon=True,
|
||||
)
|
||||
t2 = threading.Thread(
|
||||
target=forward_loop,
|
||||
name="S3_to_BW",
|
||||
args=("S3->BW", s3, bw, logger, stop, args.quiet, args.status_every),
|
||||
args=("S3->BW", REC_S3, s3, bw, logger, stop, args.quiet, args.status_every),
|
||||
daemon=True,
|
||||
)
|
||||
# Annotation loop runs in its own daemon thread so it doesn't block shutdown
|
||||
t_ann = threading.Thread(
|
||||
target=annotation_loop,
|
||||
name="Annotator",
|
||||
@@ -335,12 +373,11 @@ def main() -> int:
|
||||
time.sleep(0.05)
|
||||
finally:
|
||||
print("\n[INFO] Ctrl+C detected, shutting down...")
|
||||
logger.log_line(f"[{now_ts()}] [INFO] shutdown requested")
|
||||
logger.log_info("shutdown requested")
|
||||
|
||||
stop.set()
|
||||
t1.join(timeout=1.0)
|
||||
t2.join(timeout=1.0)
|
||||
# t_ann is daemon — don't join, it may be blocked on input()
|
||||
|
||||
try:
|
||||
bw.close()
|
||||
@@ -352,12 +389,11 @@ def main() -> int:
|
||||
pass
|
||||
|
||||
logger.log_mark("SESSION END")
|
||||
logger.log_line(f"[{now_ts()}] [INFO] ports closed, session end")
|
||||
print("[LOG] Closing session log")
|
||||
logger.log_info("ports closed, session end")
|
||||
logger.close()
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user