#!/usr/bin/env python3
"""
s3_parse.py — parse Instantel/Series3-like DLE-framed serial captures from a raw .bin logger.

Assumptions (based on your HxD patterns):
- Frames are delimited by DLE STX (0x10 0x02) ... DLE ETX (0x10 0x03)
- Inside payload, a literal 0x10 is escaped as 0x10 0x10
- After ETX, there may be a trailer (often CRC16, maybe + seq/flags)
"""

from __future__ import annotations

import argparse
import json
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Tuple

# Control bytes used by the framing layer.
DLE = 0x10
STX = 0x02
ETX = 0x03
EOT = 0x04


@dataclass
class Frame:
    """One DLE STX ... DLE ETX frame located in the capture."""

    index: int                 # 0-based position of the frame in the capture
    start_offset: int          # offset of the DLE that opens the frame (DLE STX)
    end_offset: int            # offset of the first byte after DLE ETX
    payload_raw: bytes         # as captured between STX..ETX, still escaped
    payload: bytes             # unescaped
    trailer: bytes             # bytes immediately after ETX (length chosen by user)
    crc_match: Optional[str]   # best-guess CRC type if verified, else None


def unescape_dle(payload_escaped: bytes) -> bytes:
    """Undo DLE stuffing: every 0x10 0x10 pair becomes a single literal 0x10.

    A DLE that is not followed by a second DLE is copied through unchanged
    (conservative): it could be real data or a malformed capture.
    """
    out = bytearray()
    i = 0
    n = len(payload_escaped)
    while i < n:
        b = payload_escaped[i]
        if b == DLE and i + 1 < n and payload_escaped[i + 1] == DLE:
            out.append(DLE)
            i += 2  # consume both bytes of the escape pair
            continue
        out.append(b)
        i += 1
    return bytes(out)


# ---- CRC helpers (we don't know which one yet, so we try a few) ----

def crc16_ibm(data: bytes) -> int:
    """Return CRC-16/IBM (aka ARC): reflected poly 0xA001, init 0x0000."""
    crc = 0x0000
    for b in data:
        crc ^= b
        for _ in range(8):
            if crc & 1:
                crc = (crc >> 1) ^ 0xA001
            else:
                crc >>= 1
    return crc & 0xFFFF


def crc16_ccitt_false(data: bytes) -> int:
    """Return CRC-16/CCITT-FALSE: poly 0x1021, init 0xFFFF, no reflection."""
    crc = 0xFFFF
    for b in data:
        crc ^= (b << 8)
        for _ in range(8):
            if crc & 0x8000:
                crc = ((crc << 1) ^ 0x1021) & 0xFFFF
            else:
                crc = (crc << 1) & 0xFFFF
    return crc


def crc16_x25(data: bytes) -> int:
    """Return CRC-16/X-25: reflected poly 0x8408, init 0xFFFF, xorout 0xFFFF."""
    crc = 0xFFFF
    for b in data:
        crc ^= b
        for _ in range(8):
            if crc & 1:
                crc = (crc >> 1) ^ 0x8408
            else:
                crc >>= 1
    return (crc ^ 0xFFFF) & 0xFFFF


# Candidate algorithms tried by best_crc_match(), keyed by display name.
CRC_FUNCS = {
    "CRC-16/IBM": crc16_ibm,
    "CRC-16/CCITT-FALSE": crc16_ccitt_false,
    "CRC-16/X-25": crc16_x25,
}


def parse_frames(blob: bytes, trailer_len: int) -> List[Frame]:
    """Extract every complete DLE STX ... DLE ETX frame from *blob*.

    Args:
        blob: Raw capture bytes.
        trailer_len: Number of bytes to take after each DLE ETX as the
            frame trailer. These bytes are skipped before the search for the
            next frame resumes. Must be >= 0.

    Returns:
        Frames in capture order, each with ``crc_match`` set to None. A frame
        whose DLE ETX is missing (truncated capture) ends the scan and is not
        returned.

    Raises:
        ValueError: If *trailer_len* is negative.
    """
    if trailer_len < 0:
        # A negative length would move the cursor backwards after each frame
        # and could re-parse the same bytes forever.
        raise ValueError("trailer_len must be >= 0")

    frames: List[Frame] = []
    n = len(blob)
    i = 0

    while i < n - 1:
        if not (blob[i] == DLE and blob[i + 1] == STX):
            # optional: you can also detect DLE EOT boundaries if useful later
            i += 1
            continue

        start = i
        payload_start = i + 2  # first byte after DLE STX

        # Find the terminating DLE ETX. A DLE DLE pair is an escaped literal
        # 0x10 and must be stepped over as a unit; otherwise the payload bytes
        # "10 10 03" would be mistaken for the end of the frame.
        j = payload_start
        payload_end = -1
        while j < n - 1:
            if blob[j] == DLE:
                following = blob[j + 1]
                if following == ETX:
                    payload_end = j
                    break
                if following == DLE:
                    j += 2
                    continue
            j += 1

        if payload_end < 0:
            break  # truncated: no closing DLE ETX before end of capture

        end = payload_end + 2  # first byte after DLE ETX
        payload_raw = blob[payload_start:payload_end]
        trailer = blob[end:end + trailer_len]  # may be short at end of capture

        frames.append(Frame(
            index=len(frames),
            start_offset=start,
            end_offset=end,
            payload_raw=payload_raw,
            payload=unescape_dle(payload_raw),
            trailer=trailer,
            crc_match=None,
        ))
        i = end + trailer_len

    return frames


def best_crc_match(payload: bytes, trailer: bytes, little_endian: bool) -> Optional[str]:
    """Try to interpret first 2 trailer bytes as CRC16 and see which algorithm matches.

    Args:
        payload: Bytes the candidate CRCs are computed over.
        trailer: Bytes following DLE ETX; only the first two are read.
        little_endian: Byte order used to decode the two trailer bytes.

    Returns:
        The matching algorithm name, several names joined by " / " if more
        than one matches, or None if nothing matches or the trailer is shorter
        than two bytes.
    """
    if len(trailer) < 2:
        return None

    given = int.from_bytes(
        trailer[:2],
        byteorder="little" if little_endian else "big",
        signed=False,
    )
    matches = [name for name, fn in CRC_FUNCS.items() if fn(payload) == given]
    return " / ".join(matches) if matches else None


def main() -> None:
    """Command-line entry point: parse a capture, print a summary, emit JSONL."""
    ap = argparse.ArgumentParser(description="Parse DLE-framed serial capture .bin into frames (and guess CRC).")
    ap.add_argument("binfile", type=Path, help="Path to capture .bin file")
    ap.add_argument("--trailer-len", type=int, default=2, help="Bytes to capture after DLE ETX (default: 2)")
    ap.add_argument("--crc", action="store_true", help="Attempt CRC match using first 2 trailer bytes")
    ap.add_argument("--crc-endian", choices=["little", "big"], default="little", help="CRC endian when reading trailer")
    ap.add_argument("--out", type=Path, default=None, help="Write JSONL output to this file")
    args = ap.parse_args()

    if args.trailer_len < 0:
        # Report bad input as a usage error instead of a traceback.
        ap.error("--trailer-len must be >= 0")

    blob = args.binfile.read_bytes()
    frames = parse_frames(blob, trailer_len=args.trailer_len)

    little = (args.crc_endian == "little")
    if args.crc:
        for f in frames:
            f.crc_match = best_crc_match(f.payload, f.trailer, little_endian=little)

    # Summary
    total = len(frames)
    crc_hits = sum(1 for f in frames if f.crc_match) if args.crc else 0
    print(f"Frames found: {total}")
    if args.crc:
        print(f"CRC matches: {crc_hits} ({(crc_hits/total*100.0):.1f}%)" if total else "CRC matches: 0")

    # Emit JSONL
    lines = [
        json.dumps({
            "index": f.index,
            "start_offset": f.start_offset,
            "end_offset": f.end_offset,
            "payload_len": len(f.payload),
            "payload_hex": f.payload.hex(),
            "trailer_hex": f.trailer.hex(),
            "crc_match": f.crc_match,
        })
        for f in frames
    ]

    if args.out:
        # Terminate each record individually so that zero frames produce an
        # empty file rather than a file holding a single blank line.
        args.out.write_text("".join(f"{line}\n" for line in lines), encoding="utf-8")
        print(f"Wrote: {args.out}")
    else:
        # Print first few only (avoid spewing your terminal)
        for line in lines[:10]:
            print(line)
        if len(lines) > 10:
            print(f"... ({len(lines) - 10} more)")


if __name__ == "__main__":
    main()