Files
seismo-relay/parsers/s3_parser.py

413 lines
11 KiB
Python

#!/usr/bin/env python3
"""
s3_parser.py — Unified Instantel frame parser (S3 + BW).
Modes:
- s3: DLE STX (10 02) ... DLE ETX (10 03)
- bw: ACK+STX (41 02) ... ETX (03)
Stuffing:
- Literal 0x10 in payload is stuffed as 10 10 in both directions.
Checksums:
- BW frames appear to use more than one checksum style depending on message type.
Small frames often validate with 1-byte SUM8.
Large config/write frames appear to use a 2-byte CRC16 variant.
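In BW mode an ETX (03) is accepted as a frame terminator only when it is at the
end of the capture or is followed by the next frame start (ACK+STX); any other
0x03 inside a frame is kept as payload.
Each accepted frame is then checked (unless --no-checksum is given) using AUTO
checksum matching, trying in order:
- SUM8 (1 byte)
- SUM8_LARGE (1 byte, large write frames)
- CRC16 variants (2 bytes), both little/big endian
The first match is recorded as the frame's checksum type; if none match, the
frame is still reported but flagged with checksum_valid = false.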
"""
from __future__ import annotations
import argparse
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Dict, List, Optional, Tuple
# Framing control bytes shared by the S3 and BW parsers.
DLE = 0x10  # escape byte: prefixes STX/ETX in S3 framing and is doubled (10 10) when stuffed
STX = 0x02  # second byte of both frame-start signatures (DLE STX / ACK STX)
ETX = 0x03  # frame terminator: DLE-prefixed in S3 mode, bare in BW mode
ACK = 0x41  # first byte of the BW frame-start signature (41 02); note: not ASCII ACK (0x06)
__version__ = "0.2.2"  # shown in the CLI banner printed by main()
@dataclass
class Frame:
    """One frame extracted from a capture by parse_s3() or parse_bw()."""

    # Zero-based position of this frame in the order frames were found.
    index: int
    # Offset in the input of the first byte of the start signature
    # (the DLE in S3 mode, the ACK in BW mode).
    start_offset: int
    # Offset one past the terminating ETX byte (trailer bytes are not included).
    end_offset: int
    payload_raw: bytes # de-stuffed bytes between STX..ETX (includes checksum bytes at end)
    # payload_raw with the checksum bytes removed when a checksum was recognised;
    # identical to payload_raw when no checksum was checked or none matched.
    payload: bytes # payload without checksum bytes
    # Up to trailer_len bytes that followed ETX (shorter if the input ended early).
    trailer: bytes
    # None when no validation was attempted; otherwise True/False for match/no match.
    checksum_valid: Optional[bool]
    # Name of the checksum scheme that matched (e.g. "SUM8"), or None.
    checksum_type: Optional[str]
    # Hex string of the matched checksum bytes, or None.
    checksum_hex: Optional[str]
# ------------------------
# Checksum / CRC helpers
# ------------------------
def checksum8_sum(data: bytes) -> int:
    """Return SUM8 of *data*: the byte values added together, modulo 256."""
    total = 0
    for value in data:
        # Masking on every step yields the same result as masking the final sum.
        total = (total + value) & 0xFF
    return total
def crc16_ibm(data: bytes) -> int:
    """
    Compute CRC-16/IBM (aka ARC) over *data*.

    Bit-reflected algorithm: polynomial 0xA001, initial value 0x0000,
    no final XOR. Returns a value in 0..0xFFFF.
    """
    reg = 0x0000
    for value in data:
        reg ^= value
        for _bit in range(8):
            lsb_set = reg & 1
            reg >>= 1
            # Apply the reflected polynomial whenever a 1 bit is shifted out.
            if lsb_set:
                reg ^= 0xA001
    return reg & 0xFFFF
def crc16_ccitt_false(data: bytes) -> int:
    """
    Compute CRC-16/CCITT-FALSE over *data*.

    MSB-first algorithm: polynomial 0x1021, initial value 0xFFFF,
    no reflection and no final XOR. Returns a value in 0..0xFFFF.
    """
    reg = 0xFFFF
    for value in data:
        # Feed the next byte into the high half of the register.
        reg ^= value << 8
        for _bit in range(8):
            msb_set = reg & 0x8000
            reg = (reg << 1) & 0xFFFF
            if msb_set:
                reg ^= 0x1021
    return reg
def crc16_x25(data: bytes) -> int:
    """
    Compute CRC-16/X-25 over *data*.

    Bit-reflected algorithm: polynomial 0x8408, initial value 0xFFFF,
    result XORed with 0xFFFF. Returns a value in 0..0xFFFF.
    """
    reg = 0xFFFF
    for value in data:
        reg ^= value
        for _bit in range(8):
            lsb_set = reg & 1
            reg >>= 1
            if lsb_set:
                reg ^= 0x8408
    # Final inversion (xorout) is part of the X-25 definition.
    return (reg ^ 0xFFFF) & 0xFFFF
# Candidate CRC16 algorithms tried by _try_validate_crc16(), keyed by the name
# that is reported (with an _LE/_BE suffix) as the frame's checksum type.
# Entries are tried in insertion order, so the first matching algorithm wins.
CRC16_FUNCS: Dict[str, Callable[[bytes], int]] = {
    "CRC16_IBM": crc16_ibm,
    "CRC16_CCITT_FALSE": crc16_ccitt_false,
    "CRC16_X25": crc16_x25,
}
def _try_validate_sum8(body: bytes) -> Optional[Tuple[bytes, bytes, str]]:
    """
    Interpret the last byte of *body* as a SUM8 checksum of everything before it.

    body = payload + chk8
    Returns (payload, chk_bytes, "SUM8") when the checksum matches,
    or None when it does not (or when body is empty).
    """
    if not body:
        return None
    data, stored = body[:-1], body[-1]
    if checksum8_sum(data) != stored:
        return None
    return data, bytes([stored]), "SUM8"
def _try_validate_sum8_large(body: bytes) -> Optional[Tuple[bytes, bytes, str]]:
"""
Large BW->S3 write frame checksum (SUBs 68, 69, 71, 82, 1A with data).
Formula: (sum(b for b in payload[2:-1] if b != 0x10) + 0x10) & 0xFF
- Starts from byte [2], skipping CMD (0x10) and DLE (0x10) at [0][1]
- Skips all 0x10 bytes in the covered range
- Adds 0x10 as a constant offset
- body[-1] is the checksum byte
Confirmed across 20 frames from two independent captures (2026-03-12).
"""
if len(body) < 3:
return None
payload = body[:-1]
chk = body[-1]
calc = (sum(b for b in payload[2:] if b != 0x10) + 0x10) & 0xFF
if calc == chk:
return payload, bytes([chk]), "SUM8_LARGE"
return None
def _try_validate_crc16(body: bytes) -> Optional[Tuple[bytes, bytes, str]]:
    """
    Interpret the last two bytes of *body* as a CRC16 of everything before them.

    body = payload + crc16(2 bytes)
    Every algorithm in CRC16_FUNCS is tried in order; for each one the stored
    value is read little-endian first, then big-endian.
    Returns (payload, chk_bytes, "<ALGO>_LE" or "<ALGO>_BE") for the first
    match, or None when nothing matches or body is shorter than 2 bytes.
    """
    if len(body) < 2:
        return None
    data, tail = body[:-2], body[-2:]
    readings = (
        ("LE", int.from_bytes(tail, "little", signed=False)),
        ("BE", int.from_bytes(tail, "big", signed=False)),
    )
    for algo, crc_fn in CRC16_FUNCS.items():
        computed = crc_fn(data)
        for suffix, stored in readings:
            if computed == stored:
                return data, tail, f"{algo}_{suffix}"
    return None
def validate_bw_body_auto(body: bytes) -> Optional[Tuple[bytes, bytes, str]]:
    """
    Try to interpret the tail of *body* as a checksum in several ways.

    The schemes are attempted in a fixed priority order and the first one
    that matches wins:
      1. plain SUM8 (small frames: POLL, read commands)
      2. SUM8_LARGE (large BW->S3 write frames: SUBs 68, 69, 71, 82, 1A with data)
      3. the CRC16 variants
    Returns (payload, checksum_bytes, checksum_type) for the first match,
    or None when no scheme matches.
    """
    validators = (
        _try_validate_sum8,
        _try_validate_sum8_large,
        _try_validate_crc16,
    )
    for validator in validators:
        hit = validator(body)
        if hit:
            return hit
    return None
# ------------------------
# S3 MODE (DLE framed)
# ------------------------
def parse_s3(blob: bytes, trailer_len: int) -> List[Frame]:
    """
    Extract DLE-framed S3 frames (DLE STX ... DLE ETX) from a raw capture.

    Bytes outside a frame are ignored. Inside a frame the DLE escape is
    resolved as follows:
      - DLE DLE  -> one literal DLE byte
      - DLE ETX  -> end of frame
      - DLE xx   -> both bytes are kept as literal data
    A frame that is still open when the input ends is not reported.

    Args:
        blob: raw capture bytes.
        trailer_len: number of bytes following DLE ETX that are stored in
            Frame.trailer and skipped before scanning for the next frame.
            Must be >= 0.

    Returns:
        The frames in the order they were found. No checksum is checked in
        this mode, so payload equals payload_raw and all checksum fields
        are None.

    Raises:
        ValueError: if trailer_len is negative. A negative length would move
            the scan position backwards after each frame and could re-parse
            the same bytes forever.
    """
    if trailer_len < 0:
        raise ValueError(f"trailer_len must be >= 0, got {trailer_len}")

    frames: List[Frame] = []
    IDLE = 0
    IN_FRAME = 1
    AFTER_DLE = 2
    state = IDLE
    body = bytearray()
    start_offset = 0
    i = 0
    n = len(blob)
    while i < n:
        b = blob[i]
        if state == IDLE:
            # Hunt for the DLE STX start signature; anything else is skipped.
            if b == DLE and i + 1 < n and blob[i + 1] == STX:
                start_offset = i
                body.clear()
                state = IN_FRAME
                i += 2
            else:
                i += 1
        elif state == IN_FRAME:
            if b == DLE:
                # Meaning depends on the byte that follows.
                state = AFTER_DLE
            else:
                body.append(b)
            i += 1
        elif b == DLE:
            # AFTER_DLE: stuffed DLE DLE collapses to a single literal DLE.
            body.append(DLE)
            state = IN_FRAME
            i += 1
        elif b == ETX:
            # AFTER_DLE: DLE ETX terminates the frame.
            trailer_start = i + 1
            trailer_end = trailer_start + trailer_len
            data = bytes(body)
            # For S3 mode we don't assume checksum type here yet.
            frames.append(Frame(
                index=len(frames),
                start_offset=start_offset,
                end_offset=i + 1,
                payload_raw=data,
                payload=data,
                trailer=blob[trailer_start:trailer_end],
                checksum_valid=None,
                checksum_type=None,
                checksum_hex=None,
            ))
            state = IDLE
            # Resume scanning after the trailer (never backwards: trailer_len >= 0).
            i = trailer_end
        else:
            # AFTER_DLE: unexpected DLE + byte -> treat both as literal data.
            body.append(DLE)
            body.append(b)
            state = IN_FRAME
            i += 1
    return frames
# ------------------------
# BW MODE (ACK+STX framed, bare ETX)
# ------------------------
def parse_bw(blob: bytes, trailer_len: int, validate_checksum: bool) -> List[Frame]:
    """
    Extract BW frames (ACK STX ... ETX) from a raw capture.

    Bytes outside a frame are ignored. Inside a frame, DLE xx yields the
    literal byte xx for any xx (full DLE stuffing). Because ETX is not
    escaped in this mode, a 0x03 byte only ends the frame when one of the
    following holds; otherwise it is kept as payload:
      - the next frame start (ACK STX) is found right after the trailer, or
      - the ETX is the last byte of the input, or
      - the ETX is followed by exactly trailer_len bytes and then the input ends.
    A frame that is still open when the input ends is not reported.

    Args:
        blob: raw capture bytes.
        trailer_len: number of bytes following ETX that are stored in
            Frame.trailer and skipped before scanning for the next frame.
            Must be >= 0.
        validate_checksum: when true, each frame body is passed to
            validate_bw_body_auto(); on a match the checksum bytes are
            stripped from Frame.payload and the checksum fields are filled
            in, otherwise checksum_valid is False. When false, the checksum
            fields stay None and payload equals payload_raw.

    Returns:
        The frames in the order they were found.

    Raises:
        ValueError: if trailer_len is negative. A negative length would move
            the scan position backwards after each frame and could re-parse
            the same bytes forever.
    """
    if trailer_len < 0:
        raise ValueError(f"trailer_len must be >= 0, got {trailer_len}")

    frames: List[Frame] = []
    IDLE = 0
    IN_FRAME = 1
    AFTER_DLE = 2
    state = IDLE
    body = bytearray()
    start_offset = 0
    i = 0
    n = len(blob)
    while i < n:
        b = blob[i]

        if state == IDLE:
            # Frame start signature: ACK + STX
            if b == ACK and i + 1 < n and blob[i + 1] == STX:
                start_offset = i
                body.clear()
                state = IN_FRAME
                i += 2
            else:
                i += 1
            continue

        if state == AFTER_DLE:
            # DLE XX => literal XX for any XX (full DLE stuffing)
            body.append(b)
            state = IN_FRAME
            i += 1
            continue

        # state == IN_FRAME
        if b == DLE:
            state = AFTER_DLE
            i += 1
            continue
        if b != ETX:
            # Normal byte
            body.append(b)
            i += 1
            continue

        # Candidate end-of-frame. The next frame can only begin after this
        # frame's trailer, so the look-ahead starts at trailer_end rather
        # than directly behind ETX; looking at i + 1 would make the trailer
        # slice swallow the next frame's ACK STX whenever trailer_len > 0.
        trailer_start = i + 1
        trailer_end = trailer_start + trailer_len
        next_is_start = (
            trailer_end + 1 < n
            and blob[trailer_end] == ACK
            and blob[trailer_end + 1] == STX
        )
        at_eof = (i == n - 1) or (trailer_end == n)
        if not (next_is_start or at_eof):
            # Not a real boundary -> payload byte
            body.append(ETX)
            i += 1
            continue

        raw = bytes(body)
        payload = raw
        chk_valid = None
        chk_type = None
        chk_hex = None
        if validate_checksum:
            hit = validate_bw_body_auto(raw)
            if hit:
                payload, chk_bytes, chk_type = hit
                chk_valid = True
                chk_hex = chk_bytes.hex()
            else:
                chk_valid = False
        frames.append(Frame(
            index=len(frames),
            start_offset=start_offset,
            end_offset=i + 1,
            payload_raw=raw,
            payload=payload,
            trailer=blob[trailer_start:trailer_end],
            checksum_valid=chk_valid,
            checksum_type=chk_type,
            checksum_hex=chk_hex,
        ))
        state = IDLE
        # Resume scanning after the trailer (never backwards: trailer_len >= 0).
        i = trailer_end
    return frames
# ------------------------
# CLI
# ------------------------
def main() -> None:
    """
    Command-line entry point.

    Reads the capture file, parses it in the selected mode (s3 or bw) and
    emits one JSON object per frame. With --out the objects are written to
    that file, one per line; otherwise the first 10 are printed followed by
    a count of the remaining ones.
    """
    ap = argparse.ArgumentParser(description="Parse Instantel S3/BW binary captures.")
    ap.add_argument("binfile", type=Path)
    ap.add_argument("--mode", choices=["s3", "bw"], default="s3")
    ap.add_argument("--trailer-len", type=int, default=0)
    ap.add_argument("--no-checksum", action="store_true")
    ap.add_argument("--out", type=Path, default=None)
    args = ap.parse_args()

    # A negative trailer length would make the parsers step backwards through
    # the capture; reject it here with a normal usage error (exit status 2).
    if args.trailer_len < 0:
        ap.error("--trailer-len must be >= 0")

    print(f"s3_parser v{__version__}")
    blob = args.binfile.read_bytes()
    if args.mode == "s3":
        frames = parse_s3(blob, args.trailer_len)
    else:
        frames = parse_bw(blob, args.trailer_len, validate_checksum=not args.no_checksum)
    print("Frames found:", len(frames))

    lines = [
        json.dumps({
            "index": f.index,
            "start_offset": f.start_offset,
            "end_offset": f.end_offset,
            "payload_len": len(f.payload),
            "payload_hex": f.payload.hex(),
            "trailer_hex": f.trailer.hex(),
            "checksum_valid": f.checksum_valid,
            "checksum_type": f.checksum_type,
            "checksum_hex": f.checksum_hex,
        })
        for f in frames
    ]

    if args.out:
        # One newline-terminated JSON object per frame; with no frames the
        # file is left empty instead of containing a single blank line.
        args.out.write_text("".join(f"{line}\n" for line in lines), encoding="utf-8")
        print(f"Wrote: {args.out}")
        return

    preview_limit = 10
    for line in lines[:preview_limit]:
        print(line)
    if len(lines) > preview_limit:
        print(f"... ({len(lines) - preview_limit} more)")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()