Handle cold-boot timeout for TCP connections
- bridges/tcp_serial_bridge.py: increase the default boot_delay from 2s to 8s to cover the MiniMate Plus cold-start time (the unit wakes on RS-232 line assertion but takes 5-10s to be ready for POLL_PROBE).
- sfm/server.py: add _run_with_retry() — on TCP connections only, retry once on ProtocolError. Serial timeouts are not retried (they usually indicate a real fault).

Confirmed behaviour: the unit wakes purely from RS-232 line voltage; no software wake-up frame is needed.
This commit is contained in:
@@ -55,8 +55,10 @@ DEFAULT_TCP_PORT = 12345
|
|||||||
CHUNK = 256 # bytes per read call
|
CHUNK = 256 # bytes per read call
|
||||||
SERIAL_TIMEOUT = 0.02 # serial read timeout (s) — non-blocking in practice
|
SERIAL_TIMEOUT = 0.02 # serial read timeout (s) — non-blocking in practice
|
||||||
TCP_TIMEOUT = 0.02 # socket recv timeout (s)
|
TCP_TIMEOUT = 0.02 # socket recv timeout (s)
|
||||||
BOOT_DELAY = 2.0 # seconds to wait after opening serial port before
|
BOOT_DELAY = 8.0 # seconds to wait after opening serial port before
|
||||||
# forwarding data — mirrors the unit's startup beep
|
# forwarding data — unit cold-boot (beep + OS init)
|
||||||
|
# takes 5-10s from first RS-232 line assertion.
|
||||||
|
# Set to 0 if unit was already running before connect.
|
||||||
|
|
||||||
|
|
||||||
# ── Bridge session ─────────────────────────────────────────────────────────────
|
# ── Bridge session ─────────────────────────────────────────────────────────────
|
||||||
|
|||||||
@@ -170,6 +170,31 @@ def _build_client(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_tcp(host: Optional[str]) -> bool:
|
||||||
|
return bool(host)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_with_retry(fn, *, is_tcp: bool):
    """
    Call fn() and, for TCP connections only, retry once on ProtocolError.

    Rationale: when a MiniMate Plus is cold (just had its serial lines asserted
    by the modem or a local bridge), it takes 5-10 seconds to boot before it
    will respond to POLL_PROBE. The first request may fail during that boot
    window; a single automatic retry is enough to recover once the unit is up.

    Serial connections are NOT retried — a timeout there usually means a real
    problem (wrong port, wrong baud, cable unplugged).

    Args:
        fn: Zero-argument callable that performs the whole request. On retry
            it is invoked a second time from scratch.
        is_tcp: True when the request goes over TCP; enables the single retry.

    Returns:
        Whatever fn() returns (from the first call, or from the retry).

    Raises:
        ProtocolError: immediately for non-TCP connections, or from the second
            attempt for TCP connections. Any other exception raised by fn()
            propagates unchanged and is never retried.
    """
    try:
        return fn()
    except ProtocolError as exc:
        if not is_tcp:
            raise
        # Record the actual error: ProtocolError is not necessarily a timeout.
        log.info(
            "TCP poll failed (%s) — unit may have been cold; retrying once",
            exc,
        )

    # Retry outside the except block so a second failure propagates as a
    # clean, standalone exception rather than being chained to the first.
    return fn()
|
||||||
|
|
||||||
|
|
||||||
# ── Endpoints ──────────────────────────────────────────────────────────────────
|
# ── Endpoints ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
@@ -195,8 +220,10 @@ def device_info(
|
|||||||
log.info("GET /device/info port=%s host=%s tcp_port=%d", port, host, tcp_port)
|
log.info("GET /device/info port=%s host=%s tcp_port=%d", port, host, tcp_port)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with _build_client(port, baud, host, tcp_port) as client:
|
def _do():
|
||||||
info = client.connect()
|
with _build_client(port, baud, host, tcp_port) as client:
|
||||||
|
return client.connect()
|
||||||
|
info = _run_with_retry(_do, is_tcp=_is_tcp(host))
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except ProtocolError as exc:
|
except ProtocolError as exc:
|
||||||
@@ -242,9 +269,10 @@ def device_events(
|
|||||||
log.info("GET /device/events port=%s host=%s", port, host)
|
log.info("GET /device/events port=%s host=%s", port, host)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with _build_client(port, baud, host, tcp_port) as client:
|
def _do():
|
||||||
info = client.connect()
|
with _build_client(port, baud, host, tcp_port) as client:
|
||||||
events = client.get_events()
|
return client.connect(), client.get_events()
|
||||||
|
info, events = _run_with_retry(_do, is_tcp=_is_tcp(host))
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except ProtocolError as exc:
|
except ProtocolError as exc:
|
||||||
@@ -278,9 +306,11 @@ def device_event(
|
|||||||
log.info("GET /device/event/%d port=%s host=%s", index, port, host)
|
log.info("GET /device/event/%d port=%s host=%s", index, port, host)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with _build_client(port, baud, host, tcp_port) as client:
|
def _do():
|
||||||
client.connect()
|
with _build_client(port, baud, host, tcp_port) as client:
|
||||||
events = client.get_events()
|
client.connect()
|
||||||
|
return client.get_events()
|
||||||
|
events = _run_with_retry(_do, is_tcp=_is_tcp(host))
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except ProtocolError as exc:
|
except ProtocolError as exc:
|
||||||
|
|||||||
Reference in New Issue
Block a user