From de02f9cccfdc48f68a3b1210dfc0cc9372dd6310 Mon Sep 17 00:00:00 2001 From: Brian Harrison Date: Tue, 31 Mar 2026 12:02:52 -0400 Subject: [PATCH] Handle cold-boot timeout for TCP connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bridges/tcp_serial_bridge.py: increase default boot_delay 2s → 8s to cover MiniMate Plus cold-start time (unit wakes from RS-232 line assertion but takes 5-10s to be ready for POLL_PROBE). - sfm/server.py: add _run_with_retry() — on TCP connections only, retries once on ProtocolError. Serial timeouts are not retried (usually a real fault). Confirmed behaviour: unit wakes purely from RS-232 line voltage, no software wake-up frame needed. --- bridges/tcp_serial_bridge.py | 6 +++-- sfm/server.py | 46 +++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/bridges/tcp_serial_bridge.py b/bridges/tcp_serial_bridge.py index 333a39b..fde91ff 100644 --- a/bridges/tcp_serial_bridge.py +++ b/bridges/tcp_serial_bridge.py @@ -55,8 +55,10 @@ DEFAULT_TCP_PORT = 12345 CHUNK = 256 # bytes per read call SERIAL_TIMEOUT = 0.02 # serial read timeout (s) — non-blocking in practice TCP_TIMEOUT = 0.02 # socket recv timeout (s) -BOOT_DELAY = 2.0 # seconds to wait after opening serial port before - # forwarding data — mirrors the unit's startup beep +BOOT_DELAY = 8.0 # seconds to wait after opening serial port before + # forwarding data — unit cold-boot (beep + OS init) + # takes 5-10s from first RS-232 line assertion. + # Set to 0 if unit was already running before connect. # ── Bridge session ───────────────────────────────────────────────────────────── diff --git a/sfm/server.py b/sfm/server.py index 41c06f7..847705a 100644 --- a/sfm/server.py +++ b/sfm/server.py @@ -170,6 +170,31 @@ def _build_client( ) +def _is_tcp(host: Optional[str]) -> bool: + return bool(host) + + +def _run_with_retry(fn, *, is_tcp: bool): + """ + Call fn() and, for TCP connections only, retry once on ProtocolError. + + Rationale: when a MiniMate Plus is cold (just had its serial lines asserted + by the modem or a local bridge), it takes 5-10 seconds to boot before it + will respond to POLL_PROBE. The first request may time out during that boot + window; a single automatic retry is enough to recover once the unit is up. + + Serial connections are NOT retried — a timeout there usually means a real + problem (wrong port, wrong baud, cable unplugged). + """ + try: + return fn() + except ProtocolError as exc: + if not is_tcp: + raise + log.info("TCP poll timed out (unit may have been cold) — retrying once") + return fn() # let any second failure propagate normally + + # ── Endpoints ────────────────────────────────────────────────────────────────── @app.get("/health") @@ -195,8 +220,10 @@ def device_info( log.info("GET /device/info port=%s host=%s tcp_port=%d", port, host, tcp_port) try: - with _build_client(port, baud, host, tcp_port) as client: - info = client.connect() + def _do(): + with _build_client(port, baud, host, tcp_port) as client: + return client.connect() + info = _run_with_retry(_do, is_tcp=_is_tcp(host)) except HTTPException: raise except ProtocolError as exc: @@ -242,9 +269,10 @@ def device_events( log.info("GET /device/events port=%s host=%s", port, host) try: - with _build_client(port, baud, host, tcp_port) as client: - info = client.connect() - events = client.get_events() + def _do(): + with _build_client(port, baud, host, tcp_port) as client: + return client.connect(), client.get_events() + info, events = _run_with_retry(_do, is_tcp=_is_tcp(host)) except HTTPException: raise except ProtocolError as exc: @@ -278,9 +306,11 @@ def device_event( log.info("GET /device/event/%d port=%s host=%s", index, port, host) try: - with _build_client(port, baud, host, tcp_port) as client: - client.connect() - events = client.get_events() + def _do(): + with _build_client(port, baud, host, tcp_port) as client: + client.connect() + return client.get_events() + events = _run_with_retry(_do, is_tcp=_is_tcp(host)) except HTTPException: raise except ProtocolError as exc: