feat: per-unit deactivate and global SLMM standby

Lets an instance stop occupying a device's single TCP connection slot so
another instance (e.g. prod) can take over.

Per-unit:
- POST /api/nl43/{unit_id}/deactivate — poll_enabled=False (persisted) +
  drop the connection (waits up to 10s for in-flight ops via the device
  lock, then discards). Unit stays dormant across restarts.
- POST /api/nl43/{unit_id}/activate — re-enable polling.

Global standby:
- POST /api/nl43/_system/standby — poller idles and releases ALL
  connections; the loop keeps re-releasing so the instance holds no slots.
- POST /api/nl43/_system/resume — resume polling.
- GET  /api/nl43/_system/status — active vs standby + active_connections.
- SLMM_POLLING_ENABLED=false starts an instance in standby (persistent
  way to keep a dev box from latching onto a prod-owned device).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-08 22:45:52 +00:00
parent 0793e7df01
commit b954eb8c89
2 changed files with 137 additions and 4 deletions
+93
View File
@@ -153,6 +153,99 @@ async def disconnect_device(unit_id: str, db: Session = Depends(get_db)):
}
@router.post("/{unit_id}/deactivate")
async def deactivate_device(unit_id: str, db: Session = Depends(get_db)):
    """Put one unit to sleep: switch off its polling flag and drop its connection.

    ``poll_enabled=False`` is committed to the database before the connection
    is touched, so the flag stays off until ``/activate`` sets it again.

    Args:
        unit_id: Identifier of the NL43 unit to deactivate.
        db: Database session injected by FastAPI.

    Returns:
        A JSON-serialisable dict confirming that polling is disabled.

    Raises:
        HTTPException: 404 when no NL43 config exists for ``unit_id``.
    """
    config = db.query(NL43Config).filter_by(unit_id=unit_id).first()
    if not config:
        raise HTTPException(status_code=404, detail="NL43 config not found")

    config.poll_enabled = False
    db.commit()

    from app.services import _connection_pool, _get_device_lock

    pool_key = f"{config.host}:{config.tcp_port}"
    device_lock = await _get_device_lock(pool_key)

    # Give an in-flight poll/command up to 10 seconds to finish so that its
    # connection is back in the pool. A long-lived holder of the lock must not
    # stall this request forever, so on timeout we carry on without the lock.
    try:
        await asyncio.wait_for(device_lock.acquire(), timeout=10.0)
    except asyncio.TimeoutError:
        holds_lock = False
    else:
        holds_lock = True

    # The pooled connection is discarded whether or not the lock was obtained;
    # the lock is only released if this request actually acquired it.
    try:
        await _connection_pool.discard(pool_key)
    finally:
        if holds_lock:
            device_lock.release()

    response = {
        "status": "ok",
        "unit_id": unit_id,
        "poll_enabled": False,
        "message": "Polling disabled and connection closed for this unit",
    }
    return response
@router.post("/{unit_id}/activate")
async def activate_device(unit_id: str, db: Session = Depends(get_db)):
    """Turn background polling back on for a unit.

    Sets ``poll_enabled=True`` on the unit's config row and commits it. No
    connection is opened here; only the flag is changed.

    Args:
        unit_id: Identifier of the NL43 unit to reactivate.
        db: Database session injected by FastAPI.

    Returns:
        A JSON-serialisable dict confirming that polling is enabled.

    Raises:
        HTTPException: 404 when no NL43 config exists for ``unit_id``.
    """
    config = db.query(NL43Config).filter_by(unit_id=unit_id).first()
    if not config:
        raise HTTPException(status_code=404, detail="NL43 config not found")

    config.poll_enabled = True
    db.commit()

    response = {
        "status": "ok",
        "unit_id": unit_id,
        "poll_enabled": True,
        "message": "Polling enabled for this unit",
    }
    return response
@router.get("/_system/status")
async def system_status():
    """Tell the caller whether this SLMM instance is polling or in standby.

    Returns:
        A dict with ``mode`` ("active" or "standby"), the raw
        ``polling_active`` flag from the poller, and the connection pool's
        ``active_connections`` count (0 when the pool stats omit that key).
    """
    # NOTE(review): this path has the same shape as a "/{unit_id}/status"
    # route. If such a GET route is registered earlier in this router it will
    # capture the request with unit_id="_system" — confirm route ordering.
    from app.background_poller import poller
    from app.services import _connection_pool

    mode = "active" if poller.is_active() else "standby"
    polling_active = poller.is_active()
    pool_stats = _connection_pool.get_stats()

    return {
        "status": "ok",
        "mode": mode,
        "polling_active": polling_active,
        "active_connections": pool_stats.get("active_connections", 0),
    }
@router.post("/_system/standby")
async def system_standby():
    """Switch the whole SLMM instance into standby.

    Delegates to ``poller.set_active(False)``. The intent is that polling of
    ALL devices stops and every connection is released, so that this instance
    no longer occupies device slots (e.g. so a prod instance can take over).
    This is a runtime-only switch: after a restart the instance goes back to
    its SLMM_POLLING_ENABLED default.

    Returns:
        A dict confirming that the instance is now in standby mode.
    """
    from app.background_poller import poller

    await poller.set_active(False)

    response = {
        "status": "ok",
        "mode": "standby",
        "message": "Polling stopped and all device connections released",
    }
    return response
@router.post("/_system/resume")
async def system_resume():
    """Leave standby and resume polling for the whole instance.

    Delegates to ``poller.set_active(True)``.

    Returns:
        A dict confirming that the instance is back in active mode.
    """
    # NOTE(review): if a per-unit "/{unit_id}/resume" POST route is registered
    # earlier in this router, it would capture this path with
    # unit_id="_system" — confirm route ordering.
    from app.background_poller import poller

    await poller.set_active(True)

    response = {"status": "ok", "mode": "active", "message": "Polling resumed"}
    return response
# ============================================================================
# GLOBAL POLLING STATUS ENDPOINT (must be before /{unit_id} routes)
# ============================================================================