From d92d01dc564b76fb1747439902e46467a564bbd6 Mon Sep 17 00:00:00 2001 From: serversdown Date: Tue, 9 Jun 2026 22:27:01 +0000 Subject: [PATCH] fix(slm): dashboard status from SLMM's cached roster, not a device call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "No recent check-in" read a roster field (slm_last_check) that nothing stamps, and the live-status fetch hit /measurement-state — which sends Measure? to the DEVICE every refresh, competing with DOD polling. Now read SLMM's /roster once: it carries each unit's cached NL43Status (last_seen, measurement_state) — a cache read, no device call. is_recent is derived from last_seen (advances only on a successful monitor poll, so staleness == not being reached) within 5 min, for all non-retired units (benched units can still be monitored). Net: fewer device calls AND the dashboard reflects the live monitor. Co-Authored-By: Claude Opus 4.8 (1M context) --- backend/routers/slm_dashboard.py | 48 ++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/backend/routers/slm_dashboard.py b/backend/routers/slm_dashboard.py index 3b93488..dfcdb80 100644 --- a/backend/routers/slm_dashboard.py +++ b/backend/routers/slm_dashboard.py @@ -91,29 +91,41 @@ async def get_slm_units( one_hour_ago = datetime.utcnow() - timedelta(hours=1) for unit in units: + # Legacy default from the roster field; refined from SLMM's cached status below. unit.is_recent = bool(unit.slm_last_check and unit.slm_last_check > one_hour_ago) + unit.measurement_state = None if include_measurement: - async def fetch_measurement_state(client: httpx.AsyncClient, unit_id: str) -> str | None: - try: - response = await client.get(f"{SLMM_BASE_URL}/api/nl43/{unit_id}/measurement-state") - if response.status_code == 200: - return response.json().get("measurement_state") - except Exception: - return None - return None - - deployed_units = [unit for unit in units if unit.deployed and not unit.retired] - if deployed_units: + # SLMM's /roster carries each unit's CACHED status (last_seen, + # measurement_state) from NL43Status — a DB read on SLMM's side, NOT a device + # call. The live monitor refreshes that cache ~every 1.3s, so this reflects + # real monitoring without sending Measure? to the device (which the old + # /measurement-state did) and competing with DOD polling. One call covers all. + slmm_status = {} + try: async with httpx.AsyncClient(timeout=3.0) as client: - tasks = [fetch_measurement_state(client, unit.id) for unit in deployed_units] - results = await asyncio.gather(*tasks, return_exceptions=True) + r = await client.get(f"{SLMM_BASE_URL}/api/nl43/roster") + if r.status_code == 200: + for dev in (r.json().get("devices") or []): + slmm_status[dev.get("unit_id")] = dev.get("status") or {} + except Exception: + slmm_status = {} - for unit, state in zip(deployed_units, results): - if isinstance(state, Exception): - unit.measurement_state = None - else: - unit.measurement_state = state + # "Recent" = the monitor has a fresh successful read. last_seen only advances + # on a successful poll, so staleness == the device isn't being reached. + recent_cutoff = datetime.utcnow() - timedelta(minutes=5) + for unit in units: + st = slmm_status.get(unit.id) + if not st: + continue + unit.measurement_state = st.get("measurement_state") + last_seen = st.get("last_seen") + if last_seen: + try: + ls = datetime.fromisoformat(last_seen.replace("Z", "")) + unit.is_recent = ls > recent_cutoff + except Exception: + pass return templates.TemplateResponse("partials/slm_device_list.html", { "request": request,