fix(slm): dashboard status from SLMM's cached roster, not a device call
"No recent check-in" was driven by a roster field (slm_last_check) that nothing ever stamps, and the live-status fetch hit /measurement-state — which sends Measure? to the DEVICE on every refresh, competing with DOD polling. Now read SLMM's /roster once instead: it carries each unit's cached NL43Status (last_seen, measurement_state), so it is a cache read, not a device call. is_recent is now true when last_seen falls within the last 5 minutes; last_seen advances only on a successful monitor poll, so a stale value means the device is not being reached. This applies to all non-retired units (benched units can still be monitored). Net: fewer device calls AND the dashboard reflects the live monitor. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -91,29 +91,41 @@ async def get_slm_units(
|
|||||||
|
|
||||||
one_hour_ago = datetime.utcnow() - timedelta(hours=1)
|
one_hour_ago = datetime.utcnow() - timedelta(hours=1)
|
||||||
for unit in units:
|
for unit in units:
|
||||||
|
# Legacy default from the roster field; refined from SLMM's cached status below.
|
||||||
unit.is_recent = bool(unit.slm_last_check and unit.slm_last_check > one_hour_ago)
|
unit.is_recent = bool(unit.slm_last_check and unit.slm_last_check > one_hour_ago)
|
||||||
|
unit.measurement_state = None
|
||||||
|
|
||||||
if include_measurement:
|
if include_measurement:
|
||||||
async def fetch_measurement_state(client: httpx.AsyncClient, unit_id: str) -> str | None:
|
# SLMM's /roster carries each unit's CACHED status (last_seen,
|
||||||
try:
|
# measurement_state) from NL43Status — a DB read on SLMM's side, NOT a device
|
||||||
response = await client.get(f"{SLMM_BASE_URL}/api/nl43/{unit_id}/measurement-state")
|
# call. The live monitor refreshes that cache ~every 1.3s, so this reflects
|
||||||
if response.status_code == 200:
|
# real monitoring without sending Measure? to the device (which the old
|
||||||
return response.json().get("measurement_state")
|
# /measurement-state did) and competing with DOD polling. One call covers all.
|
||||||
except Exception:
|
slmm_status = {}
|
||||||
return None
|
try:
|
||||||
return None
|
|
||||||
|
|
||||||
deployed_units = [unit for unit in units if unit.deployed and not unit.retired]
|
|
||||||
if deployed_units:
|
|
||||||
async with httpx.AsyncClient(timeout=3.0) as client:
|
async with httpx.AsyncClient(timeout=3.0) as client:
|
||||||
tasks = [fetch_measurement_state(client, unit.id) for unit in deployed_units]
|
r = await client.get(f"{SLMM_BASE_URL}/api/nl43/roster")
|
||||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
if r.status_code == 200:
|
||||||
|
for dev in (r.json().get("devices") or []):
|
||||||
|
slmm_status[dev.get("unit_id")] = dev.get("status") or {}
|
||||||
|
except Exception:
|
||||||
|
slmm_status = {}
|
||||||
|
|
||||||
for unit, state in zip(deployed_units, results):
|
# "Recent" = the monitor has a fresh successful read. last_seen only advances
|
||||||
if isinstance(state, Exception):
|
# on a successful poll, so staleness == the device isn't being reached.
|
||||||
unit.measurement_state = None
|
recent_cutoff = datetime.utcnow() - timedelta(minutes=5)
|
||||||
else:
|
for unit in units:
|
||||||
unit.measurement_state = state
|
st = slmm_status.get(unit.id)
|
||||||
|
if not st:
|
||||||
|
continue
|
||||||
|
unit.measurement_state = st.get("measurement_state")
|
||||||
|
last_seen = st.get("last_seen")
|
||||||
|
if last_seen:
|
||||||
|
try:
|
||||||
|
ls = datetime.fromisoformat(last_seen.replace("Z", ""))
|
||||||
|
unit.is_recent = ls > recent_cutoff
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return templates.TemplateResponse("partials/slm_device_list.html", {
|
return templates.TemplateResponse("partials/slm_device_list.html", {
|
||||||
"request": request,
|
"request": request,
|
||||||
|
|||||||
Reference in New Issue
Block a user