feat(admin): SFM + SLMM diagnostic pages under Developer settings

New /admin/sfm page (linked from Settings → Developer):
- Health banner — green/red with version + last-checked timestamp
- Connection panel — shows SFM_BASE_URL terra-view is configured with
- 4 KPI tiles — known units, total events, stale monitor_log rows,
  stale ach_sessions rows (the deprecated tables from the paused
  Python-ACH experiment, useful for confirming nothing's growing them)
- Per-unit roll-up table — serial, last_seen, event count, stale
  per-unit counts, sourced from SFM's /db/units
- Recent events with forwarding latency — color-coded gap between
  the event's recorded timestamp and SFM ingest time, so operators
  can spot watchers that are forwarding stale files (e.g. after a
  jobsite outage)
- Raw API tester — text input + GET button against any /api/sfm/*
  path, response rendered as prettified JSON

New /admin/slmm page — same layout, stripped down to health + connection
+ raw API tester.  For per-device SLM control the existing
/sound-level-meters dashboard remains the right entry point.

Backend (backend/routers/admin_modules.py):
- GET /admin/sfm, GET /admin/slmm — HTML pages
- GET /api/admin/sfm/overview — single aggregated probe that returns
  health, units, last 25 events with computed latency, stale-table
  counts, cache stats.  Tolerant of partial failures: any sub-fetch
  error is captured into errors{} so a flaky SFM endpoint doesn't
  break the whole page
- GET /api/admin/slmm/overview — health + connection info only for now

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-14 17:53:43 +00:00
parent 155f0b007a
commit 904ff04440
5 changed files with 641 additions and 1 deletions
+3
View File
@@ -106,6 +106,9 @@ app.include_router(settings.router)
from backend.routers import watcher_manager
app.include_router(watcher_manager.router)
from backend.routers import admin_modules
app.include_router(admin_modules.router)
# Projects system routers
app.include_router(projects.router)
app.include_router(project_locations.router)
+209
View File
@@ -0,0 +1,209 @@
"""
Admin / diagnostic pages for the device modules (SFM, SLMM).
These pages live under /admin/{module} and exist purely so an operator can
peek under the hood and confirm the module is reachable, what data it's
holding, and whether the proxy from terra-view is healthy.
Routes:
GET /admin/sfm — SFM diagnostic page
GET /admin/slmm — SLMM diagnostic page
API helpers (called by the HTML pages via fetch):
GET /api/admin/sfm/overview — aggregated SFM health + db stats in one call
GET /api/admin/slmm/overview — aggregated SLMM health + device count
The pages are intentionally read-only. Any actual administration of SFM
or SLMM happens in those modules directly.
"""
import logging
import os
from datetime import datetime, timezone
from typing import Any, Dict
import httpx
from fastapi import APIRouter, Depends, Request
from fastapi.responses import HTMLResponse, JSONResponse
from sqlalchemy.orm import Session
from backend.database import get_db
from backend.templates_config import templates
log = logging.getLogger(__name__)
router = APIRouter()
SFM_BASE_URL = os.getenv("SFM_BASE_URL", "http://localhost:8200")
SLMM_BASE_URL = os.getenv("SLMM_BASE_URL", "http://localhost:8100")
# ── SFM ───────────────────────────────────────────────────────────────────────
@router.get("/admin/sfm", response_class=HTMLResponse)
def admin_sfm_page(request: Request):
return templates.TemplateResponse("admin_sfm.html", {
"request": request,
"sfm_base_url": SFM_BASE_URL,
})
@router.get("/api/admin/sfm/overview")
async def admin_sfm_overview() -> JSONResponse:
"""Aggregated SFM diagnostic snapshot.
Returns health, db stats, stale-table counts, per-unit summary, and
recent events with forwarding latency. Tolerant of partial failures:
any individual sub-fetch error is captured into its section, so a flaky
sub-endpoint doesn't break the whole page.
"""
overview: Dict[str, Any] = {
"sfm_base_url": SFM_BASE_URL,
"checked_at": datetime.now(timezone.utc).isoformat(),
"health": None,
"reachable": False,
"units": [],
"events": [],
"stale": {
"monitor_log": None,
"sessions": None,
},
"cache_stats": None,
"errors": {},
}
async with httpx.AsyncClient(timeout=5.0) as client:
# Health
try:
r = await client.get(f"{SFM_BASE_URL}/health")
r.raise_for_status()
overview["health"] = r.json()
overview["reachable"] = overview["health"].get("status") == "ok"
except Exception as e: # noqa: BLE001
overview["errors"]["health"] = str(e)
overview["reachable"] = False
# If SFM is down, no point hitting the rest.
if not overview["reachable"]:
return JSONResponse(overview)
# Units
try:
r = await client.get(f"{SFM_BASE_URL}/db/units")
r.raise_for_status()
overview["units"] = r.json() or []
except Exception as e: # noqa: BLE001
overview["errors"]["units"] = str(e)
# Recent events (newest 25 — bigger sample of the call-home stream)
try:
r = await client.get(f"{SFM_BASE_URL}/db/events", params={"limit": 25})
r.raise_for_status()
payload = r.json() or {}
events = payload.get("events", []) or []
# Compute forwarding latency: created_at (SFM ingest) timestamp (event).
now = datetime.now(timezone.utc)
for ev in events:
ev.pop("waveform_blob", None)
ev.pop("a5_pickle_filename", None)
ts_str = ev.get("timestamp")
ca_str = ev.get("created_at")
latency_seconds = None
try:
if ts_str and ca_str:
ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
ca = datetime.fromisoformat(ca_str.replace("Z", "+00:00"))
if ts.tzinfo is None: ts = ts.replace(tzinfo=timezone.utc)
if ca.tzinfo is None: ca = ca.replace(tzinfo=timezone.utc)
latency_seconds = (ca - ts).total_seconds()
except ValueError:
pass
ev["forwarding_latency_seconds"] = latency_seconds
overview["events"] = events
except Exception as e: # noqa: BLE001
overview["errors"]["events"] = str(e)
# Stale tables (deprecated by the watcher-forward pipeline but still
# present in SFM's SQLite). Surface as counts only.
for key, path in (("monitor_log", "/db/monitor_log"),
("sessions", "/db/sessions")):
try:
r = await client.get(f"{SFM_BASE_URL}{path}", params={"limit": 1})
r.raise_for_status()
payload = r.json() or {}
# SFM returns count = total when limit covers all rows; we
# query with limit=1 just to be polite, then ask again with
# a high limit if we need the real total.
first_count = payload.get("count")
if first_count is None:
overview["stale"][key] = None
continue
# Re-query with high limit to get the true total.
r2 = await client.get(f"{SFM_BASE_URL}{path}", params={"limit": 100000})
r2.raise_for_status()
overview["stale"][key] = (r2.json() or {}).get("count")
except Exception as e: # noqa: BLE001
overview["errors"][f"stale_{key}"] = str(e)
# Cache stats (in-memory device cache on SFM)
try:
r = await client.get(f"{SFM_BASE_URL}/cache/stats")
r.raise_for_status()
overview["cache_stats"] = r.json()
except Exception as e: # noqa: BLE001
overview["errors"]["cache_stats"] = str(e)
# Aggregate counts the UI can render without re-walking arrays
overview["totals"] = {
"units": len(overview["units"]),
"events_total": sum(u.get("total_events", 0) for u in overview["units"]),
"stale_monitor_log": overview["stale"]["monitor_log"],
"stale_sessions": overview["stale"]["sessions"],
}
return JSONResponse(overview)
# ── SLMM ──────────────────────────────────────────────────────────────────────
@router.get("/admin/slmm", response_class=HTMLResponse)
def admin_slmm_page(request: Request):
return templates.TemplateResponse("admin_slmm.html", {
"request": request,
"slmm_base_url": SLMM_BASE_URL,
})
@router.get("/api/admin/slmm/overview")
async def admin_slmm_overview() -> JSONResponse:
"""Aggregated SLMM diagnostic snapshot."""
overview: Dict[str, Any] = {
"slmm_base_url": SLMM_BASE_URL,
"checked_at": datetime.now(timezone.utc).isoformat(),
"health": None,
"reachable": False,
"devices": [],
"errors": {},
}
async with httpx.AsyncClient(timeout=5.0) as client:
try:
r = await client.get(f"{SLMM_BASE_URL}/health")
r.raise_for_status()
overview["health"] = r.json()
overview["reachable"] = True
except Exception as e: # noqa: BLE001
overview["errors"]["health"] = str(e)
return JSONResponse(overview)
# Pull a roster of configured devices (SLMM exposes per-unit
# config + status under /api/nl43/*). This is a best-effort probe
# — SLMM doesn't expose a "list all devices" endpoint, so we ask
# terra-view's RosterUnit table what serials it knows about for
# SLMs and just check each one. For now, just surface the health
# payload and let the operator click through to /sound-level-meters
# for the per-device details.
return JSONResponse(overview)