feat(admin): SFM + SLMM diagnostic pages under Developer settings
New /admin/sfm page (linked from Settings → Developer):
- Health banner — green/red with version + last-checked timestamp
- Connection panel — shows the SFM_BASE_URL that terra-view is configured with
- 4 KPI tiles — known units, total events, stale monitor_log rows,
stale ach_sessions rows (the deprecated tables from the paused
Python-ACH experiment, useful for confirming nothing's growing them)
- Per-unit roll-up table — serial, last_seen, event count, stale
per-unit counts, sourced from SFM's /db/units
- Recent events with forwarding latency — color-coded gap between
the event's recorded timestamp and SFM ingest time, so operators
can spot watchers that are forwarding stale files (e.g. after a
jobsite outage)
- Raw API tester — text input + GET button against any /api/sfm/*
path, response rendered as prettified JSON
New /admin/slmm page — same layout, stripped down to health + connection
+ raw API tester. For per-device SLM control the existing
/sound-level-meters dashboard remains the right entry point.
Backend (backend/routers/admin_modules.py):
- GET /admin/sfm, GET /admin/slmm — HTML pages
- GET /api/admin/sfm/overview — single aggregated probe that returns
health, units, last 25 events with computed latency, stale-table
counts, cache stats. Tolerant of partial failures: any sub-fetch
error is captured into errors{} so a flaky SFM endpoint doesn't
break the whole page
- GET /api/admin/slmm/overview — health + connection info only for now
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -106,6 +106,9 @@ app.include_router(settings.router)
|
||||
from backend.routers import watcher_manager
|
||||
app.include_router(watcher_manager.router)
|
||||
|
||||
from backend.routers import admin_modules
|
||||
app.include_router(admin_modules.router)
|
||||
|
||||
# Projects system routers
|
||||
app.include_router(projects.router)
|
||||
app.include_router(project_locations.router)
|
||||
|
||||
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
Admin / diagnostic pages for the device modules (SFM, SLMM).
|
||||
|
||||
These pages live under /admin/{module} and exist purely so an operator can
|
||||
peek under the hood and confirm the module is reachable, what data it's
|
||||
holding, and whether the proxy from terra-view is healthy.
|
||||
|
||||
Routes:
|
||||
GET /admin/sfm — SFM diagnostic page
|
||||
GET /admin/slmm — SLMM diagnostic page
|
||||
|
||||
API helpers (called by the HTML pages via fetch):
|
||||
GET /api/admin/sfm/overview — aggregated SFM health + db stats in one call
|
||||
GET /api/admin/slmm/overview — SLMM health + connection info (the device list is not populated yet)
|
||||
|
||||
The pages are intentionally read-only. Any actual administration of SFM
|
||||
or SLMM happens in those modules directly.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from backend.database import get_db
|
||||
from backend.templates_config import templates
|
||||
|
||||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Router that carries every admin page and API helper defined in this module.
router = APIRouter()

# Base URLs of the device modules. Each one can be overridden through the
# environment variable of the same name; the defaults point at localhost.
SFM_BASE_URL = os.environ.get("SFM_BASE_URL", "http://localhost:8200")
SLMM_BASE_URL = os.environ.get("SLMM_BASE_URL", "http://localhost:8100")
|
||||
|
||||
|
||||
# ── SFM ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/admin/sfm", response_class=HTMLResponse)
def admin_sfm_page(request: Request):
    """Render the SFM diagnostic page.

    The ``admin_sfm.html`` template receives the incoming request and the
    SFM base URL this process is configured with.
    """
    context = {
        "request": request,
        "sfm_base_url": SFM_BASE_URL,
    }
    return templates.TemplateResponse("admin_sfm.html", context)
|
||||
|
||||
|
||||
def _forwarding_latency_seconds(event: Dict[str, Any]):
    """Return the seconds between an event's ``timestamp`` and ``created_at``.

    Both fields are parsed as ISO-8601 strings (a trailing ``Z`` is accepted)
    and naive values are treated as UTC. Returns ``None`` when either field
    is missing, empty, not a string, or unparseable, so a single malformed
    row cannot discard the whole events section.
    """
    parsed = []
    for field in ("timestamp", "created_at"):
        raw = event.get(field)
        if not raw or not isinstance(raw, str):
            return None
        try:
            moment = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            return None
        if moment.tzinfo is None:
            moment = moment.replace(tzinfo=timezone.utc)
        parsed.append(moment)
    recorded_at, ingested_at = parsed
    return (ingested_at - recorded_at).total_seconds()


def _sum_total_events(units) -> int:
    """Add up ``total_events`` over the unit rows, skipping unusable entries.

    Rows that are not dicts, and ``total_events`` values that are missing,
    null or non-numeric, count as zero instead of raising.
    """
    total = 0
    for unit in units:
        if not isinstance(unit, dict):
            continue
        count = unit.get("total_events")
        if isinstance(count, (int, float)):
            total += count
    return total


@router.get("/api/admin/sfm/overview")
async def admin_sfm_overview() -> JSONResponse:
    """Aggregated SFM diagnostic snapshot.

    Probes SFM's /health, /db/units, /db/events, /db/monitor_log,
    /db/sessions and /cache/stats endpoints and returns the results in one
    JSON document. Tolerant of partial failures: the message of any failing
    sub-fetch is stored under ``errors[<section>]`` and the remaining
    sections are still filled in.

    If the health probe fails or does not report ``status == "ok"``, the
    snapshot is returned immediately with ``reachable`` set to False; in
    that case the other sections keep their empty defaults and no
    ``totals`` key is added.
    """
    overview: Dict[str, Any] = {
        "sfm_base_url": SFM_BASE_URL,
        "checked_at": datetime.now(timezone.utc).isoformat(),
        "health": None,
        "reachable": False,
        "units": [],
        "events": [],
        "stale": {
            "monitor_log": None,
            "sessions": None,
        },
        "cache_stats": None,
        "errors": {},
    }

    async with httpx.AsyncClient(timeout=5.0) as client:
        # Health
        try:
            r = await client.get(f"{SFM_BASE_URL}/health")
            r.raise_for_status()
            overview["health"] = r.json()
            overview["reachable"] = overview["health"].get("status") == "ok"
        except Exception as e:  # noqa: BLE001
            overview["errors"]["health"] = str(e)
            overview["reachable"] = False

        # If SFM is down, no point hitting the rest.
        if not overview["reachable"]:
            return JSONResponse(overview)

        # Units
        try:
            r = await client.get(f"{SFM_BASE_URL}/db/units")
            r.raise_for_status()
            units = r.json() or []
            # The totals below iterate this value as a list of rows, so an
            # unexpected shape is reported here instead of crashing later.
            if not isinstance(units, list):
                raise TypeError(
                    f"expected a JSON array from /db/units, got {type(units).__name__}"
                )
            overview["units"] = units
        except Exception as e:  # noqa: BLE001
            overview["errors"]["units"] = str(e)

        # Recent events (newest 25 — bigger sample of the call-home stream)
        try:
            r = await client.get(f"{SFM_BASE_URL}/db/events", params={"limit": 25})
            r.raise_for_status()
            payload = r.json() or {}
            events = payload.get("events", []) or []
            for ev in events:
                # Drop these two fields before returning the rows.
                ev.pop("waveform_blob", None)
                ev.pop("a5_pickle_filename", None)
                # Forwarding latency: created_at (SFM ingest) − timestamp (event).
                ev["forwarding_latency_seconds"] = _forwarding_latency_seconds(ev)
            overview["events"] = events
        except Exception as e:  # noqa: BLE001
            overview["errors"]["events"] = str(e)

        # Stale tables (deprecated by the watcher-forward pipeline but still
        # present in SFM's SQLite). Surface as counts only.
        for key, path in (("monitor_log", "/db/monitor_log"),
                          ("sessions", "/db/sessions")):
            try:
                # Cheap probe first: if the endpoint reports no "count" at
                # all there is nothing to total, so skip the large query.
                r = await client.get(f"{SFM_BASE_URL}{path}", params={"limit": 1})
                r.raise_for_status()
                payload = r.json() or {}
                if payload.get("count") is None:
                    overview["stale"][key] = None
                    continue
                # Re-query with a high limit to get the true total.
                # NOTE(review): this presumes "count" reflects the rows
                # covered by the limit — confirm against SFM's API.
                r2 = await client.get(f"{SFM_BASE_URL}{path}", params={"limit": 100000})
                r2.raise_for_status()
                overview["stale"][key] = (r2.json() or {}).get("count")
            except Exception as e:  # noqa: BLE001
                overview["errors"][f"stale_{key}"] = str(e)

        # Cache stats (in-memory device cache on SFM)
        try:
            r = await client.get(f"{SFM_BASE_URL}/cache/stats")
            r.raise_for_status()
            overview["cache_stats"] = r.json()
        except Exception as e:  # noqa: BLE001
            overview["errors"]["cache_stats"] = str(e)

    # Aggregate counts the UI can render without re-walking arrays
    overview["totals"] = {
        "units": len(overview["units"]),
        "events_total": _sum_total_events(overview["units"]),
        "stale_monitor_log": overview["stale"]["monitor_log"],
        "stale_sessions": overview["stale"]["sessions"],
    }

    return JSONResponse(overview)
|
||||
|
||||
|
||||
# ── SLMM ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/admin/slmm", response_class=HTMLResponse)
def admin_slmm_page(request: Request):
    """Render the SLMM diagnostic page.

    The ``admin_slmm.html`` template receives the incoming request and the
    SLMM base URL this process is configured with.
    """
    context = {
        "request": request,
        "slmm_base_url": SLMM_BASE_URL,
    }
    return templates.TemplateResponse("admin_slmm.html", context)
|
||||
|
||||
|
||||
@router.get("/api/admin/slmm/overview")
async def admin_slmm_overview() -> JSONResponse:
    """Aggregated SLMM diagnostic snapshot.

    Calls SLMM's /health endpoint once. On success the parsed payload is
    stored under ``health`` and ``reachable`` becomes True; on any failure
    the error message is stored under ``errors["health"]`` and ``reachable``
    stays False. ``devices`` is always returned as an empty list.
    """
    snapshot: Dict[str, Any] = {
        "slmm_base_url": SLMM_BASE_URL,
        "checked_at": datetime.now(timezone.utc).isoformat(),
        "health": None,
        "reachable": False,
        "devices": [],
        "errors": {},
    }

    async with httpx.AsyncClient(timeout=5.0) as http:
        try:
            response = await http.get(f"{SLMM_BASE_URL}/health")
            response.raise_for_status()
            snapshot["health"] = response.json()
            snapshot["reachable"] = True
        except Exception as exc:  # noqa: BLE001
            snapshot["errors"]["health"] = str(exc)

    # No device roster is collected yet: only the health payload is surfaced,
    # and operators are expected to use /sound-level-meters for per-device
    # details. A best-effort per-serial probe (driven by terra-view's
    # RosterUnit table) is a possible follow-up.
    return JSONResponse(snapshot)
|
||||
Reference in New Issue
Block a user