diff --git a/backend/main.py b/backend/main.py
index 3009107..8029f19 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -106,6 +106,9 @@ app.include_router(settings.router)
 from backend.routers import watcher_manager
 app.include_router(watcher_manager.router)
 
+from backend.routers import admin_modules
+app.include_router(admin_modules.router)
+
 # Projects system routers
 app.include_router(projects.router)
 app.include_router(project_locations.router)
diff --git a/backend/routers/admin_modules.py b/backend/routers/admin_modules.py
new file mode 100644
index 0000000..8f9dc6c
--- /dev/null
+++ b/backend/routers/admin_modules.py
@@ -0,0 +1,302 @@
+"""
+Admin / diagnostic pages for the device modules (SFM, SLMM).
+
+These pages live under /admin/{module} and exist purely so an operator can
+peek under the hood and confirm the module is reachable, what data it's
+holding, and whether the proxy from terra-view is healthy.
+
+Routes:
+    GET /admin/sfm     — SFM diagnostic page
+    GET /admin/slmm    — SLMM diagnostic page
+
+API helpers (called by the HTML pages via fetch):
+    GET /api/admin/sfm/overview   — aggregated SFM health + db stats in one call
+    GET /api/admin/slmm/overview  — SLMM health (device roster not populated yet)
+
+The pages are intentionally read-only. Any actual administration of SFM
+or SLMM happens in those modules directly.
+"""
+
+import logging
+import os
+from datetime import datetime, timezone
+from typing import Any, Dict, Optional
+
+import httpx
+from fastapi import APIRouter, Depends, Request
+from fastapi.responses import HTMLResponse, JSONResponse
+from sqlalchemy.orm import Session
+
+from backend.database import get_db
+from backend.templates_config import templates
+
+log = logging.getLogger(__name__)
+
+router = APIRouter()
+
+SFM_BASE_URL = os.getenv("SFM_BASE_URL", "http://localhost:8200")
+SLMM_BASE_URL = os.getenv("SLMM_BASE_URL", "http://localhost:8100")
+
+# Per-request timeout (seconds) for every upstream call made by this module.
+UPSTREAM_TIMEOUT_SECONDS = 5.0
+
+# Number of recent events requested from SFM for the overview.
+RECENT_EVENT_LIMIT = 25
+
+# Limit used when re-querying a stale table for its row count. SFM only
+# reports count = total when the limit covers all rows, so presumably the
+# reported figure tops out at this value (confirm against SFM).
+STALE_COUNT_LIMIT = 100000
+
+# Per-event fields stripped before events are returned to the browser.
+_EVENT_FIELDS_TO_DROP = ("waveform_blob", "a5_pickle_filename")
+
+# (overview key, SFM path) for tables deprecated by the watcher-forward
+# pipeline but still present in SFM's SQLite.
+_STALE_TABLES = (
+    ("monitor_log", "/db/monitor_log"),
+    ("sessions", "/db/sessions"),
+)
+
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+
+def _parse_iso_utc(value: Any) -> Optional[datetime]:
+    """Parse an ISO-8601 string into a timezone-aware datetime.
+
+    A trailing "Z" is accepted and naive values are assumed to be UTC.
+    Returns None for empty, non-string, or unparseable input.
+    """
+    if not isinstance(value, str) or not value:
+        return None
+    try:
+        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
+    except ValueError:
+        return None
+    if parsed.tzinfo is None:
+        parsed = parsed.replace(tzinfo=timezone.utc)
+    return parsed
+
+
+def _forwarding_latency_seconds(event: Dict[str, Any]) -> Optional[float]:
+    """Return ``created_at`` − ``timestamp`` for *event*, in seconds.
+
+    ``created_at`` is SFM's ingest time and ``timestamp`` is the event's own
+    time. Returns None when either value is missing or cannot be parsed, so
+    one odd row never discards the rest of the events section.
+    """
+    recorded_at = _parse_iso_utc(event.get("timestamp"))
+    ingested_at = _parse_iso_utc(event.get("created_at"))
+    if recorded_at is None or ingested_at is None:
+        return None
+    return (ingested_at - recorded_at).total_seconds()
+
+
+def _sum_total_events(units: Any) -> float:
+    """Sum ``total_events`` across *units*, skipping malformed rows.
+
+    Rows that are not dicts, and counts that are missing, null, or not
+    numeric, contribute nothing instead of raising.
+    """
+    if not isinstance(units, list):
+        return 0
+    total = 0
+    for unit in units:
+        if not isinstance(unit, dict):
+            continue
+        count = unit.get("total_events")
+        if isinstance(count, (int, float)) and not isinstance(count, bool):
+            total += count
+    return total
+
+
+async def _fetch_stale_count(client: httpx.AsyncClient, path: str) -> Any:
+    """Return the row count SFM reports for the stale table at *path*.
+
+    SFM returns count = total only when the limit covers all rows, so a
+    cheap limit=1 probe goes first: a missing count yields None, and a
+    count of 0 means the table is empty, so the large re-query is skipped.
+    Otherwise the table is re-queried with STALE_COUNT_LIMIT and that
+    response's count is returned.
+
+    HTTP and transport errors propagate; the caller records them per table.
+    """
+    url = f"{SFM_BASE_URL}{path}"
+
+    probe = await client.get(url, params={"limit": 1})
+    probe.raise_for_status()
+    probe_count = (probe.json() or {}).get("count")
+    if probe_count is None:
+        return None
+    if probe_count == 0:
+        return 0
+
+    full = await client.get(url, params={"limit": STALE_COUNT_LIMIT})
+    full.raise_for_status()
+    return (full.json() or {}).get("count")
+
+
+# ── SFM ───────────────────────────────────────────────────────────────────────
+
+
+@router.get("/admin/sfm", response_class=HTMLResponse)
+def admin_sfm_page(request: Request):
+    """Render the SFM diagnostic page."""
+    return templates.TemplateResponse("admin_sfm.html", {
+        "request": request,
+        "sfm_base_url": SFM_BASE_URL,
+    })
+
+
+@router.get("/api/admin/sfm/overview")
+async def admin_sfm_overview() -> JSONResponse:
+    """Aggregated SFM diagnostic snapshot.
+
+    Returns health, per-unit summary, recent events with forwarding
+    latency, stale-table counts, and cache stats. Tolerant of partial
+    failures: any individual sub-fetch error is captured into ``errors``
+    under its section name, so a flaky sub-endpoint doesn't break the
+    whole page.
+
+    If the health check fails or reports a status other than "ok", the
+    snapshot is returned immediately with ``reachable`` false and without
+    the ``totals`` section.
+    """
+    overview: Dict[str, Any] = {
+        "sfm_base_url": SFM_BASE_URL,
+        "checked_at": datetime.now(timezone.utc).isoformat(),
+        "health": None,
+        "reachable": False,
+        "units": [],
+        "events": [],
+        "stale": {
+            "monitor_log": None,
+            "sessions": None,
+        },
+        "cache_stats": None,
+        "errors": {},
+    }
+
+    async with httpx.AsyncClient(timeout=UPSTREAM_TIMEOUT_SECONDS) as client:
+        # Health
+        try:
+            r = await client.get(f"{SFM_BASE_URL}/health")
+            r.raise_for_status()
+            overview["health"] = r.json()
+            overview["reachable"] = overview["health"].get("status") == "ok"
+        except Exception as e:  # noqa: BLE001
+            overview["errors"]["health"] = str(e)
+            overview["reachable"] = False
+
+        # If SFM is down, no point hitting the rest.
+        if not overview["reachable"]:
+            return JSONResponse(overview)
+
+        # Units
+        try:
+            r = await client.get(f"{SFM_BASE_URL}/db/units")
+            r.raise_for_status()
+            units = r.json() or []
+            if isinstance(units, list):
+                overview["units"] = units
+            else:
+                # Keep "units" a list so the totals below and the UI stay sane.
+                overview["errors"]["units"] = (
+                    f"unexpected /db/units payload type: {type(units).__name__}"
+                )
+        except Exception as e:  # noqa: BLE001
+            overview["errors"]["units"] = str(e)
+
+        # Recent events (newest 25 — bigger sample of the call-home stream)
+        try:
+            r = await client.get(
+                f"{SFM_BASE_URL}/db/events",
+                params={"limit": RECENT_EVENT_LIMIT},
+            )
+            r.raise_for_status()
+            payload = r.json() or {}
+            events = payload.get("events", []) or []
+            for ev in events:
+                for field in _EVENT_FIELDS_TO_DROP:
+                    ev.pop(field, None)
+                # Forwarding latency: created_at (SFM ingest) − timestamp (event).
+                ev["forwarding_latency_seconds"] = _forwarding_latency_seconds(ev)
+            overview["events"] = events
+        except Exception as e:  # noqa: BLE001
+            overview["errors"]["events"] = str(e)
+
+        # Stale tables (deprecated by the watcher-forward pipeline but still
+        # present in SFM's SQLite). Surface as counts only.
+        for key, path in _STALE_TABLES:
+            try:
+                overview["stale"][key] = await _fetch_stale_count(client, path)
+            except Exception as e:  # noqa: BLE001
+                overview["errors"][f"stale_{key}"] = str(e)
+
+        # Cache stats (in-memory device cache on SFM)
+        try:
+            r = await client.get(f"{SFM_BASE_URL}/cache/stats")
+            r.raise_for_status()
+            overview["cache_stats"] = r.json()
+        except Exception as e:  # noqa: BLE001
+            overview["errors"]["cache_stats"] = str(e)
+
+    # Aggregate counts the UI can render without re-walking arrays
+    overview["totals"] = {
+        "units": len(overview["units"]),
+        "events_total": _sum_total_events(overview["units"]),
+        "stale_monitor_log": overview["stale"]["monitor_log"],
+        "stale_sessions": overview["stale"]["sessions"],
+    }
+
+    return JSONResponse(overview)
+
+
+# ── SLMM ──────────────────────────────────────────────────────────────────────
+
+
+@router.get("/admin/slmm", response_class=HTMLResponse)
+def admin_slmm_page(request: Request):
+    """Render the SLMM diagnostic page."""
+    return templates.TemplateResponse("admin_slmm.html", {
+        "request": request,
+        "slmm_base_url": SLMM_BASE_URL,
+    })
+
+
+@router.get("/api/admin/slmm/overview")
+async def admin_slmm_overview() -> JSONResponse:
+    """Aggregated SLMM diagnostic snapshot.
+
+    Currently this is the SLMM health payload only: ``reachable`` is true
+    when GET /health returns a successful response with a JSON body, and
+    ``devices`` is always an empty list.
+    """
+    overview: Dict[str, Any] = {
+        "slmm_base_url": SLMM_BASE_URL,
+        "checked_at": datetime.now(timezone.utc).isoformat(),
+        "health": None,
+        "reachable": False,
+        "devices": [],
+        "errors": {},
+    }
+
+    async with httpx.AsyncClient(timeout=UPSTREAM_TIMEOUT_SECONDS) as client:
+        try:
+            r = await client.get(f"{SLMM_BASE_URL}/health")
+            r.raise_for_status()
+            overview["health"] = r.json()
+            overview["reachable"] = True
+        except Exception as e:  # noqa: BLE001
+            overview["errors"]["health"] = str(e)
+            return JSONResponse(overview)
+
+        # TODO: populate "devices". SLMM exposes per-unit config + status
+        # under /api/nl43/* but has no "list all devices" endpoint, so the
+        # plan is to ask terra-view's RosterUnit table which SLM serials it
+        # knows about and probe each one. For now, just surface the health
+        # payload and let the operator click through to /sound-level-meters
+        # for the per-device details.
+
+    return JSONResponse(overview)
diff --git a/templates/admin_sfm.html b/templates/admin_sfm.html
new file mode 100644
index 0000000..e73ea09
--- /dev/null
+++ b/templates/admin_sfm.html
@@ -0,0 +1,264 @@
+{% extends "base.html" %}
+
+{% block title %}SFM Admin - Seismo Fleet Manager{% endblock %}
+
+{% block content %}
+
Diagnostics for the Seismograph Field Module (SFM) backend.
+All seismograph serials SFM has ever seen, with their last-event timestamp and total event count. Sourced from GET /db/units.
| Serial | Last Seen | Events | Monitor (stale) | Sessions (stale) |
|---|---|---|---|---|
| Loading… | | | | |
The last 25 events SFM ingested, with the gap between the event's recorded timestamp and when SFM received the forward. Large latencies indicate the watcher is forwarding stale files (e.g. after a network outage).
| Recorded | Serial | Forwarded | Latency | File |
|---|---|---|---|---|
| Loading… | | | | |
Send a GET request to any SFM endpoint via the terra-view /api/sfm/* proxy. Path is relative to SFM root (no leading slash).
Diagnostics for the Sound Level Meter Manager (SLMM) backend.
+Send a GET request to any SLMM endpoint via the terra-view /api/slmm/* proxy.