feat: alert engine stage 1 — rules, events, state machine, CRUD
Replaces the POC single-threshold check with a real per-rule engine over
the live monitor feed.
- AlertRule / AlertEvent tables (auto-created via create_all; no migration).
Rule = {metric, comparison, threshold_db, duration_s, clear_margin_db,
schedule, channels, recipients}.
- alerts.py: per-(unit,rule) state machine IDLE->ACTIVE->IDLE with duration
debounce (both edges) + clear_margin hysteresis; onset/clear are distinct
events; optional nighttime schedule; rule cache w/ invalidation. The
state-machine core (_evaluate_step) is pure (no DB/clock) for testing.
- Dispatch is a server log (POC); _dispatch() is the seam for a Terra-View
webhook (email/SMS) later.
- CRUD: POST/GET/PUT/DELETE /{unit}/alerts/rules, GET /{unit}/alerts/events,
POST /{unit}/alerts/events/{id}/ack.
- test_alert_evaluator.py: synthetic level series proves onset debounce,
spike rejection, hysteresis hold, and below-comparison (4/4 pass, no device).
Source-agnostic: the same rules transfer unchanged if a unit's feed is later
sourced from FTP intervals instead of the DOD monitor.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+106
-1
@@ -11,7 +11,7 @@ import os
|
||||
import asyncio
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import NL43Config, NL43Status
|
||||
from app.models import NL43Config, NL43Status, AlertRule, AlertEvent
|
||||
from app.services import NL43Client, persist_snapshot
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -320,6 +320,111 @@ async def monitor_status():
|
||||
return {"status": "ok", "monitors": monitor_manager.status()}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# ALERTS — threshold rules + fired events
|
||||
# ============================================================================
|
||||
|
||||
class AlertRulePayload(BaseModel):
|
||||
name: str = "Alert"
|
||||
metric: str = "lp" # lp/leq/lmax/lmin/lpeak/ln1/ln2
|
||||
comparison: str = "above" # above | below
|
||||
threshold_db: float
|
||||
duration_s: int = 0 # sustained seconds before firing (0 = instant)
|
||||
clear_margin_db: float = 2.0 # hysteresis band
|
||||
cooldown_s: int = 300
|
||||
schedule_start: str | None = None # "HH:MM" local; null = always
|
||||
schedule_end: str | None = None
|
||||
schedule_days: str | None = None # CSV of 0-6 (Mon=0); null = every day
|
||||
channels: str = "log"
|
||||
recipients: str | None = None
|
||||
enabled: bool = True
|
||||
|
||||
|
||||
def _rule_dict(r: AlertRule) -> dict:
|
||||
return {
|
||||
"id": r.id, "unit_id": r.unit_id, "name": r.name, "metric": r.metric,
|
||||
"comparison": r.comparison, "threshold_db": r.threshold_db,
|
||||
"duration_s": r.duration_s, "clear_margin_db": r.clear_margin_db,
|
||||
"cooldown_s": r.cooldown_s, "schedule_start": r.schedule_start,
|
||||
"schedule_end": r.schedule_end, "schedule_days": r.schedule_days,
|
||||
"channels": r.channels, "recipients": r.recipients, "enabled": r.enabled,
|
||||
}
|
||||
|
||||
|
||||
def _event_dict(e: AlertEvent) -> dict:
|
||||
return {
|
||||
"id": e.id, "rule_id": e.rule_id, "unit_id": e.unit_id,
|
||||
"rule_name": e.rule_name, "metric": e.metric, "threshold_db": e.threshold_db,
|
||||
"onset_at": e.onset_at.isoformat() if e.onset_at else None,
|
||||
"onset_value": e.onset_value, "peak_value": e.peak_value,
|
||||
"clear_at": e.clear_at.isoformat() if e.clear_at else None,
|
||||
"status": e.status,
|
||||
"acknowledged_at": e.acknowledged_at.isoformat() if e.acknowledged_at else None,
|
||||
"acknowledged_by": e.acknowledged_by,
|
||||
}
|
||||
|
||||
|
||||
@router.post("/{unit_id}/alerts/rules")
|
||||
def create_alert_rule(unit_id: str, payload: AlertRulePayload, db: Session = Depends(get_db)):
|
||||
rule = AlertRule(unit_id=unit_id, **payload.model_dump())
|
||||
db.add(rule)
|
||||
db.commit()
|
||||
db.refresh(rule)
|
||||
from app.alerts import alert_evaluator
|
||||
alert_evaluator.invalidate(unit_id)
|
||||
return {"status": "ok", "rule": _rule_dict(rule)}
|
||||
|
||||
|
||||
@router.get("/{unit_id}/alerts/rules")
|
||||
def list_alert_rules(unit_id: str, db: Session = Depends(get_db)):
|
||||
rules = db.query(AlertRule).filter_by(unit_id=unit_id).all()
|
||||
return {"status": "ok", "rules": [_rule_dict(r) for r in rules]}
|
||||
|
||||
|
||||
@router.put("/{unit_id}/alerts/rules/{rule_id}")
|
||||
def update_alert_rule(unit_id: str, rule_id: int, payload: AlertRulePayload, db: Session = Depends(get_db)):
|
||||
rule = db.query(AlertRule).filter_by(id=rule_id, unit_id=unit_id).first()
|
||||
if not rule:
|
||||
raise HTTPException(status_code=404, detail="Alert rule not found")
|
||||
for field, value in payload.model_dump().items():
|
||||
setattr(rule, field, value)
|
||||
db.commit()
|
||||
db.refresh(rule)
|
||||
from app.alerts import alert_evaluator
|
||||
alert_evaluator.invalidate(unit_id)
|
||||
return {"status": "ok", "rule": _rule_dict(rule)}
|
||||
|
||||
|
||||
@router.delete("/{unit_id}/alerts/rules/{rule_id}")
|
||||
def delete_alert_rule(unit_id: str, rule_id: int, db: Session = Depends(get_db)):
|
||||
rule = db.query(AlertRule).filter_by(id=rule_id, unit_id=unit_id).first()
|
||||
if not rule:
|
||||
raise HTTPException(status_code=404, detail="Alert rule not found")
|
||||
db.delete(rule)
|
||||
db.commit()
|
||||
from app.alerts import alert_evaluator
|
||||
alert_evaluator.invalidate(unit_id)
|
||||
return {"status": "ok", "deleted": rule_id}
|
||||
|
||||
|
||||
@router.get("/{unit_id}/alerts/events")
|
||||
def list_alert_events(unit_id: str, limit: int = 50, db: Session = Depends(get_db)):
|
||||
events = (db.query(AlertEvent).filter_by(unit_id=unit_id)
|
||||
.order_by(AlertEvent.onset_at.desc()).limit(limit).all())
|
||||
return {"status": "ok", "events": [_event_dict(e) for e in events]}
|
||||
|
||||
|
||||
@router.post("/{unit_id}/alerts/events/{event_id}/ack")
|
||||
def ack_alert_event(unit_id: str, event_id: int, by: str | None = None, db: Session = Depends(get_db)):
|
||||
evt = db.query(AlertEvent).filter_by(id=event_id, unit_id=unit_id).first()
|
||||
if not evt:
|
||||
raise HTTPException(status_code=404, detail="Alert event not found")
|
||||
evt.acknowledged_at = datetime.utcnow()
|
||||
evt.acknowledged_by = by
|
||||
db.commit()
|
||||
return {"status": "ok", "acknowledged": event_id}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# GLOBAL POLLING STATUS ENDPOINT (must be before /{unit_id} routes)
|
||||
# ============================================================================
|
||||
|
||||
Reference in New Issue
Block a user