fix(alerts): reset rule state + close open event on rule edit/delete

invalidate() only dropped the rule cache, not the per-(unit,rule) state machine — so editing a rule's metric/threshold left a stale 'active' phase that mis-evaluated against the new config (spurious clear, or suppressed onset), and deleting an in-alarm rule left an open AlertEvent that kept the client portal stuck "in alarm" forever. update/delete now call _reset_rule_runtime: forget_rule() drops the state machine and any open event for that rule is closed. Verified: existing evaluator tests + cooldown scenario still pass; compiles. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 23:40:52 +00:00
parent cfdeada9d6
commit ad6071b790
2 changed files with 21 additions and 0 deletions
@@ -175,6 +175,12 @@ class AlertEvaluator:
        else:
            self._rule_cache.pop(unit_id, None)

+    def forget_rule(self, unit_id: str, rule_id: int) -> None:
+        """Drop a rule's per-(unit, rule) state machine after the rule is edited or
+        deleted, so a stale 'active' phase / open event_id from the old config
+        doesn't bleed into the new one (mis-firing a clear or suppressing an onset)."""
+        self._states.pop((unit_id, rule_id), None)
+
    # -- scheduling ----------------------------------------------------------

    def _in_schedule(self, rule) -> bool:
@@ -428,6 +428,19 @@ async def _sync_keepalive_to_rules(unit_id: str, db: Session):
    await m.set_keepalive(True)


+def _reset_rule_runtime(unit_id: str, rule_id: int, db: Session):
+    """After a rule edit/delete: drop its evaluator state machine and close any open
+    event, so a stale 'active' phase doesn't mis-evaluate against the new config and
+    the client portal doesn't stay 'in alarm' on a rule that changed or is gone."""
+    from app.alerts import alert_evaluator
+    alert_evaluator.forget_rule(unit_id, rule_id)
+    now = datetime.utcnow()
+    for evt in db.query(AlertEvent).filter_by(unit_id=unit_id, rule_id=rule_id, status="active").all():
+        evt.clear_at = now
+        evt.status = "cleared"
+    db.commit()
+
+
@router.post("/{unit_id}/alerts/rules")
 async def create_alert_rule(unit_id: str, payload: AlertRulePayload, db: Session = Depends(get_db)):
    rule = AlertRule(unit_id=unit_id, **payload.model_dump())
@@ -457,6 +470,7 @@ async def update_alert_rule(unit_id: str, rule_id: int, payload: AlertRulePayloa
    db.refresh(rule)
    from app.alerts import alert_evaluator
    alert_evaluator.invalidate(unit_id)
+    _reset_rule_runtime(unit_id, rule_id, db)
    await _sync_keepalive_to_rules(unit_id, db)
    return {"status": "ok", "rule": _rule_dict(rule)}

@@ -470,6 +484,7 @@ async def delete_alert_rule(unit_id: str, rule_id: int, db: Session = Depends(ge
    db.commit()
    from app.alerts import alert_evaluator
    alert_evaluator.invalidate(unit_id)
+    _reset_rule_runtime(unit_id, rule_id, db)   # close its open event so the portal doesn't stay red
    await _sync_keepalive_to_rules(unit_id, db)  # no-op if no enabled rules remain
    return {"status": "ok", "deleted": rule_id}