fix(alerts): reset rule state + close open event on rule edit/delete

invalidate() only dropped the rule cache, not the per-(unit,rule) state machine —
so editing a rule's metric/threshold left a stale 'active' phase that mis-evaluated
against the new config (spurious clear, or suppressed onset), and deleting an
in-alarm rule left an open AlertEvent that kept the client portal stuck "in alarm"
forever. update/delete now call _reset_rule_runtime: forget_rule() drops the state
machine and any open event for that rule is closed.

Verified: existing evaluator tests + cooldown scenario still pass; compiles.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-11 23:40:52 +00:00
parent cfdeada9d6
commit ad6071b790
2 changed files with 21 additions and 0 deletions
+6
View File
@@ -175,6 +175,12 @@ class AlertEvaluator:
else:
self._rule_cache.pop(unit_id, None)
def forget_rule(self, unit_id: str, rule_id: int) -> None:
    """Discard the tracked state for a single (unit, rule) pair.

    Meant to be called once a rule has been edited or deleted, so that
    whatever was recorded under the old configuration (for example an
    'active' phase or an open event id) is not evaluated against the new
    one, where it could fire a bogus clear or swallow a real onset.

    Args:
        unit_id: Unit the rule belongs to.
        rule_id: Rule whose state should be dropped.

    Does nothing when no state is stored for the pair.
    """
    state_key = (unit_id, rule_id)
    # pop() with a default keeps this safe to call for rules that never
    # produced any state.
    self._states.pop(state_key, None)
# -- scheduling ----------------------------------------------------------
def _in_schedule(self, rule) -> bool:
+15
View File
@@ -428,6 +428,19 @@ async def _sync_keepalive_to_rules(unit_id: str, db: Session):
await m.set_keepalive(True)
def _reset_rule_runtime(unit_id: str, rule_id: int, db: Session):
    """Reset the live alerting state of one rule after it is edited or deleted.

    Two steps, in this order:

    1. The evaluator is asked to forget the rule's per-(unit, rule) state,
       so an old 'active' phase is not evaluated against the new config.
    2. Every ``AlertEvent`` row for this unit and rule whose status is
       "active" gets a clear time and is marked "cleared", so the client
       portal does not stay 'in alarm' on a rule that changed or is gone.

    Args:
        unit_id: Unit the rule belongs to.
        rule_id: Rule that was just edited or deleted.
        db: Session used to find and update the events; it is committed
            before this function returns.
    """
    # Imported at call time, like the other handlers in this file do
    # (presumably to avoid an import cycle with app.alerts -- confirm).
    from app.alerts import alert_evaluator

    alert_evaluator.forget_rule(unit_id, rule_id)

    # Take the timestamp once so every event closed by this call carries
    # the same clear_at value (naive UTC, as returned by utcnow()).
    closed_at = datetime.utcnow()
    open_events = (
        db.query(AlertEvent)
        .filter_by(unit_id=unit_id, rule_id=rule_id, status="active")
        .all()
    )
    for open_event in open_events:
        open_event.clear_at = closed_at
        open_event.status = "cleared"
    db.commit()
@router.post("/{unit_id}/alerts/rules")
async def create_alert_rule(unit_id: str, payload: AlertRulePayload, db: Session = Depends(get_db)):
rule = AlertRule(unit_id=unit_id, **payload.model_dump())
@@ -457,6 +470,7 @@ async def update_alert_rule(unit_id: str, rule_id: int, payload: AlertRulePayloa
db.refresh(rule)
from app.alerts import alert_evaluator
alert_evaluator.invalidate(unit_id)
_reset_rule_runtime(unit_id, rule_id, db)
await _sync_keepalive_to_rules(unit_id, db)
return {"status": "ok", "rule": _rule_dict(rule)}
@@ -470,6 +484,7 @@ async def delete_alert_rule(unit_id: str, rule_id: int, db: Session = Depends(ge
db.commit()
from app.alerts import alert_evaluator
alert_evaluator.invalidate(unit_id)
_reset_rule_runtime(unit_id, rule_id, db) # close its open event so the portal doesn't stay red
await _sync_keepalive_to_rules(unit_id, db) # no-op if no enabled rules remain
return {"status": "ok", "deleted": rule_id}