terra-view/backend/services/alert_service.py
"""
Alert Service
Manages in-app alerts for device status changes and system events.
Provides foundation for future notification channels (email, webhook).
"""
import json
import uuid
import logging
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
from backend.models import Alert, RosterUnit
logger = logging.getLogger(__name__)


class AlertService:
    """
    Service for managing alerts.

    Handles the alert lifecycle:
    - Create alerts from various triggers
    - Query active alerts
    - Acknowledge/resolve/dismiss alerts
    - (Future) Dispatch to notification channels
    """

    def __init__(self, db: Session):
        self.db = db

    def create_alert(
        self,
        alert_type: str,
        title: str,
        message: Optional[str] = None,
        severity: str = "warning",
        unit_id: Optional[str] = None,
        project_id: Optional[str] = None,
        location_id: Optional[str] = None,
        schedule_id: Optional[str] = None,
        metadata: Optional[dict] = None,
        expires_hours: int = 24,
    ) -> Alert:
        """
        Create a new alert.

        Args:
            alert_type: Type of alert (device_offline, device_online, schedule_failed)
            title: Short alert title
            message: Detailed description
            severity: info, warning, or critical
            unit_id: Related unit ID (optional)
            project_id: Related project ID (optional)
            location_id: Related location ID (optional)
            schedule_id: Related schedule ID (optional)
            metadata: Additional JSON data
            expires_hours: Hours until auto-expiry (default 24)

        Returns:
            Created Alert instance
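
        Example (illustrative usage; assumes a configured SQLAlchemy Session `db`):
            service = AlertService(db)
            alert = service.create_alert(
                alert_type="device_offline",
                title="unit-7 is offline",
                severity="critical",
                unit_id="unit-7",
                metadata={"consecutive_failures": 5},
            )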
"""
alert = Alert(
id=str(uuid.uuid4()),
alert_type=alert_type,
title=title,
message=message,
severity=severity,
unit_id=unit_id,
project_id=project_id,
location_id=location_id,
schedule_id=schedule_id,
alert_metadata=json.dumps(metadata) if metadata else None,
status="active",
expires_at=datetime.utcnow() + timedelta(hours=expires_hours),
)
self.db.add(alert)
self.db.commit()
self.db.refresh(alert)
logger.info(f"Created alert: {alert.title} ({alert.alert_type})")
return alert

    def create_device_offline_alert(
        self,
        unit_id: str,
        consecutive_failures: int = 0,
        last_error: Optional[str] = None,
    ) -> Optional[Alert]:
        """
        Create an alert when a device becomes unreachable.

        Only creates one if no active offline alert exists for this device.

        Args:
            unit_id: The unit that went offline
            consecutive_failures: Number of consecutive poll failures
            last_error: Last error message from polling

        Returns:
            Created Alert or None if an alert already exists
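
        Example (illustrative; typically invoked from a polling loop):
            service.create_device_offline_alert(
                unit_id="unit-7",
                consecutive_failures=3,
                last_error="connection timed out",
            )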
"""
# Check if active offline alert already exists
existing = self.db.query(Alert).filter(
and_(
Alert.unit_id == unit_id,
Alert.alert_type == "device_offline",
Alert.status == "active",
)
).first()
if existing:
logger.debug(f"Offline alert already exists for {unit_id}")
return None
# Get unit info for title
unit = self.db.query(RosterUnit).filter_by(id=unit_id).first()
unit_name = unit.id if unit else unit_id
# Determine severity based on failure count
severity = "critical" if consecutive_failures >= 5 else "warning"
return self.create_alert(
alert_type="device_offline",
title=f"{unit_name} is offline",
message=f"Device has been unreachable after {consecutive_failures} failed connection attempts."
+ (f" Last error: {last_error}" if last_error else ""),
severity=severity,
unit_id=unit_id,
metadata={
"consecutive_failures": consecutive_failures,
"last_error": last_error,
},
expires_hours=48, # Offline alerts stay longer
)

    def resolve_device_offline_alert(self, unit_id: str) -> Optional[Alert]:
        """
        Auto-resolve the offline alert when a device comes back online.

        Also creates a "device_online" info alert to notify the user.

        Args:
            unit_id: The unit that came back online

        Returns:
            The resolved Alert or None if no alert existed
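
        Example (illustrative; call when a previously failing device polls
        successfully again):
            service.resolve_device_offline_alert(unit_id="unit-7")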
"""
# Find active offline alert
alert = self.db.query(Alert).filter(
and_(
Alert.unit_id == unit_id,
Alert.alert_type == "device_offline",
Alert.status == "active",
)
).first()
if not alert:
return None
# Resolve the offline alert
alert.status = "resolved"
alert.resolved_at = datetime.utcnow()
self.db.commit()
logger.info(f"Resolved offline alert for {unit_id}")
# Create online notification
unit = self.db.query(RosterUnit).filter_by(id=unit_id).first()
unit_name = unit.id if unit else unit_id
self.create_alert(
alert_type="device_online",
title=f"{unit_name} is back online",
message="Device connection has been restored.",
severity="info",
unit_id=unit_id,
expires_hours=6, # Info alerts expire quickly
)
return alert

    def create_schedule_failed_alert(
        self,
        schedule_id: str,
        action_type: str,
        unit_id: Optional[str] = None,
        error_message: Optional[str] = None,
        project_id: Optional[str] = None,
        location_id: Optional[str] = None,
    ) -> Alert:
        """
        Create an alert when a scheduled action fails.

        Args:
            schedule_id: The ScheduledAction or RecurringSchedule ID
            action_type: start, stop, or download
            unit_id: Related unit
            error_message: Error from execution
            project_id: Related project
            location_id: Related location

        Returns:
            Created Alert
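
        Example (illustrative; typically called by the schedule executor):
            service.create_schedule_failed_alert(
                schedule_id="sched-123",
                action_type="download",
                unit_id="unit-7",
                error_message="device refused connection",
            )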
"""
return self.create_alert(
alert_type="schedule_failed",
title=f"Scheduled {action_type} failed",
message=error_message or f"The scheduled {action_type} action did not complete successfully.",
severity="warning",
unit_id=unit_id,
project_id=project_id,
location_id=location_id,
schedule_id=schedule_id,
metadata={"action_type": action_type},
expires_hours=24,
)

    def get_active_alerts(
        self,
        project_id: Optional[str] = None,
        unit_id: Optional[str] = None,
        alert_type: Optional[str] = None,
        min_severity: Optional[str] = None,
        limit: int = 50,
    ) -> List[Alert]:
        """
        Query active alerts with optional filters.

        Args:
            project_id: Filter by project
            unit_id: Filter by unit
            alert_type: Filter by alert type
            min_severity: Minimum severity (info, warning, critical)
            limit: Maximum results

        Returns:
            List of matching alerts
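
        Example (illustrative; fetch warnings and critical alerts only):
            alerts = service.get_active_alerts(min_severity="warning", limit=20)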
"""
query = self.db.query(Alert).filter(Alert.status == "active")
if project_id:
query = query.filter(Alert.project_id == project_id)
if unit_id:
query = query.filter(Alert.unit_id == unit_id)
if alert_type:
query = query.filter(Alert.alert_type == alert_type)
if min_severity:
# Map severity to numeric for comparison
severity_levels = {"info": 1, "warning": 2, "critical": 3}
min_level = severity_levels.get(min_severity, 1)
if min_level == 2:
query = query.filter(Alert.severity.in_(["warning", "critical"]))
elif min_level == 3:
query = query.filter(Alert.severity == "critical")
return query.order_by(Alert.created_at.desc()).limit(limit).all()

    def get_all_alerts(
        self,
        status: Optional[str] = None,
        project_id: Optional[str] = None,
        unit_id: Optional[str] = None,
        alert_type: Optional[str] = None,
        limit: int = 50,
        offset: int = 0,
    ) -> List[Alert]:
        """
        Query all alerts with optional filters (includes non-active alerts).

        Args:
            status: Filter by status (active, acknowledged, resolved, dismissed)
            project_id: Filter by project
            unit_id: Filter by unit
            alert_type: Filter by alert type
            limit: Maximum results
            offset: Pagination offset

        Returns:
            List of matching alerts
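
        Example (illustrative; fetch the second page of resolved alerts):
            page2 = service.get_all_alerts(status="resolved", limit=50, offset=50)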
"""
query = self.db.query(Alert)
if status:
query = query.filter(Alert.status == status)
if project_id:
query = query.filter(Alert.project_id == project_id)
if unit_id:
query = query.filter(Alert.unit_id == unit_id)
if alert_type:
query = query.filter(Alert.alert_type == alert_type)
return (
query.order_by(Alert.created_at.desc())
.offset(offset)
.limit(limit)
.all()
)

    def get_active_alert_count(self) -> int:
        """Get count of active alerts for badge display."""
        return self.db.query(Alert).filter(Alert.status == "active").count()

    def acknowledge_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Mark an alert as acknowledged.

        Args:
            alert_id: Alert to acknowledge

        Returns:
            Updated Alert or None if not found
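
        Example (illustrative; `alert` is a previously fetched Alert):
            service.acknowledge_alert(alert_id=alert.id)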
"""
alert = self.db.query(Alert).filter_by(id=alert_id).first()
if not alert:
return None
alert.status = "acknowledged"
alert.acknowledged_at = datetime.utcnow()
self.db.commit()
logger.info(f"Acknowledged alert: {alert.title}")
return alert

    def dismiss_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Dismiss an alert (the user chose to ignore it).

        Args:
            alert_id: Alert to dismiss

        Returns:
            Updated Alert or None if not found
        """
        alert = self.db.query(Alert).filter_by(id=alert_id).first()
        if not alert:
            return None
        alert.status = "dismissed"
        self.db.commit()
        logger.info(f"Dismissed alert: {alert.title}")
        return alert

    def resolve_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Manually resolve an alert.

        Args:
            alert_id: Alert to resolve

        Returns:
            Updated Alert or None if not found
        """
        alert = self.db.query(Alert).filter_by(id=alert_id).first()
        if not alert:
            return None
        alert.status = "resolved"
        alert.resolved_at = datetime.utcnow()
        self.db.commit()
        logger.info(f"Resolved alert: {alert.title}")
        return alert

    def cleanup_expired_alerts(self) -> int:
        """
        Dismiss active alerts that are past their expiration time.

        Returns:
            Number of alerts cleaned up
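
        Example (illustrative; intended to run from a periodic background job):
            removed = service.cleanup_expired_alerts()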
"""
now = datetime.utcnow()
expired = self.db.query(Alert).filter(
and_(
Alert.expires_at.isnot(None),
Alert.expires_at < now,
Alert.status == "active",
)
).all()
count = len(expired)
for alert in expired:
alert.status = "dismissed"
if count > 0:
self.db.commit()
logger.info(f"Cleaned up {count} expired alerts")
return count


def get_alert_service(db: Session) -> AlertService:
    """Get an AlertService instance with the given database session."""
    return AlertService(db)
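

# Example (illustrative; assumes a request-scoped SQLAlchemy Session `db`
# provided by the surrounding application, e.g. a web-framework dependency):
#
#     service = get_alert_service(db)
#     if service.get_active_alert_count() > 0:
#         for alert in service.get_active_alerts(min_severity="warning"):
#             print(alert.title, alert.severity)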