Feat: Scheduler implemented, WIP
backend/services/alert_service.py | 407 lines (new file)

@@ -0,0 +1,407 @@
"""
Alert Service

Manages in-app alerts for device status changes and system events.
Provides a foundation for future notification channels (email, webhook).
"""

import json
import uuid
import logging
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any

from sqlalchemy.orm import Session
from sqlalchemy import and_, or_

from backend.models import Alert, RosterUnit

logger = logging.getLogger(__name__)


class AlertService:
"""
|
||||
Service for managing alerts.
|
||||
|
||||
Handles alert lifecycle:
|
||||
- Create alerts from various triggers
|
||||
- Query active alerts
|
||||
- Acknowledge/resolve/dismiss alerts
|
||||
- (Future) Dispatch to notification channels
|
||||
"""
|
||||
|
||||
def __init__(self, db: Session):
|
||||
self.db = db
|
||||
|
||||
def create_alert(
|
||||
self,
|
||||
alert_type: str,
|
||||
title: str,
|
||||
message: str = None,
|
||||
severity: str = "warning",
|
||||
unit_id: str = None,
|
||||
project_id: str = None,
|
||||
location_id: str = None,
|
||||
schedule_id: str = None,
|
||||
metadata: dict = None,
|
||||
expires_hours: int = 24,
|
||||
) -> Alert:
|
||||
"""
|
||||
Create a new alert.
|
||||
|
||||
Args:
|
||||
alert_type: Type of alert (device_offline, device_online, schedule_failed)
|
||||
title: Short alert title
|
||||
message: Detailed description
|
||||
severity: info, warning, or critical
|
||||
unit_id: Related unit ID (optional)
|
||||
project_id: Related project ID (optional)
|
||||
location_id: Related location ID (optional)
|
||||
schedule_id: Related schedule ID (optional)
|
||||
metadata: Additional JSON data
|
||||
expires_hours: Hours until auto-expiry (default 24)
|
||||
|
||||
Returns:
|
||||
Created Alert instance
|
||||
"""
|
||||
alert = Alert(
|
||||
id=str(uuid.uuid4()),
|
||||
alert_type=alert_type,
|
||||
title=title,
|
||||
message=message,
|
||||
severity=severity,
|
||||
unit_id=unit_id,
|
||||
project_id=project_id,
|
||||
location_id=location_id,
|
||||
schedule_id=schedule_id,
|
||||
alert_metadata=json.dumps(metadata) if metadata else None,
|
||||
status="active",
|
||||
expires_at=datetime.utcnow() + timedelta(hours=expires_hours),
|
||||
)
|
||||
|
||||
self.db.add(alert)
|
||||
self.db.commit()
|
||||
self.db.refresh(alert)
|
||||
|
||||
logger.info(f"Created alert: {alert.title} ({alert.alert_type})")
|
||||
return alert
|
||||

    def create_device_offline_alert(
        self,
        unit_id: str,
        consecutive_failures: int = 0,
        last_error: Optional[str] = None,
    ) -> Optional[Alert]:
        """
        Create alert when device becomes unreachable.

        Only creates if no active offline alert exists for this device.

        Args:
            unit_id: The unit that went offline
            consecutive_failures: Number of consecutive poll failures
            last_error: Last error message from polling

        Returns:
            Created Alert or None if alert already exists
        """
        # Check if active offline alert already exists
        existing = self.db.query(Alert).filter(
            and_(
                Alert.unit_id == unit_id,
                Alert.alert_type == "device_offline",
                Alert.status == "active",
            )
        ).first()

        if existing:
            logger.debug(f"Offline alert already exists for {unit_id}")
            return None

        # Get unit info for title
        unit = self.db.query(RosterUnit).filter_by(id=unit_id).first()
        unit_name = unit.id if unit else unit_id

        # Determine severity based on failure count
        severity = "critical" if consecutive_failures >= 5 else "warning"

        return self.create_alert(
            alert_type="device_offline",
            title=f"{unit_name} is offline",
            message=f"Device has been unreachable after {consecutive_failures} failed connection attempts."
            + (f" Last error: {last_error}" if last_error else ""),
            severity=severity,
            unit_id=unit_id,
            metadata={
                "consecutive_failures": consecutive_failures,
                "last_error": last_error,
            },
            expires_hours=48,  # Offline alerts stay longer
        )

    def resolve_device_offline_alert(self, unit_id: str) -> Optional[Alert]:
        """
        Auto-resolve offline alert when device comes back online.

        Also creates a "device_online" info alert to notify the user.

        Args:
            unit_id: The unit that came back online

        Returns:
            The resolved Alert or None if no alert existed
        """
        # Find active offline alert
        alert = self.db.query(Alert).filter(
            and_(
                Alert.unit_id == unit_id,
                Alert.alert_type == "device_offline",
                Alert.status == "active",
            )
        ).first()

        if not alert:
            return None

        # Resolve the offline alert
        alert.status = "resolved"
        alert.resolved_at = datetime.utcnow()
        self.db.commit()

        logger.info(f"Resolved offline alert for {unit_id}")

        # Create online notification
        unit = self.db.query(RosterUnit).filter_by(id=unit_id).first()
        unit_name = unit.id if unit else unit_id

        self.create_alert(
            alert_type="device_online",
            title=f"{unit_name} is back online",
            message="Device connection has been restored.",
            severity="info",
            unit_id=unit_id,
            expires_hours=6,  # Info alerts expire quickly
        )

        return alert
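
    # Illustrative pairing (comment only; the poller described here is hypothetical
    # and not part of this commit): a device-status poller would typically call
    #     create_device_offline_alert(unit_id, failures, error)  when a poll fails, and
    #     resolve_device_offline_alert(unit_id)                  when the device recovers,
    # relying on the duplicate check above to avoid stacking offline alerts.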

    def create_schedule_failed_alert(
        self,
        schedule_id: str,
        action_type: str,
        unit_id: Optional[str] = None,
        error_message: Optional[str] = None,
        project_id: Optional[str] = None,
        location_id: Optional[str] = None,
    ) -> Alert:
        """
        Create alert when a scheduled action fails.

        Args:
            schedule_id: The ScheduledAction or RecurringSchedule ID
            action_type: start, stop, download
            unit_id: Related unit
            error_message: Error from execution
            project_id: Related project
            location_id: Related location

        Returns:
            Created Alert
        """
        return self.create_alert(
            alert_type="schedule_failed",
            title=f"Scheduled {action_type} failed",
            message=error_message or f"The scheduled {action_type} action did not complete successfully.",
            severity="warning",
            unit_id=unit_id,
            project_id=project_id,
            location_id=location_id,
            schedule_id=schedule_id,
            metadata={"action_type": action_type},
            expires_hours=24,
        )

    def get_active_alerts(
        self,
        project_id: Optional[str] = None,
        unit_id: Optional[str] = None,
        alert_type: Optional[str] = None,
        min_severity: Optional[str] = None,
        limit: int = 50,
    ) -> List[Alert]:
        """
        Query active alerts with optional filters.

        Args:
            project_id: Filter by project
            unit_id: Filter by unit
            alert_type: Filter by alert type
            min_severity: Minimum severity (info, warning, critical)
            limit: Maximum results

        Returns:
            List of matching alerts
        """
        query = self.db.query(Alert).filter(Alert.status == "active")

        if project_id:
            query = query.filter(Alert.project_id == project_id)

        if unit_id:
            query = query.filter(Alert.unit_id == unit_id)

        if alert_type:
            query = query.filter(Alert.alert_type == alert_type)

        if min_severity:
            # Map severity to numeric for comparison
            severity_levels = {"info": 1, "warning": 2, "critical": 3}
            min_level = severity_levels.get(min_severity, 1)

            if min_level == 2:
                query = query.filter(Alert.severity.in_(["warning", "critical"]))
            elif min_level == 3:
                query = query.filter(Alert.severity == "critical")

        return query.order_by(Alert.created_at.desc()).limit(limit).all()

    def get_all_alerts(
        self,
        status: Optional[str] = None,
        project_id: Optional[str] = None,
        unit_id: Optional[str] = None,
        alert_type: Optional[str] = None,
        limit: int = 50,
        offset: int = 0,
    ) -> List[Alert]:
        """
        Query all alerts with optional filters (includes non-active).

        Args:
            status: Filter by status (active, acknowledged, resolved, dismissed)
            project_id: Filter by project
            unit_id: Filter by unit
            alert_type: Filter by alert type
            limit: Maximum results
            offset: Pagination offset

        Returns:
            List of matching alerts
        """
        query = self.db.query(Alert)

        if status:
            query = query.filter(Alert.status == status)

        if project_id:
            query = query.filter(Alert.project_id == project_id)

        if unit_id:
            query = query.filter(Alert.unit_id == unit_id)

        if alert_type:
            query = query.filter(Alert.alert_type == alert_type)

        return (
            query.order_by(Alert.created_at.desc())
            .offset(offset)
            .limit(limit)
            .all()
        )

    def get_active_alert_count(self) -> int:
        """Get count of active alerts for badge display."""
        return self.db.query(Alert).filter(Alert.status == "active").count()

    def acknowledge_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Mark alert as acknowledged.

        Args:
            alert_id: Alert to acknowledge

        Returns:
            Updated Alert or None if not found
        """
        alert = self.db.query(Alert).filter_by(id=alert_id).first()
        if not alert:
            return None

        alert.status = "acknowledged"
        alert.acknowledged_at = datetime.utcnow()
        self.db.commit()

        logger.info(f"Acknowledged alert: {alert.title}")
        return alert

    def dismiss_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Dismiss alert (user chose to ignore).

        Args:
            alert_id: Alert to dismiss

        Returns:
            Updated Alert or None if not found
        """
        alert = self.db.query(Alert).filter_by(id=alert_id).first()
        if not alert:
            return None

        alert.status = "dismissed"
        self.db.commit()

        logger.info(f"Dismissed alert: {alert.title}")
        return alert

    def resolve_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Manually resolve an alert.

        Args:
            alert_id: Alert to resolve

        Returns:
            Updated Alert or None if not found
        """
        alert = self.db.query(Alert).filter_by(id=alert_id).first()
        if not alert:
            return None

        alert.status = "resolved"
        alert.resolved_at = datetime.utcnow()
        self.db.commit()

        logger.info(f"Resolved alert: {alert.title}")
        return alert

    def cleanup_expired_alerts(self) -> int:
        """
        Dismiss active alerts that are past their expiration time.

        Returns:
            Number of alerts cleaned up
        """
        now = datetime.utcnow()
        expired = self.db.query(Alert).filter(
            and_(
                Alert.expires_at.isnot(None),
                Alert.expires_at < now,
                Alert.status == "active",
            )
        ).all()

        count = len(expired)
        for alert in expired:
            alert.status = "dismissed"

        if count > 0:
            self.db.commit()
            logger.info(f"Cleaned up {count} expired alerts")

        return count


def get_alert_service(db: Session) -> AlertService:
    """Get an AlertService instance with the given database session."""
    return AlertService(db)
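
A minimal usage sketch (not part of the diff above): it assumes a `SessionLocal` session factory and a poll-result object with `unit_id`, `ok`, `consecutive_failures`, and `error` attributes — those names are hypothetical — but the AlertService calls match the methods defined in this file.

from backend.database import SessionLocal  # assumed session factory, not defined in this commit
from backend.services.alert_service import get_alert_service


def handle_poll_result(result):
    """Translate one poll result into alert state changes (illustrative only)."""
    db = SessionLocal()
    try:
        service = get_alert_service(db)
        if result.ok:
            # Clears any active offline alert and emits an info-level "back online" alert.
            service.resolve_device_offline_alert(result.unit_id)
        else:
            # De-duplicated internally: returns None if an offline alert is already active.
            service.create_device_offline_alert(
                unit_id=result.unit_id,
                consecutive_failures=result.consecutive_failures,
                last_error=result.error,
            )
        # Periodic housekeeping; dismisses alerts past expires_at.
        service.cleanup_expired_alerts()
    finally:
        db.close()

A UI badge endpoint would likely call get_active_alert_count(), with alert lists served from get_active_alerts() or, for history views, get_all_alerts() with limit/offset pagination.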