"""
|
|
Alert Service
|
|
|
|
Manages in-app alerts for device status changes and system events.
|
|
Provides foundation for future notification channels (email, webhook).
|
|
"""
|
|
|
|
import json
|
|
import uuid
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from typing import Optional, List, Dict, Any
|
|
|
|
from sqlalchemy.orm import Session
|
|
from sqlalchemy import and_, or_
|
|
|
|
from backend.models import Alert, RosterUnit
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|


class AlertService:
    """
    Service for managing alerts.

    Handles alert lifecycle:
    - Create alerts from various triggers
    - Query active alerts
    - Acknowledge/resolve/dismiss alerts
    - (Future) Dispatch to notification channels
    """

    def __init__(self, db: Session):
        self.db = db

    def create_alert(
        self,
        alert_type: str,
        title: str,
        message: Optional[str] = None,
        severity: str = "warning",
        unit_id: Optional[str] = None,
        project_id: Optional[str] = None,
        location_id: Optional[str] = None,
        schedule_id: Optional[str] = None,
        metadata: Optional[dict] = None,
        expires_hours: int = 24,
    ) -> Alert:
        """
        Create a new alert.

        Args:
            alert_type: Type of alert (e.g., device_offline, device_online,
                schedule_failed, schedule_completed)
            title: Short alert title
            message: Detailed description
            severity: info, warning, or critical
            unit_id: Related unit ID (optional)
            project_id: Related project ID (optional)
            location_id: Related location ID (optional)
            schedule_id: Related schedule ID (optional)
            metadata: Additional JSON data
            expires_hours: Hours until auto-expiry (default 24)

        Returns:
            Created Alert instance
        """
        alert = Alert(
            id=str(uuid.uuid4()),
            alert_type=alert_type,
            title=title,
            message=message,
            severity=severity,
            unit_id=unit_id,
            project_id=project_id,
            location_id=location_id,
            schedule_id=schedule_id,
            alert_metadata=json.dumps(metadata) if metadata else None,
            status="active",
            expires_at=datetime.utcnow() + timedelta(hours=expires_hours),
        )

        self.db.add(alert)
        self.db.commit()
        self.db.refresh(alert)

        logger.info(f"Created alert: {alert.title} ({alert.alert_type})")
        return alert

    def create_device_offline_alert(
        self,
        unit_id: str,
        consecutive_failures: int = 0,
        last_error: Optional[str] = None,
    ) -> Optional[Alert]:
        """
        Create alert when a device becomes unreachable.

        Only creates one if no active offline alert exists for this device.

        Args:
            unit_id: The unit that went offline
            consecutive_failures: Number of consecutive poll failures
            last_error: Last error message from polling

        Returns:
            Created Alert or None if an alert already exists
        """
        # Check if an active offline alert already exists
        existing = self.db.query(Alert).filter(
            and_(
                Alert.unit_id == unit_id,
                Alert.alert_type == "device_offline",
                Alert.status == "active",
            )
        ).first()

        if existing:
            logger.debug(f"Offline alert already exists for {unit_id}")
            return None

        # Get unit info for the title
        unit = self.db.query(RosterUnit).filter_by(id=unit_id).first()
        unit_name = unit.id if unit else unit_id

        # Determine severity based on failure count
        severity = "critical" if consecutive_failures >= 5 else "warning"

        return self.create_alert(
            alert_type="device_offline",
            title=f"{unit_name} is offline",
            message=f"Device has been unreachable after {consecutive_failures} failed connection attempts."
            + (f" Last error: {last_error}" if last_error else ""),
            severity=severity,
            unit_id=unit_id,
            metadata={
                "consecutive_failures": consecutive_failures,
                "last_error": last_error,
            },
            expires_hours=48,  # Offline alerts stay longer
        )

    def resolve_device_offline_alert(self, unit_id: str) -> Optional[Alert]:
        """
        Auto-resolve the offline alert when a device comes back online.

        Also creates a "device_online" info alert to notify the user.

        Args:
            unit_id: The unit that came back online

        Returns:
            The resolved Alert or None if no alert existed
        """
        # Find the active offline alert
        alert = self.db.query(Alert).filter(
            and_(
                Alert.unit_id == unit_id,
                Alert.alert_type == "device_offline",
                Alert.status == "active",
            )
        ).first()

        if not alert:
            return None

        # Resolve the offline alert
        alert.status = "resolved"
        alert.resolved_at = datetime.utcnow()
        self.db.commit()

        logger.info(f"Resolved offline alert for {unit_id}")

        # Create online notification
        unit = self.db.query(RosterUnit).filter_by(id=unit_id).first()
        unit_name = unit.id if unit else unit_id

        self.create_alert(
            alert_type="device_online",
            title=f"{unit_name} is back online",
            message="Device connection has been restored.",
            severity="info",
            unit_id=unit_id,
            expires_hours=6,  # Info alerts expire quickly
        )

        return alert
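
    # Illustrative sketch (assumption, not existing project code): a device
    # status poller would typically pair the two methods above, raising the
    # offline alert after repeated failures and resolving it on recovery.
    # The ``poll_result`` object and its fields below are hypothetical.
    #
    #     service = AlertService(db)
    #     if poll_result.reachable:
    #         service.resolve_device_offline_alert(poll_result.unit_id)
    #     else:
    #         service.create_device_offline_alert(
    #             unit_id=poll_result.unit_id,
    #             consecutive_failures=poll_result.failure_count,
    #             last_error=poll_result.error,
    #         )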

    def create_schedule_failed_alert(
        self,
        schedule_id: str,
        action_type: str,
        unit_id: Optional[str] = None,
        error_message: Optional[str] = None,
        project_id: Optional[str] = None,
        location_id: Optional[str] = None,
    ) -> Alert:
        """
        Create alert when a scheduled action fails.

        Args:
            schedule_id: The ScheduledAction or RecurringSchedule ID
            action_type: start, stop, download
            unit_id: Related unit
            error_message: Error from execution
            project_id: Related project
            location_id: Related location

        Returns:
            Created Alert
        """
        return self.create_alert(
            alert_type="schedule_failed",
            title=f"Scheduled {action_type} failed",
            message=error_message or f"The scheduled {action_type} action did not complete successfully.",
            severity="warning",
            unit_id=unit_id,
            project_id=project_id,
            location_id=location_id,
            schedule_id=schedule_id,
            metadata={"action_type": action_type},
            expires_hours=24,
        )

    def create_schedule_completed_alert(
        self,
        schedule_id: str,
        action_type: str,
        unit_id: Optional[str] = None,
        project_id: Optional[str] = None,
        location_id: Optional[str] = None,
        metadata: Optional[dict] = None,
    ) -> Alert:
        """
        Create alert when a scheduled action completes successfully.

        Args:
            schedule_id: The ScheduledAction ID
            action_type: start, stop, download
            unit_id: Related unit
            project_id: Related project
            location_id: Related location
            metadata: Additional info (e.g., downloaded folder, index numbers)

        Returns:
            Created Alert
        """
        # Build a descriptive message based on action type and metadata
        if action_type == "stop" and metadata:
            download_folder = metadata.get("downloaded_folder")
            download_success = metadata.get("download_success", False)
            if download_success and download_folder:
                message = f"Measurement stopped and data downloaded ({download_folder})"
            elif download_success is False and metadata.get("download_attempted"):
                message = "Measurement stopped but download failed"
            else:
                message = "Measurement stopped successfully"
        elif action_type == "start" and metadata:
            new_index = metadata.get("new_index")
            if new_index is not None:
                message = f"Measurement started (index {new_index:04d})"
            else:
                message = "Measurement started successfully"
        else:
            message = f"Scheduled {action_type} completed successfully"

        return self.create_alert(
            alert_type="schedule_completed",
            title=f"Scheduled {action_type} completed",
            message=message,
            severity="info",
            unit_id=unit_id,
            project_id=project_id,
            location_id=location_id,
            schedule_id=schedule_id,
            metadata={"action_type": action_type, **(metadata or {})},
            expires_hours=12,  # Info alerts expire quickly
        )
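
    # Illustrative sketch (assumption, not existing project code): a schedule
    # executor reporting a completed "stop" action might pass metadata like
    # this; ``schedule`` and ``result`` are hypothetical names.
    #
    #     service.create_schedule_completed_alert(
    #         schedule_id=schedule.id,
    #         action_type="stop",
    #         unit_id=schedule.unit_id,
    #         metadata={
    #             "download_attempted": True,
    #             "download_success": True,
    #             "downloaded_folder": result.folder_name,
    #         },
    #     )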

    def get_active_alerts(
        self,
        project_id: Optional[str] = None,
        unit_id: Optional[str] = None,
        alert_type: Optional[str] = None,
        min_severity: Optional[str] = None,
        limit: int = 50,
    ) -> List[Alert]:
        """
        Query active alerts with optional filters.

        Args:
            project_id: Filter by project
            unit_id: Filter by unit
            alert_type: Filter by alert type
            min_severity: Minimum severity (info, warning, critical)
            limit: Maximum results

        Returns:
            List of matching alerts
        """
        query = self.db.query(Alert).filter(Alert.status == "active")

        if project_id:
            query = query.filter(Alert.project_id == project_id)

        if unit_id:
            query = query.filter(Alert.unit_id == unit_id)

        if alert_type:
            query = query.filter(Alert.alert_type == alert_type)

        if min_severity:
            # Map severity to a numeric level for comparison
            severity_levels = {"info": 1, "warning": 2, "critical": 3}
            min_level = severity_levels.get(min_severity, 1)

            if min_level == 2:
                query = query.filter(Alert.severity.in_(["warning", "critical"]))
            elif min_level == 3:
                query = query.filter(Alert.severity == "critical")

        return query.order_by(Alert.created_at.desc()).limit(limit).all()

    def get_all_alerts(
        self,
        status: Optional[str] = None,
        project_id: Optional[str] = None,
        unit_id: Optional[str] = None,
        alert_type: Optional[str] = None,
        limit: int = 50,
        offset: int = 0,
    ) -> List[Alert]:
        """
        Query all alerts with optional filters (includes non-active).

        Args:
            status: Filter by status (active, acknowledged, resolved, dismissed)
            project_id: Filter by project
            unit_id: Filter by unit
            alert_type: Filter by alert type
            limit: Maximum results
            offset: Pagination offset

        Returns:
            List of matching alerts
        """
        query = self.db.query(Alert)

        if status:
            query = query.filter(Alert.status == status)

        if project_id:
            query = query.filter(Alert.project_id == project_id)

        if unit_id:
            query = query.filter(Alert.unit_id == unit_id)

        if alert_type:
            query = query.filter(Alert.alert_type == alert_type)

        return (
            query.order_by(Alert.created_at.desc())
            .offset(offset)
            .limit(limit)
            .all()
        )

    def get_active_alert_count(self) -> int:
        """Get count of active alerts for badge display."""
        return self.db.query(Alert).filter(Alert.status == "active").count()

    def acknowledge_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Mark alert as acknowledged.

        Args:
            alert_id: Alert to acknowledge

        Returns:
            Updated Alert or None if not found
        """
        alert = self.db.query(Alert).filter_by(id=alert_id).first()
        if not alert:
            return None

        alert.status = "acknowledged"
        alert.acknowledged_at = datetime.utcnow()
        self.db.commit()

        logger.info(f"Acknowledged alert: {alert.title}")
        return alert

    def dismiss_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Dismiss alert (user chose to ignore it).

        Args:
            alert_id: Alert to dismiss

        Returns:
            Updated Alert or None if not found
        """
        alert = self.db.query(Alert).filter_by(id=alert_id).first()
        if not alert:
            return None

        alert.status = "dismissed"
        self.db.commit()

        logger.info(f"Dismissed alert: {alert.title}")
        return alert

    def resolve_alert(self, alert_id: str) -> Optional[Alert]:
        """
        Manually resolve an alert.

        Args:
            alert_id: Alert to resolve

        Returns:
            Updated Alert or None if not found
        """
        alert = self.db.query(Alert).filter_by(id=alert_id).first()
        if not alert:
            return None

        alert.status = "resolved"
        alert.resolved_at = datetime.utcnow()
        self.db.commit()

        logger.info(f"Resolved alert: {alert.title}")
        return alert

    def cleanup_expired_alerts(self) -> int:
        """
        Dismiss alerts that are past their expiration time.

        Returns:
            Number of alerts cleaned up
        """
        now = datetime.utcnow()
        expired = self.db.query(Alert).filter(
            and_(
                Alert.expires_at.isnot(None),
                Alert.expires_at < now,
                Alert.status == "active",
            )
        ).all()

        count = len(expired)
        for alert in expired:
            alert.status = "dismissed"

        if count > 0:
            self.db.commit()
            logger.info(f"Cleaned up {count} expired alerts")

        return count


def get_alert_service(db: Session) -> AlertService:
    """Get an AlertService instance with the given database session."""
    return AlertService(db)
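

# Example usage sketch (illustrative only): it assumes the project exposes a
# session factory such as ``backend.database.SessionLocal``; that import and
# the "unit-001" ID below are assumptions, not part of this module's API.
if __name__ == "__main__":  # pragma: no cover
    from backend.database import SessionLocal  # assumed session factory

    db = SessionLocal()
    try:
        service = get_alert_service(db)

        # Raise an offline alert for a hypothetical unit, then resolve it.
        service.create_device_offline_alert("unit-001", consecutive_failures=3)
        service.resolve_device_offline_alert("unit-001")

        # List active warning/critical alerts and show the badge count.
        for alert in service.get_active_alerts(min_severity="warning"):
            print(alert.title, alert.severity)
        print("Active alerts:", service.get_active_alert_count())

        # Dismiss anything past its expiry window.
        service.cleanup_expired_alerts()
    finally:
        db.close()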