Feat: Scheduler implemented, WIP

This commit is contained in:
serversdwn
2026-01-21 23:11:58 +00:00
parent 1f3fa7a718
commit 65ea0920db
20 changed files with 3682 additions and 15 deletions

View File

@@ -0,0 +1,407 @@
"""
Alert Service
Manages in-app alerts for device status changes and system events.
Provides a foundation for future notification channels (email, webhook).
"""
import json
import uuid
import logging
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
from backend.models import Alert, RosterUnit
logger = logging.getLogger(__name__)
class AlertService:
"""
Service for managing alerts.
Handles alert lifecycle:
- Create alerts from various triggers
- Query active alerts
- Acknowledge/resolve/dismiss alerts
- (Future) Dispatch to notification channels
"""
def __init__(self, db: Session):
self.db = db
def create_alert(
self,
alert_type: str,
title: str,
message: str = None,
severity: str = "warning",
unit_id: str = None,
project_id: str = None,
location_id: str = None,
schedule_id: str = None,
metadata: dict = None,
expires_hours: int = 24,
) -> Alert:
"""
Create a new alert.
Args:
alert_type: Type of alert (device_offline, device_online, schedule_failed)
title: Short alert title
message: Detailed description
severity: info, warning, or critical
unit_id: Related unit ID (optional)
project_id: Related project ID (optional)
location_id: Related location ID (optional)
schedule_id: Related schedule ID (optional)
metadata: Additional JSON data
expires_hours: Hours until auto-expiry (default 24)
Returns:
Created Alert instance
"""
alert = Alert(
id=str(uuid.uuid4()),
alert_type=alert_type,
title=title,
message=message,
severity=severity,
unit_id=unit_id,
project_id=project_id,
location_id=location_id,
schedule_id=schedule_id,
alert_metadata=json.dumps(metadata) if metadata else None,
status="active",
expires_at=datetime.utcnow() + timedelta(hours=expires_hours),
)
self.db.add(alert)
self.db.commit()
self.db.refresh(alert)
logger.info(f"Created alert: {alert.title} ({alert.alert_type})")
return alert
def create_device_offline_alert(
self,
unit_id: str,
consecutive_failures: int = 0,
last_error: str = None,
) -> Optional[Alert]:
"""
Create an alert when a device becomes unreachable.
Creates one only if no active offline alert already exists for this device.
Args:
unit_id: The unit that went offline
consecutive_failures: Number of consecutive poll failures
last_error: Last error message from polling
Returns:
Created Alert or None if alert already exists
"""
# Check if active offline alert already exists
existing = self.db.query(Alert).filter(
and_(
Alert.unit_id == unit_id,
Alert.alert_type == "device_offline",
Alert.status == "active",
)
).first()
if existing:
logger.debug(f"Offline alert already exists for {unit_id}")
return None
# Get unit info for title
unit = self.db.query(RosterUnit).filter_by(id=unit_id).first()
unit_name = unit.id if unit else unit_id
# Determine severity based on failure count
severity = "critical" if consecutive_failures >= 5 else "warning"
return self.create_alert(
alert_type="device_offline",
title=f"{unit_name} is offline",
message=f"Device has been unreachable after {consecutive_failures} failed connection attempts."
+ (f" Last error: {last_error}" if last_error else ""),
severity=severity,
unit_id=unit_id,
metadata={
"consecutive_failures": consecutive_failures,
"last_error": last_error,
},
expires_hours=48, # Offline alerts stay longer
)
def resolve_device_offline_alert(self, unit_id: str) -> Optional[Alert]:
"""
Auto-resolve the offline alert when a device comes back online.
Also creates a "device_online" info alert to notify the user.
Args:
unit_id: The unit that came back online
Returns:
The resolved Alert or None if no alert existed
"""
# Find active offline alert
alert = self.db.query(Alert).filter(
and_(
Alert.unit_id == unit_id,
Alert.alert_type == "device_offline",
Alert.status == "active",
)
).first()
if not alert:
return None
# Resolve the offline alert
alert.status = "resolved"
alert.resolved_at = datetime.utcnow()
self.db.commit()
logger.info(f"Resolved offline alert for {unit_id}")
# Create online notification
unit = self.db.query(RosterUnit).filter_by(id=unit_id).first()
unit_name = unit.id if unit else unit_id
self.create_alert(
alert_type="device_online",
title=f"{unit_name} is back online",
message="Device connection has been restored.",
severity="info",
unit_id=unit_id,
expires_hours=6, # Info alerts expire quickly
)
return alert
def create_schedule_failed_alert(
self,
schedule_id: str,
action_type: str,
unit_id: str = None,
error_message: str = None,
project_id: str = None,
location_id: str = None,
) -> Alert:
"""
Create alert when a scheduled action fails.
Args:
schedule_id: The ScheduledAction or RecurringSchedule ID
action_type: start, stop, download
unit_id: Related unit
error_message: Error from execution
project_id: Related project
location_id: Related location
Returns:
Created Alert
"""
return self.create_alert(
alert_type="schedule_failed",
title=f"Scheduled {action_type} failed",
message=error_message or f"The scheduled {action_type} action did not complete successfully.",
severity="warning",
unit_id=unit_id,
project_id=project_id,
location_id=location_id,
schedule_id=schedule_id,
metadata={"action_type": action_type},
expires_hours=24,
)
def get_active_alerts(
self,
project_id: str = None,
unit_id: str = None,
alert_type: str = None,
min_severity: str = None,
limit: int = 50,
) -> List[Alert]:
"""
Query active alerts with optional filters.
Args:
project_id: Filter by project
unit_id: Filter by unit
alert_type: Filter by alert type
min_severity: Minimum severity (info, warning, critical)
limit: Maximum results
Returns:
List of matching alerts
"""
query = self.db.query(Alert).filter(Alert.status == "active")
if project_id:
query = query.filter(Alert.project_id == project_id)
if unit_id:
query = query.filter(Alert.unit_id == unit_id)
if alert_type:
query = query.filter(Alert.alert_type == alert_type)
if min_severity:
# Map severity to numeric for comparison
severity_levels = {"info": 1, "warning": 2, "critical": 3}
min_level = severity_levels.get(min_severity, 1)
if min_level == 2:
query = query.filter(Alert.severity.in_(["warning", "critical"]))
elif min_level == 3:
query = query.filter(Alert.severity == "critical")
return query.order_by(Alert.created_at.desc()).limit(limit).all()
def get_all_alerts(
self,
status: str = None,
project_id: str = None,
unit_id: str = None,
alert_type: str = None,
limit: int = 50,
offset: int = 0,
) -> List[Alert]:
"""
Query all alerts with optional filters (includes non-active).
Args:
status: Filter by status (active, acknowledged, resolved, dismissed)
project_id: Filter by project
unit_id: Filter by unit
alert_type: Filter by alert type
limit: Maximum results
offset: Pagination offset
Returns:
List of matching alerts
"""
query = self.db.query(Alert)
if status:
query = query.filter(Alert.status == status)
if project_id:
query = query.filter(Alert.project_id == project_id)
if unit_id:
query = query.filter(Alert.unit_id == unit_id)
if alert_type:
query = query.filter(Alert.alert_type == alert_type)
return (
query.order_by(Alert.created_at.desc())
.offset(offset)
.limit(limit)
.all()
)
def get_active_alert_count(self) -> int:
"""Get count of active alerts for badge display."""
return self.db.query(Alert).filter(Alert.status == "active").count()
def acknowledge_alert(self, alert_id: str) -> Optional[Alert]:
"""
Mark alert as acknowledged.
Args:
alert_id: Alert to acknowledge
Returns:
Updated Alert or None if not found
"""
alert = self.db.query(Alert).filter_by(id=alert_id).first()
if not alert:
return None
alert.status = "acknowledged"
alert.acknowledged_at = datetime.utcnow()
self.db.commit()
logger.info(f"Acknowledged alert: {alert.title}")
return alert
def dismiss_alert(self, alert_id: str) -> Optional[Alert]:
"""
Dismiss alert (user chose to ignore).
Args:
alert_id: Alert to dismiss
Returns:
Updated Alert or None if not found
"""
alert = self.db.query(Alert).filter_by(id=alert_id).first()
if not alert:
return None
alert.status = "dismissed"
self.db.commit()
logger.info(f"Dismissed alert: {alert.title}")
return alert
def resolve_alert(self, alert_id: str) -> Optional[Alert]:
"""
Manually resolve an alert.
Args:
alert_id: Alert to resolve
Returns:
Updated Alert or None if not found
"""
alert = self.db.query(Alert).filter_by(id=alert_id).first()
if not alert:
return None
alert.status = "resolved"
alert.resolved_at = datetime.utcnow()
self.db.commit()
logger.info(f"Resolved alert: {alert.title}")
return alert
def cleanup_expired_alerts(self) -> int:
"""
Remove alerts past their expiration time.
Returns:
Number of alerts cleaned up
"""
now = datetime.utcnow()
expired = self.db.query(Alert).filter(
and_(
Alert.expires_at.isnot(None),
Alert.expires_at < now,
Alert.status == "active",
)
).all()
count = len(expired)
for alert in expired:
alert.status = "dismissed"
if count > 0:
self.db.commit()
logger.info(f"Cleaned up {count} expired alerts")
return count
def get_alert_service(db: Session) -> AlertService:
"""Get an AlertService instance with the given database session."""
return AlertService(db)
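# Example (illustrative smoke test, not used by the application): creates and
# resolves one demo device-offline alert in the configured database. Assumes
# backend.database.SessionLocal is configured and the Alert/RosterUnit tables exist.
if __name__ == "__main__":
    from backend.database import SessionLocal

    db = SessionLocal()
    try:
        service = get_alert_service(db)
        alert = service.create_device_offline_alert("demo-unit", consecutive_failures=5)
        print(f"Active alerts: {service.get_active_alert_count()}")
        if alert:
            service.resolve_device_offline_alert("demo-unit")
        service.cleanup_expired_alerts()
    finally:
        db.close()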

View File

@@ -333,6 +333,76 @@ class DeviceController:
else:
raise UnsupportedDeviceTypeError(f"Unsupported device type: {device_type}")
# ========================================================================
# Store/Index Management
# ========================================================================
async def increment_index(
self,
unit_id: str,
device_type: str,
) -> Dict[str, Any]:
"""
Increment the store/index number on a device.
For SLMs, this increments the store name to prevent "overwrite data?" prompts.
Should be called before starting a new measurement if auto_increment_index is enabled.
Args:
unit_id: Unit identifier
device_type: "slm" | "seismograph"
Returns:
Response dict with old_index and new_index
"""
if device_type == "slm":
try:
return await self.slmm_client.increment_index(unit_id)
except SLMMClientError as e:
raise DeviceControllerError(f"SLMM error: {str(e)}")
elif device_type == "seismograph":
# Seismographs may not have the same concept of store index
return {
"status": "not_applicable",
"message": "Index increment not applicable for seismographs",
"unit_id": unit_id,
}
else:
raise UnsupportedDeviceTypeError(f"Unsupported device type: {device_type}")
async def get_index_number(
self,
unit_id: str,
device_type: str,
) -> Dict[str, Any]:
"""
Get current store/index number from device.
Args:
unit_id: Unit identifier
device_type: "slm" | "seismograph"
Returns:
Response dict with current index_number
"""
if device_type == "slm":
try:
return await self.slmm_client.get_index_number(unit_id)
except SLMMClientError as e:
raise DeviceControllerError(f"SLMM error: {str(e)}")
elif device_type == "seismograph":
return {
"status": "not_applicable",
"message": "Index number not applicable for seismographs",
"unit_id": unit_id,
}
else:
raise UnsupportedDeviceTypeError(f"Unsupported device type: {device_type}")
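# Example (illustrative): the intended flow for a scheduled start, mirroring the
# scheduler's _execute_start; the zero-argument controller accessor below is an
# assumption based on how it is imported elsewhere.
#
#     controller = get_device_controller()
#     await controller.increment_index("slm-001", "slm")    # pick an unused store index
#     await controller.start_recording("slm-001", "slm", config={})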
# ========================================================================
# Health Check
# ========================================================================

View File

@@ -0,0 +1,184 @@
"""
Device Status Monitor
Background task that monitors device reachability via SLMM polling status
and triggers alerts when devices go offline or come back online.
This service bridges SLMM's device polling with Terra-View's alert system.
"""
import asyncio
import logging
from datetime import datetime
from typing import Optional, Dict
from backend.database import SessionLocal
from backend.services.slmm_client import get_slmm_client, SLMMClientError
from backend.services.alert_service import get_alert_service
logger = logging.getLogger(__name__)
class DeviceStatusMonitor:
"""
Monitors device reachability via SLMM's polling status endpoint.
Detects state transitions (online→offline, offline→online) and
triggers AlertService to create/resolve alerts.
Usage:
monitor = DeviceStatusMonitor()
await monitor.start() # Start background monitoring
monitor.stop() # Stop monitoring
"""
def __init__(self, check_interval: int = 60):
"""
Initialize the monitor.
Args:
check_interval: Seconds between status checks (default: 60)
"""
self.check_interval = check_interval
self.running = False
self.task: Optional[asyncio.Task] = None
self.slmm_client = get_slmm_client()
# Track previous device states to detect transitions
self._device_states: Dict[str, bool] = {}
async def start(self):
"""Start the monitoring background task."""
if self.running:
logger.warning("DeviceStatusMonitor is already running")
return
self.running = True
self.task = asyncio.create_task(self._monitor_loop())
logger.info(f"DeviceStatusMonitor started (checking every {self.check_interval}s)")
def stop(self):
"""Stop the monitoring background task."""
self.running = False
if self.task:
self.task.cancel()
logger.info("DeviceStatusMonitor stopped")
async def _monitor_loop(self):
"""Main monitoring loop."""
while self.running:
try:
await self._check_all_devices()
except Exception as e:
logger.error(f"Error in device status monitor: {e}", exc_info=True)
# Sleep in small intervals for graceful shutdown
for _ in range(self.check_interval):
if not self.running:
break
await asyncio.sleep(1)
logger.info("DeviceStatusMonitor loop exited")
async def _check_all_devices(self):
"""
Fetch polling status from SLMM and detect state transitions.
Uses SLMMClient.get_polling_status(), which reads SLMM's polling status endpoint.
"""
try:
# Get status from SLMM
status_response = await self.slmm_client.get_polling_status()
devices = status_response.get("devices", [])
if not devices:
logger.debug("No devices in polling status response")
return
db = SessionLocal()
try:
alert_service = get_alert_service(db)
for device in devices:
unit_id = device.get("unit_id")
if not unit_id:
continue
is_reachable = device.get("is_reachable", True)
previous_reachable = self._device_states.get(unit_id)
# Skip if this is the first check (no previous state)
if previous_reachable is None:
self._device_states[unit_id] = is_reachable
logger.debug(f"Initial state for {unit_id}: reachable={is_reachable}")
continue
# Detect offline transition (was online, now offline)
if previous_reachable and not is_reachable:
logger.warning(f"Device {unit_id} went OFFLINE")
alert_service.create_device_offline_alert(
unit_id=unit_id,
consecutive_failures=device.get("consecutive_failures", 0),
last_error=device.get("last_error"),
)
# Detect online transition (was offline, now online)
elif not previous_reachable and is_reachable:
logger.info(f"Device {unit_id} came back ONLINE")
alert_service.resolve_device_offline_alert(unit_id)
# Update tracked state
self._device_states[unit_id] = is_reachable
# Cleanup expired alerts while we're here
alert_service.cleanup_expired_alerts()
finally:
db.close()
except SLMMClientError as e:
logger.warning(f"Could not reach SLMM for status check: {e}")
except Exception as e:
logger.error(f"Error checking device status: {e}", exc_info=True)
def get_tracked_devices(self) -> Dict[str, bool]:
"""
Get the current tracked device states.
Returns:
Dict mapping unit_id to is_reachable status
"""
return dict(self._device_states)
def clear_tracked_devices(self):
"""Clear all tracked device states (useful for testing)."""
self._device_states.clear()
# Singleton instance
_monitor_instance: Optional[DeviceStatusMonitor] = None
def get_device_status_monitor() -> DeviceStatusMonitor:
"""
Get the device status monitor singleton instance.
Returns:
DeviceStatusMonitor instance
"""
global _monitor_instance
if _monitor_instance is None:
_monitor_instance = DeviceStatusMonitor()
return _monitor_instance
async def start_device_status_monitor():
"""Start the global device status monitor."""
monitor = get_device_status_monitor()
await monitor.start()
def stop_device_status_monitor():
"""Stop the global device status monitor."""
monitor = get_device_status_monitor()
monitor.stop()
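# Example (illustrative): wiring the monitor into FastAPI startup/shutdown.
# The app object and event hooks below are assumptions, not part of this module.
#
#     from fastapi import FastAPI
#
#     app = FastAPI()
#
#     @app.on_event("startup")
#     async def start_monitoring():
#         await start_device_status_monitor()
#
#     @app.on_event("shutdown")
#     async def stop_monitoring():
#         stop_device_status_monitor()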

View File

@@ -0,0 +1,550 @@
"""
Recurring Schedule Service
Manages recurring schedule definitions and generates ScheduledAction
instances based on patterns (weekly calendar, simple interval).
"""
import json
import uuid
import logging
from datetime import datetime, timedelta, date, time
from typing import Optional, List, Dict, Any, Tuple
from zoneinfo import ZoneInfo
from sqlalchemy.orm import Session
from sqlalchemy import and_
from backend.models import RecurringSchedule, ScheduledAction, MonitoringLocation, UnitAssignment
logger = logging.getLogger(__name__)
# Day name mapping
DAY_NAMES = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
class RecurringScheduleService:
"""
Service for managing recurring schedules and generating ScheduledActions.
Supports two schedule types:
- weekly_calendar: Specific days with start/end times
- simple_interval: Daily stop/download/restart cycles for 24/7 monitoring
"""
def __init__(self, db: Session):
self.db = db
def create_schedule(
self,
project_id: str,
location_id: str,
name: str,
schedule_type: str,
device_type: str = "slm",
unit_id: str = None,
weekly_pattern: dict = None,
interval_type: str = None,
cycle_time: str = None,
include_download: bool = True,
auto_increment_index: bool = True,
timezone: str = "America/New_York",
) -> RecurringSchedule:
"""
Create a new recurring schedule.
Args:
project_id: Project ID
location_id: Monitoring location ID
name: Schedule name
schedule_type: "weekly_calendar" or "simple_interval"
device_type: "slm" or "seismograph"
unit_id: Specific unit (optional, can use assignment)
weekly_pattern: Dict of day patterns for weekly_calendar
interval_type: "daily" or "hourly" for simple_interval
cycle_time: Time string "HH:MM" for cycle
include_download: Whether to download data on cycle
auto_increment_index: Whether to auto-increment store index before start
timezone: Timezone for schedule times
Returns:
Created RecurringSchedule
"""
schedule = RecurringSchedule(
id=str(uuid.uuid4()),
project_id=project_id,
location_id=location_id,
unit_id=unit_id,
name=name,
schedule_type=schedule_type,
device_type=device_type,
weekly_pattern=json.dumps(weekly_pattern) if weekly_pattern else None,
interval_type=interval_type,
cycle_time=cycle_time,
include_download=include_download,
auto_increment_index=auto_increment_index,
enabled=True,
timezone=timezone,
)
# Calculate next occurrence
schedule.next_occurrence = self._calculate_next_occurrence(schedule)
self.db.add(schedule)
self.db.commit()
self.db.refresh(schedule)
logger.info(f"Created recurring schedule: {name} ({schedule_type})")
return schedule
def update_schedule(
self,
schedule_id: str,
**kwargs,
) -> Optional[RecurringSchedule]:
"""
Update a recurring schedule.
Args:
schedule_id: Schedule to update
**kwargs: Fields to update
Returns:
Updated schedule or None
"""
schedule = self.db.query(RecurringSchedule).filter_by(id=schedule_id).first()
if not schedule:
return None
for key, value in kwargs.items():
if hasattr(schedule, key):
if key == "weekly_pattern" and isinstance(value, dict):
value = json.dumps(value)
setattr(schedule, key, value)
# Recalculate next occurrence
schedule.next_occurrence = self._calculate_next_occurrence(schedule)
self.db.commit()
self.db.refresh(schedule)
logger.info(f"Updated recurring schedule: {schedule.name}")
return schedule
def delete_schedule(self, schedule_id: str) -> bool:
"""
Delete a recurring schedule. Actions it has already generated are left in place (see note below).
Args:
schedule_id: Schedule to delete
Returns:
True if deleted, False if not found
"""
schedule = self.db.query(RecurringSchedule).filter_by(id=schedule_id).first()
if not schedule:
return False
# Note: ScheduledAction has no recurring_schedule_id field yet, so actions
# generated by this schedule cannot be located and removed here.
self.db.delete(schedule)
self.db.commit()
logger.info(f"Deleted recurring schedule: {schedule.name}")
return True
def enable_schedule(self, schedule_id: str) -> Optional[RecurringSchedule]:
"""Enable a disabled schedule."""
return self.update_schedule(schedule_id, enabled=True)
def disable_schedule(self, schedule_id: str) -> Optional[RecurringSchedule]:
"""Disable a schedule."""
return self.update_schedule(schedule_id, enabled=False)
def generate_actions_for_schedule(
self,
schedule: RecurringSchedule,
horizon_days: int = 7,
preview_only: bool = False,
) -> List[ScheduledAction]:
"""
Generate ScheduledAction entries for the next N days based on pattern.
Args:
schedule: The recurring schedule
horizon_days: Days ahead to generate
preview_only: If True, don't save to DB (for preview)
Returns:
List of generated ScheduledAction instances
"""
if not schedule.enabled:
return []
if schedule.schedule_type == "weekly_calendar":
actions = self._generate_weekly_calendar_actions(schedule, horizon_days)
elif schedule.schedule_type == "simple_interval":
actions = self._generate_interval_actions(schedule, horizon_days)
else:
logger.warning(f"Unknown schedule type: {schedule.schedule_type}")
return []
if not preview_only and actions:
for action in actions:
self.db.add(action)
schedule.last_generated_at = datetime.utcnow()
schedule.next_occurrence = self._calculate_next_occurrence(schedule)
self.db.commit()
logger.info(f"Generated {len(actions)} actions for schedule: {schedule.name}")
return actions
def _generate_weekly_calendar_actions(
self,
schedule: RecurringSchedule,
horizon_days: int,
) -> List[ScheduledAction]:
"""
Generate actions from weekly calendar pattern.
Pattern format:
{
"monday": {"enabled": true, "start": "19:00", "end": "07:00"},
"tuesday": {"enabled": false},
...
}
"""
if not schedule.weekly_pattern:
return []
try:
pattern = json.loads(schedule.weekly_pattern)
except json.JSONDecodeError:
logger.error(f"Invalid weekly_pattern JSON for schedule {schedule.id}")
return []
actions = []
tz = ZoneInfo(schedule.timezone)
now_utc = datetime.utcnow()
now_local = now_utc.replace(tzinfo=ZoneInfo("UTC")).astimezone(tz)
# Get unit_id (from schedule or assignment)
unit_id = self._resolve_unit_id(schedule)
for day_offset in range(horizon_days):
check_date = now_local.date() + timedelta(days=day_offset)
day_name = DAY_NAMES[check_date.weekday()]
day_config = pattern.get(day_name, {})
if not day_config.get("enabled", False):
continue
start_time_str = day_config.get("start")
end_time_str = day_config.get("end")
if not start_time_str or not end_time_str:
continue
# Parse times
start_time = self._parse_time(start_time_str)
end_time = self._parse_time(end_time_str)
if not start_time or not end_time:
continue
# Create start datetime in local timezone
start_local = datetime.combine(check_date, start_time, tzinfo=tz)
start_utc = start_local.astimezone(ZoneInfo("UTC")).replace(tzinfo=None)
# Handle overnight schedules (end time is next day)
if end_time <= start_time:
end_date = check_date + timedelta(days=1)
else:
end_date = check_date
end_local = datetime.combine(end_date, end_time, tzinfo=tz)
end_utc = end_local.astimezone(ZoneInfo("UTC")).replace(tzinfo=None)
# Skip if start time has already passed
if start_utc <= now_utc:
continue
# Check if action already exists
if self._action_exists(schedule.project_id, schedule.location_id, "start", start_utc):
continue
# Build notes with automation metadata
start_notes = json.dumps({
"schedule_name": schedule.name,
"schedule_id": schedule.id,
"auto_increment_index": schedule.auto_increment_index,
})
# Create START action
start_action = ScheduledAction(
id=str(uuid.uuid4()),
project_id=schedule.project_id,
location_id=schedule.location_id,
unit_id=unit_id,
action_type="start",
device_type=schedule.device_type,
scheduled_time=start_utc,
execution_status="pending",
notes=start_notes,
)
actions.append(start_action)
# Create STOP action
stop_notes = json.dumps({
"schedule_name": schedule.name,
"schedule_id": schedule.id,
})
stop_action = ScheduledAction(
id=str(uuid.uuid4()),
project_id=schedule.project_id,
location_id=schedule.location_id,
unit_id=unit_id,
action_type="stop",
device_type=schedule.device_type,
scheduled_time=end_utc,
execution_status="pending",
notes=stop_notes,
)
actions.append(stop_action)
# Create DOWNLOAD action if enabled (1 minute after stop)
if schedule.include_download:
download_time = end_utc + timedelta(minutes=1)
download_notes = json.dumps({
"schedule_name": schedule.name,
"schedule_id": schedule.id,
"schedule_type": "weekly_calendar",
})
download_action = ScheduledAction(
id=str(uuid.uuid4()),
project_id=schedule.project_id,
location_id=schedule.location_id,
unit_id=unit_id,
action_type="download",
device_type=schedule.device_type,
scheduled_time=download_time,
execution_status="pending",
notes=download_notes,
)
actions.append(download_action)
return actions
def _generate_interval_actions(
self,
schedule: RecurringSchedule,
horizon_days: int,
) -> List[ScheduledAction]:
"""
Generate actions from simple interval pattern.
For daily cycles: stop, download (optional), start at cycle_time each day.
"""
if not schedule.cycle_time:
return []
cycle_time = self._parse_time(schedule.cycle_time)
if not cycle_time:
return []
actions = []
tz = ZoneInfo(schedule.timezone)
now_utc = datetime.utcnow()
now_local = now_utc.replace(tzinfo=ZoneInfo("UTC")).astimezone(tz)
# Get unit_id
unit_id = self._resolve_unit_id(schedule)
for day_offset in range(horizon_days):
check_date = now_local.date() + timedelta(days=day_offset)
# Create cycle datetime in local timezone
cycle_local = datetime.combine(check_date, cycle_time, tzinfo=tz)
cycle_utc = cycle_local.astimezone(ZoneInfo("UTC")).replace(tzinfo=None)
# Skip if time has passed
if cycle_utc <= now_utc:
continue
# Check if action already exists
if self._action_exists(schedule.project_id, schedule.location_id, "stop", cycle_utc):
continue
# Build notes with metadata
stop_notes = json.dumps({
"schedule_name": schedule.name,
"schedule_id": schedule.id,
"cycle_type": "daily",
})
# Create STOP action
stop_action = ScheduledAction(
id=str(uuid.uuid4()),
project_id=schedule.project_id,
location_id=schedule.location_id,
unit_id=unit_id,
action_type="stop",
device_type=schedule.device_type,
scheduled_time=cycle_utc,
execution_status="pending",
notes=stop_notes,
)
actions.append(stop_action)
# Create DOWNLOAD action if enabled (1 minute after stop)
if schedule.include_download:
download_time = cycle_utc + timedelta(minutes=1)
download_notes = json.dumps({
"schedule_name": schedule.name,
"schedule_id": schedule.id,
"cycle_type": "daily",
})
download_action = ScheduledAction(
id=str(uuid.uuid4()),
project_id=schedule.project_id,
location_id=schedule.location_id,
unit_id=unit_id,
action_type="download",
device_type=schedule.device_type,
scheduled_time=download_time,
execution_status="pending",
notes=download_notes,
)
actions.append(download_action)
# Create START action (1 minute after download if enabled, otherwise 1 minute after stop)
start_offset = 2 if schedule.include_download else 1
start_time = cycle_utc + timedelta(minutes=start_offset)
start_notes = json.dumps({
"schedule_name": schedule.name,
"schedule_id": schedule.id,
"cycle_type": "daily",
"auto_increment_index": schedule.auto_increment_index,
})
start_action = ScheduledAction(
id=str(uuid.uuid4()),
project_id=schedule.project_id,
location_id=schedule.location_id,
unit_id=unit_id,
action_type="start",
device_type=schedule.device_type,
scheduled_time=start_time,
execution_status="pending",
notes=start_notes,
)
actions.append(start_action)
return actions
def _calculate_next_occurrence(self, schedule: RecurringSchedule) -> Optional[datetime]:
"""Calculate when the next action should occur."""
if not schedule.enabled:
return None
tz = ZoneInfo(schedule.timezone)
now_utc = datetime.utcnow()
now_local = now_utc.replace(tzinfo=ZoneInfo("UTC")).astimezone(tz)
if schedule.schedule_type == "weekly_calendar" and schedule.weekly_pattern:
try:
pattern = json.loads(schedule.weekly_pattern)
except json.JSONDecodeError:
return None
# Find next enabled day
for day_offset in range(8): # Check up to a week ahead
check_date = now_local.date() + timedelta(days=day_offset)
day_name = DAY_NAMES[check_date.weekday()]
day_config = pattern.get(day_name, {})
if day_config.get("enabled") and day_config.get("start"):
start_time = self._parse_time(day_config["start"])
if start_time:
start_local = datetime.combine(check_date, start_time, tzinfo=tz)
start_utc = start_local.astimezone(ZoneInfo("UTC")).replace(tzinfo=None)
if start_utc > now_utc:
return start_utc
elif schedule.schedule_type == "simple_interval" and schedule.cycle_time:
cycle_time = self._parse_time(schedule.cycle_time)
if cycle_time:
# Find next cycle time
for day_offset in range(2):
check_date = now_local.date() + timedelta(days=day_offset)
cycle_local = datetime.combine(check_date, cycle_time, tzinfo=tz)
cycle_utc = cycle_local.astimezone(ZoneInfo("UTC")).replace(tzinfo=None)
if cycle_utc > now_utc:
return cycle_utc
return None
def _resolve_unit_id(self, schedule: RecurringSchedule) -> Optional[str]:
"""Get unit_id from schedule or active assignment."""
if schedule.unit_id:
return schedule.unit_id
# Try to get from active assignment
assignment = self.db.query(UnitAssignment).filter(
and_(
UnitAssignment.location_id == schedule.location_id,
UnitAssignment.status == "active",
)
).first()
return assignment.unit_id if assignment else None
def _action_exists(
self,
project_id: str,
location_id: str,
action_type: str,
scheduled_time: datetime,
) -> bool:
"""Check if an action already exists for this time slot."""
# Allow 5-minute window for duplicate detection
time_window_start = scheduled_time - timedelta(minutes=5)
time_window_end = scheduled_time + timedelta(minutes=5)
exists = self.db.query(ScheduledAction).filter(
and_(
ScheduledAction.project_id == project_id,
ScheduledAction.location_id == location_id,
ScheduledAction.action_type == action_type,
ScheduledAction.scheduled_time >= time_window_start,
ScheduledAction.scheduled_time <= time_window_end,
ScheduledAction.execution_status == "pending",
)
).first()
return exists is not None
@staticmethod
def _parse_time(time_str: str) -> Optional[time]:
"""Parse time string "HH:MM" to time object."""
try:
parts = time_str.split(":")
return time(int(parts[0]), int(parts[1]))
except (ValueError, IndexError):
return None
def get_schedules_for_project(self, project_id: str) -> List[RecurringSchedule]:
"""Get all recurring schedules for a project."""
return self.db.query(RecurringSchedule).filter_by(project_id=project_id).all()
def get_enabled_schedules(self) -> List[RecurringSchedule]:
"""Get all enabled recurring schedules."""
return self.db.query(RecurringSchedule).filter_by(enabled=True).all()
def get_recurring_schedule_service(db: Session) -> RecurringScheduleService:
"""Get a RecurringScheduleService instance."""
return RecurringScheduleService(db)
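# Example (illustrative, preview only - nothing is written): build a weekly
# calendar schedule in memory and print the actions it would generate over the
# default 7-day horizon. Assumes the database configured in backend.database
# exists; the IDs below are placeholders.
if __name__ == "__main__":
    from backend.database import SessionLocal

    db = SessionLocal()
    try:
        service = get_recurring_schedule_service(db)
        demo = RecurringSchedule(
            id=str(uuid.uuid4()),
            project_id="demo-project",
            location_id="demo-location",
            unit_id="demo-unit",
            name="Weeknight monitoring",
            schedule_type="weekly_calendar",
            device_type="slm",
            weekly_pattern=json.dumps({
                "monday": {"enabled": True, "start": "19:00", "end": "07:00"},
                "friday": {"enabled": True, "start": "19:00", "end": "07:00"},
            }),
            include_download=True,
            auto_increment_index=True,
            enabled=True,
            timezone="America/New_York",
        )
        for action in service.generate_actions_for_schedule(demo, preview_only=True):
            print(action.action_type, action.scheduled_time)
    finally:
        db.close()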

View File

@@ -4,22 +4,29 @@ Scheduler Service
Executes scheduled actions for the Projects system.
Monitors pending scheduled actions and executes them by calling device modules (SLMM/SFM).
Extended to support recurring schedules:
- Generates ScheduledActions from RecurringSchedule patterns
- Cleans up old completed/failed actions
This service runs as a background task in FastAPI, checking for pending actions
every minute and executing them when their scheduled time arrives.
"""
import asyncio
import json
import logging
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from sqlalchemy.orm import Session
from sqlalchemy import and_
from backend.database import SessionLocal
from backend.models import ScheduledAction, RecordingSession, MonitoringLocation, Project, RecurringSchedule
from backend.services.device_controller import get_device_controller, DeviceControllerError
import uuid
logger = logging.getLogger(__name__)
class SchedulerService:
"""
@@ -62,11 +69,26 @@ class SchedulerService:
async def _run_loop(self):
"""Main scheduler loop."""
# Track when we last generated recurring actions (do this once per hour)
last_generation_check = datetime.utcnow() - timedelta(hours=1)
while self.running:
try:
# Execute pending actions
await self.execute_pending_actions()
# Generate actions from recurring schedules and clean up old ones (every hour)
now = datetime.utcnow()
if (now - last_generation_check).total_seconds() >= 3600:
    await self.generate_recurring_actions()
    await self.cleanup_old_actions()
    last_generation_check = now
except Exception as e:
print(f"Scheduler error: {e}")
logger.error(f"Scheduler error: {e}", exc_info=True)
# Continue running even if there's an error
await asyncio.sleep(self.check_interval)
@@ -194,11 +216,34 @@ class SchedulerService:
db: Session,
) -> Dict[str, Any]:
"""Execute a 'start' action."""
# Parse action notes for automation settings
auto_increment_index = False
try:
if action.notes:
notes_data = json.loads(action.notes)
auto_increment_index = notes_data.get("auto_increment_index", False)
except json.JSONDecodeError:
pass # Notes is plain text, not JSON
# If auto_increment_index is enabled, increment the store index before starting
increment_response = None
if auto_increment_index and action.device_type == "slm":
try:
logger.info(f"Auto-incrementing store index for unit {unit_id}")
increment_response = await self.device_controller.increment_index(
unit_id,
action.device_type,
)
logger.info(f"Index incremented: {increment_response}")
except Exception as e:
logger.warning(f"Failed to increment index for {unit_id}: {e}")
# Continue with start anyway - don't fail the whole action
# Start recording via device controller
response = await self.device_controller.start_recording(
unit_id,
action.device_type,
config={},
)
# Create recording session
@@ -210,7 +255,11 @@ class SchedulerService:
session_type="sound" if action.device_type == "slm" else "vibration",
started_at=datetime.utcnow(),
status="recording",
session_metadata=json.dumps({"scheduled_action_id": action.id}),
session_metadata=json.dumps({
"scheduled_action_id": action.id,
"auto_increment_index": auto_increment_index,
"increment_response": increment_response,
}),
)
db.add(session)
@@ -218,6 +267,8 @@ class SchedulerService:
"status": "started",
"session_id": session.id,
"device_response": response,
"index_incremented": auto_increment_index,
"increment_response": increment_response,
}
async def _execute_stop(
@@ -295,6 +346,90 @@ class SchedulerService:
"device_response": response,
}
# ========================================================================
# Recurring Schedule Generation
# ========================================================================
async def generate_recurring_actions(self) -> int:
"""
Generate ScheduledActions from all enabled recurring schedules.
Runs once per hour to generate actions for the next 7 days.
Returns:
Total number of actions generated
"""
db = SessionLocal()
total_generated = 0
try:
from backend.services.recurring_schedule_service import get_recurring_schedule_service
service = get_recurring_schedule_service(db)
schedules = service.get_enabled_schedules()
if not schedules:
logger.debug("No enabled recurring schedules found")
return 0
logger.info(f"Generating actions for {len(schedules)} recurring schedule(s)")
for schedule in schedules:
try:
actions = service.generate_actions_for_schedule(schedule, horizon_days=7)
total_generated += len(actions)
except Exception as e:
logger.error(f"Error generating actions for schedule {schedule.id}: {e}")
if total_generated > 0:
logger.info(f"Generated {total_generated} scheduled actions from recurring schedules")
except Exception as e:
logger.error(f"Error in generate_recurring_actions: {e}", exc_info=True)
finally:
db.close()
return total_generated
async def cleanup_old_actions(self, retention_days: int = 30) -> int:
"""
Remove old completed/failed actions to prevent database bloat.
Args:
retention_days: Keep actions newer than this many days
Returns:
Number of actions cleaned up
"""
db = SessionLocal()
cleaned = 0
try:
cutoff = datetime.utcnow() - timedelta(days=retention_days)
old_actions = db.query(ScheduledAction).filter(
and_(
ScheduledAction.execution_status.in_(["completed", "failed", "cancelled"]),
ScheduledAction.executed_at < cutoff,
)
).all()
cleaned = len(old_actions)
for action in old_actions:
db.delete(action)
if cleaned > 0:
db.commit()
logger.info(f"Cleaned up {cleaned} old scheduled actions (>{retention_days} days)")
except Exception as e:
logger.error(f"Error cleaning up old actions: {e}")
db.rollback()
finally:
db.close()
return cleaned
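# Example (illustrative): both maintenance passes can also be invoked ad hoc
# from an async context; the zero-argument constructor shown is an assumption.
#
#     scheduler = SchedulerService()
#     generated = await scheduler.generate_recurring_actions()
#     removed = await scheduler.cleanup_old_actions(retention_days=30)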
# ========================================================================
# Manual Execution (for testing/debugging)
# ========================================================================

View File

@@ -276,6 +276,124 @@ class SLMMClient:
"""
return await self._request("POST", f"/{unit_id}/reset")
# ========================================================================
# Store/Index Management
# ========================================================================
async def get_index_number(self, unit_id: str) -> Dict[str, Any]:
"""
Get current store/index number from device.
Args:
unit_id: Unit identifier
Returns:
Dict with current index_number (store name)
"""
return await self._request("GET", f"/{unit_id}/index-number")
async def set_index_number(
self,
unit_id: str,
index_number: int,
) -> Dict[str, Any]:
"""
Set store/index number on device.
Args:
unit_id: Unit identifier
index_number: New index number to set
Returns:
Confirmation response
"""
return await self._request(
"PUT",
f"/{unit_id}/index-number",
data={"index_number": index_number},
)
async def check_overwrite_status(self, unit_id: str) -> Dict[str, Any]:
"""
Check if data exists at the current store index.
Args:
unit_id: Unit identifier
Returns:
Dict with:
- overwrite_status: "None" (safe) or "Exist" (would overwrite)
- will_overwrite: bool
- safe_to_store: bool
"""
return await self._request("GET", f"/{unit_id}/overwrite-check")
async def increment_index(self, unit_id: str, max_attempts: int = 100) -> Dict[str, Any]:
"""
Find and set the next available (unused) store/index number.
Checks the current index - if it would overwrite existing data,
increments until finding an unused index number.
Args:
unit_id: Unit identifier
max_attempts: Maximum number of indices to try before giving up
Returns:
Dict with old_index, new_index, and attempts_made
"""
# Get current index
current = await self.get_index_number(unit_id)
old_index = current.get("index_number", 0)
# Check if current index is safe
overwrite_check = await self.check_overwrite_status(unit_id)
if overwrite_check.get("safe_to_store", False):
# Current index is safe, no need to increment
return {
"success": True,
"old_index": old_index,
"new_index": old_index,
"unit_id": unit_id,
"already_safe": True,
"attempts_made": 0,
}
# Need to find an unused index
attempts = 0
test_index = (old_index + 1) % 10000  # first candidate; wraps after 9999
while attempts < max_attempts:
# Set the new index
await self.set_index_number(unit_id, test_index)
# Check if this index is safe
overwrite_check = await self.check_overwrite_status(unit_id)
attempts += 1
if overwrite_check.get("safe_to_store", False):
return {
"success": True,
"old_index": old_index,
"new_index": test_index,
"unit_id": unit_id,
"already_safe": False,
"attempts_made": attempts,
}
# Try next index (wrap around at 9999)
test_index = (test_index + 1) % 10000
# Avoid infinite loops if we've wrapped around
if test_index == old_index:
break
# Could not find a safe index
raise SLMMDeviceError(
f"Could not find unused store index for {unit_id} after {attempts} attempts. "
f"Consider downloading and clearing data from the device."
)
# ========================================================================
# Device Settings
# ========================================================================
@@ -387,6 +505,73 @@ class SLMMClient:
}
return await self._request("POST", f"/{unit_id}/ftp/download", data=data)
# ========================================================================
# Polling Status (for device monitoring/alerts)
# ========================================================================
async def get_polling_status(self) -> Dict[str, Any]:
"""
Get global polling status from SLMM.
Returns device reachability information for all polled devices.
Used by DeviceStatusMonitor to detect offline/online transitions.
Returns:
Dict with devices list containing:
- unit_id
- is_reachable
- consecutive_failures
- last_poll_attempt
- last_success
- last_error
"""
try:
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(f"{self.base_url}/api/nl43/_polling/status")
response.raise_for_status()
return response.json()
except httpx.ConnectError:
raise SLMMConnectionError("Cannot connect to SLMM for polling status")
except Exception as e:
raise SLMMClientError(f"Failed to get polling status: {str(e)}")
async def get_device_polling_config(self, unit_id: str) -> Dict[str, Any]:
"""
Get polling configuration for a specific device.
Args:
unit_id: Unit identifier
Returns:
Dict with poll_enabled and poll_interval_seconds
"""
return await self._request("GET", f"/{unit_id}/polling/config")
async def update_device_polling_config(
self,
unit_id: str,
poll_enabled: Optional[bool] = None,
poll_interval_seconds: Optional[int] = None,
) -> Dict[str, Any]:
"""
Update polling configuration for a device.
Args:
unit_id: Unit identifier
poll_enabled: Enable/disable polling
poll_interval_seconds: Polling interval (10-3600)
Returns:
Updated config
"""
config = {}
if poll_enabled is not None:
config["poll_enabled"] = poll_enabled
if poll_interval_seconds is not None:
config["poll_interval_seconds"] = poll_interval_seconds
return await self._request("PUT", f"/{unit_id}/polling/config", data=config)
# ========================================================================
# Health Check
# ========================================================================