v0.2.0: async status polling added.
This commit is contained in:
264
app/background_poller.py
Normal file
264
app/background_poller.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""
|
||||
Background polling service for NL43 devices.
|
||||
|
||||
This module provides continuous, automatic polling of configured NL43 devices
|
||||
at configurable intervals. Status snapshots are persisted to the database
|
||||
for fast API access without querying devices on every request.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.database import SessionLocal
|
||||
from app.models import NL43Config, NL43Status
|
||||
from app.services import NL43Client, persist_snapshot
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BackgroundPoller:
    """
    Background task that continuously polls NL43 devices and updates status cache.

    Features:
    - Per-device configurable poll intervals (60 seconds when unset)
    - Automatic offline detection (marks unreachable after 3 consecutive failures)
    - Dynamic sleep intervals based on device configurations
    - Graceful shutdown on application stop

    NOTE(review): rate limiting between device commands is not implemented in
    this class; presumably NL43Client enforces it - confirm.
    """

    # Interval used when a device has no poll_interval_seconds configured.
    DEFAULT_POLL_INTERVAL = 60
    # Consecutive failed polls after which a device is flagged unreachable.
    FAILURE_THRESHOLD = 3
    # Bounds (seconds) for the pause between two polling cycles.
    MIN_SLEEP = 10
    MAX_SLEEP = 30
    # Seconds stop() waits for the loop to exit before cancelling it.
    STOP_TIMEOUT = 5.0
    # Timeout (seconds) handed to NL43Client for each device.
    DEVICE_TIMEOUT = 5.0
    # Maximum number of characters of an error message that is stored.
    MAX_ERROR_LENGTH = 500

    def __init__(self):
        self._task: Optional[asyncio.Task] = None
        self._running = False
        self._logger = logger

    async def start(self) -> None:
        """Start the background polling task.

        Logs a warning and does nothing when the poller is already running.
        Must be called from within a running event loop.
        """
        if self._running:
            self._logger.warning("Background poller already running")
            return

        self._running = True
        self._task = asyncio.create_task(self._poll_loop())
        self._logger.info("Background poller task created")

    async def stop(self) -> None:
        """Gracefully stop the background polling task.

        Clears the running flag, waits up to STOP_TIMEOUT seconds for the loop
        to notice, and cancels the task if it has not finished by then.
        """
        if not self._running:
            return

        self._logger.info("Stopping background poller...")
        self._running = False

        # Drop the reference up front so a finished task is never kept around.
        task, self._task = self._task, None
        if task:
            try:
                await asyncio.wait_for(task, timeout=self.STOP_TIMEOUT)
            except asyncio.TimeoutError:
                self._logger.warning("Background poller task did not stop gracefully, cancelling...")
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass

        self._logger.info("Background poller stopped")

    async def _poll_loop(self) -> None:
        """Main polling loop; runs until the running flag is cleared."""
        self._logger.info("Background polling loop started")

        while self._running:
            try:
                await self._poll_all_devices()
            except Exception as e:
                self._logger.error("Error in poll loop: %s", e, exc_info=True)

            # Computing the sleep time queries the database as well. Guard it so
            # that a database error cannot terminate the loop while the running
            # flag still claims the poller is alive.
            try:
                sleep_time = self._calculate_sleep_interval()
            except Exception as e:
                sleep_time = self.MAX_SLEEP
                self._logger.error(
                    "Error calculating sleep interval, using %s seconds: %s",
                    sleep_time, e, exc_info=True,
                )
            self._logger.debug("Sleeping for %s seconds until next poll cycle", sleep_time)

            # Sleep in one-second slices so a stop request is noticed quickly.
            for _ in range(int(sleep_time)):
                if not self._running:
                    break
                await asyncio.sleep(1)

        self._logger.info("Background polling loop exited")

    async def _poll_all_devices(self) -> None:
        """Poll every device with TCP and polling enabled that is due for a poll."""
        db: "Session" = SessionLocal()
        try:
            configs = db.query(NL43Config).filter_by(
                tcp_enabled=True,
                poll_enabled=True
            ).all()

            if not configs:
                self._logger.debug("No devices configured for polling")
                return

            self._logger.debug("Checking %d devices for polling", len(configs))
            now = datetime.utcnow()
            polled_count = 0

            for cfg in configs:
                # Abort the cycle early when a shutdown has been requested.
                if not self._running:
                    break

                status = db.query(NL43Status).filter_by(unit_id=cfg.unit_id).first()

                if self._should_poll(cfg, status, now):
                    await self._poll_device(cfg, db)
                    polled_count += 1
                else:
                    self._logger.debug("Skipping %s - interval not elapsed", cfg.unit_id)

            if polled_count > 0:
                self._logger.info("Polled %d/%d devices", polled_count, len(configs))
        finally:
            db.close()

    def _should_poll(self, cfg: "NL43Config", status: "Optional[NL43Status]", now: datetime) -> bool:
        """
        Determine if a device should be polled based on interval and last poll time.

        Args:
            cfg: Device configuration
            status: Current device status (may be None if never polled)
            now: Current timestamp; must be comparable with
                status.last_poll_attempt (this class stores naive UTC values)

        Returns:
            True if device should be polled, False otherwise
        """
        # No status row or no recorded attempt: poll right away.
        if not status or not status.last_poll_attempt:
            self._logger.debug("Device %s never polled, polling now", cfg.unit_id)
            return True

        interval = cfg.poll_interval_seconds or self.DEFAULT_POLL_INTERVAL
        elapsed = (now - status.last_poll_attempt).total_seconds()
        should_poll = elapsed >= interval

        if should_poll:
            self._logger.debug(
                "Device %s due for polling: %.1fs elapsed, interval=%ss",
                cfg.unit_id, elapsed, interval,
            )

        return should_poll

    async def _poll_device(self, cfg: "NL43Config", db: "Session") -> None:
        """
        Poll a single device and update its status in the database.

        Args:
            cfg: Device configuration
            db: Database session
        """
        unit_id = cfg.unit_id
        self._logger.info("Polling device %s at %s:%s", unit_id, cfg.host, cfg.tcp_port)

        # Get or create status record
        status = db.query(NL43Status).filter_by(unit_id=unit_id).first()
        if not status:
            status = NL43Status(unit_id=unit_id)
            db.add(status)

        # Record the attempt before talking to the device so the interval
        # check also accounts for attempts that fail.
        status.last_poll_attempt = datetime.utcnow()
        db.commit()

        client = NL43Client(
            cfg.host,
            cfg.tcp_port,
            timeout=self.DEVICE_TIMEOUT,
            ftp_username=cfg.ftp_username,
            ftp_password=cfg.ftp_password,
            ftp_port=cfg.ftp_port or 21
        )

        try:
            # Send DOD? command to get device status
            snap = await client.request_dod()
            snap.unit_id = unit_id

            # Success - persist snapshot and reset failure counter
            persist_snapshot(snap, db)

            status.is_reachable = True
            status.consecutive_failures = 0
            status.last_success = datetime.utcnow()
            status.last_error = None

            db.commit()
            self._logger.info("✓ Successfully polled %s", unit_id)

        except Exception as e:
            # The failure may have come from the database itself (snapshot
            # persistence or commit). Roll back first so half-applied changes
            # are discarded and the session is usable for the bookkeeping below.
            db.rollback()
            self._record_failure(status, unit_id, e)
            db.commit()

    def _record_failure(self, status, unit_id, error) -> None:
        """Update failure bookkeeping on *status* after an unsuccessful poll.

        Increments the consecutive-failure counter (a missing/NULL counter is
        treated as 0), stores the truncated error text and flags the device
        unreachable once FAILURE_THRESHOLD is reached. Does not commit.
        """
        failures = (status.consecutive_failures or 0) + 1
        status.consecutive_failures = failures
        error_msg = str(error)[:self.MAX_ERROR_LENGTH]  # Truncate to prevent bloat
        status.last_error = error_msg

        if failures >= self.FAILURE_THRESHOLD:
            if status.is_reachable:  # Only log transition
                self._logger.warning(
                    "Device %s marked unreachable after %d failures: %s",
                    unit_id, failures, error_msg,
                )
            status.is_reachable = False
        else:
            self._logger.warning(
                "Poll failed for %s (attempt %d/%d): %s",
                unit_id, failures, self.FAILURE_THRESHOLD, error_msg,
            )

    def _calculate_sleep_interval(self) -> int:
        """
        Calculate the next sleep interval based on all device poll intervals.

        The result is half of the smallest configured device interval, clamped
        to the range MIN_SLEEP..MAX_SLEEP (10-30 seconds). MAX_SLEEP is
        returned when no device is configured for polling.

        Returns:
            Sleep interval in seconds
        """
        db: "Session" = SessionLocal()
        try:
            configs = db.query(NL43Config).filter_by(
                tcp_enabled=True,
                poll_enabled=True
            ).all()

            if not configs:
                return self.MAX_SLEEP  # Default sleep when no devices configured

            min_interval = min(
                cfg.poll_interval_seconds or self.DEFAULT_POLL_INTERVAL
                for cfg in configs
            )

            # Half the minimum interval, clamped to the allowed range.
            return max(self.MIN_SLEEP, min(self.MAX_SLEEP, min_interval // 2))
        finally:
            db.close()
|
||||
|
||||
|
||||
# Global singleton instance
|
||||
poller = BackgroundPoller()
|
||||
24
app/main.py
24
app/main.py
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import logging
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import HTMLResponse
|
||||
@@ -7,6 +8,7 @@ from fastapi.templating import Jinja2Templates
|
||||
|
||||
from app.database import Base, engine
|
||||
from app import routers
|
||||
from app.background_poller import poller
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
@@ -23,10 +25,28 @@ logger = logging.getLogger(__name__)
|
||||
Base.metadata.create_all(bind=engine)
|
||||
logger.info("Database tables initialized")
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifecycle - startup and shutdown events.

    Starts the background poller before the application begins serving and
    stops it afterwards. The shutdown step runs in a ``finally`` block so the
    poller is stopped even when an exception or cancellation is thrown into
    this generator at the ``yield``.
    """
    # Startup
    logger.info("Starting background poller...")
    await poller.start()
    logger.info("Background poller started")

    try:
        yield  # Application runs
    finally:
        # Shutdown
        logger.info("Stopping background poller...")
        await poller.stop()
        logger.info("Background poller stopped")
|
||||
|
||||
|
||||
# Application instance. The diff rendering had left both the old and the new
# `description`/`version` keywords in place (a repeated-keyword SyntaxError);
# only the v0.2.0 values introduced by this commit are kept.
app = FastAPI(
    title="SLMM NL43 Addon",
    description="Standalone module for NL43 configuration and status APIs with background polling",
    version="0.2.0",
    lifespan=lifespan,
)
|
||||
|
||||
# CORS configuration - use environment variable for allowed origins
|
||||
|
||||
@@ -19,6 +19,10 @@ class NL43Config(Base):
|
||||
ftp_password = Column(String, nullable=True) # FTP login password
|
||||
web_enabled = Column(Boolean, default=False)
|
||||
|
||||
# Background polling configuration
|
||||
poll_interval_seconds = Column(Integer, nullable=True, default=60) # Polling interval (10-3600 seconds)
|
||||
poll_enabled = Column(Boolean, default=True) # Enable/disable background polling for this device
|
||||
|
||||
|
||||
class NL43Status(Base):
|
||||
"""
|
||||
@@ -42,3 +46,10 @@ class NL43Status(Base):
|
||||
sd_remaining_mb = Column(String, nullable=True)
|
||||
sd_free_ratio = Column(String, nullable=True)
|
||||
raw_payload = Column(Text, nullable=True)
|
||||
|
||||
# Background polling status
|
||||
is_reachable = Column(Boolean, default=True) # Device reachability status
|
||||
consecutive_failures = Column(Integer, default=0) # Count of consecutive poll failures
|
||||
last_poll_attempt = Column(DateTime, nullable=True) # Last time background poller attempted to poll
|
||||
last_success = Column(DateTime, nullable=True) # Last successful poll timestamp
|
||||
last_error = Column(Text, nullable=True) # Last error message (truncated to 500 chars)
|
||||
|
||||
140
app/routers.py
140
app/routers.py
@@ -2,7 +2,7 @@ from fastapi import APIRouter, Depends, HTTPException, WebSocket, WebSocketDisco
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, field_validator
|
||||
from pydantic import BaseModel, field_validator, Field
|
||||
import logging
|
||||
import ipaddress
|
||||
import json
|
||||
@@ -77,6 +77,64 @@ class ConfigPayload(BaseModel):
|
||||
return v
|
||||
|
||||
|
||||
class PollingConfigPayload(BaseModel):
    """Payload for updating device polling configuration."""

    # Seconds between background polls; validated to the inclusive range
    # 10-3600. None means the field was not supplied.
    poll_interval_seconds: int | None = Field(
        default=None,
        ge=10,
        le=3600,
        description="Polling interval in seconds (10-3600)",
    )

    # Per-device switch for background polling. None means the field was not
    # supplied.
    poll_enabled: bool | None = Field(
        default=None,
        description="Enable or disable background polling for this device",
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# GLOBAL POLLING STATUS ENDPOINT (must be before /{unit_id} routes)
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/_polling/status")
def get_global_polling_status(db: Session = Depends(get_db)):
    """
    Get global background polling status for all devices.

    Returns information about which devices are being polled, their
    reachability status, failure counts, and last poll times.

    Useful for monitoring the health of the background polling system.

    Note: Must be defined before /{unit_id} routes to avoid routing conflicts.
    """
    # Imported inside the function rather than at module level - presumably
    # to avoid a circular import; confirm before moving it.
    from app.background_poller import poller

    def _iso(moment):
        # Serialise a timestamp; unset values are reported as None.
        return moment.isoformat() if moment else None

    # Only devices that have both TCP and background polling enabled.
    polled_configs = (
        db.query(NL43Config)
        .filter_by(tcp_enabled=True, poll_enabled=True)
        .all()
    )

    devices = []
    for device_cfg in polled_configs:
        row = db.query(NL43Status).filter_by(unit_id=device_cfg.unit_id).first()

        entry = {
            "unit_id": device_cfg.unit_id,
            "poll_interval_seconds": device_cfg.poll_interval_seconds,
            "poll_enabled": device_cfg.poll_enabled,
        }
        if row:
            entry["is_reachable"] = row.is_reachable
            entry["consecutive_failures"] = row.consecutive_failures
            entry["last_poll_attempt"] = _iso(row.last_poll_attempt)
            entry["last_success"] = _iso(row.last_success)
            entry["last_error"] = row.last_error
        else:
            # Device has no status row yet: report neutral placeholders.
            entry["is_reachable"] = None
            entry["consecutive_failures"] = 0
            entry["last_poll_attempt"] = None
            entry["last_success"] = None
            entry["last_error"] = None
        devices.append(entry)

    summary = {
        "poller_running": poller._running,
        "total_devices": len(polled_configs),
        "devices": devices,
    }
    return {"status": "ok", "data": summary}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# DEVICE-SPECIFIC ENDPOINTS
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/{unit_id}/config")
|
||||
def get_config(unit_id: str, db: Session = Depends(get_db)):
|
||||
cfg = db.query(NL43Config).filter_by(unit_id=unit_id).first()
|
||||
@@ -167,6 +225,12 @@ def get_status(unit_id: str, db: Session = Depends(get_db)):
|
||||
"sd_remaining_mb": status.sd_remaining_mb,
|
||||
"sd_free_ratio": status.sd_free_ratio,
|
||||
"raw_payload": status.raw_payload,
|
||||
# Background polling status
|
||||
"is_reachable": status.is_reachable,
|
||||
"consecutive_failures": status.consecutive_failures,
|
||||
"last_poll_attempt": status.last_poll_attempt.isoformat() if status.last_poll_attempt else None,
|
||||
"last_success": status.last_success.isoformat() if status.last_success else None,
|
||||
"last_error": status.last_error,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1480,3 +1544,77 @@ async def run_diagnostics(unit_id: str, db: Session = Depends(get_db)):
|
||||
# All tests passed
|
||||
diagnostics["overall_status"] = "pass"
|
||||
return diagnostics
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# BACKGROUND POLLING CONFIGURATION ENDPOINTS
|
||||
# ============================================================================
|
||||
|
||||
@router.get("/{unit_id}/polling/config")
def get_polling_config(unit_id: str, db: Session = Depends(get_db)):
    """
    Get background polling configuration for a device.

    Returns the current polling interval and enabled status for automatic
    background status polling.
    """
    device_cfg = db.query(NL43Config).filter_by(unit_id=unit_id).first()

    if device_cfg:
        polling = {
            "unit_id": unit_id,
            "poll_interval_seconds": device_cfg.poll_interval_seconds,
            "poll_enabled": device_cfg.poll_enabled,
        }
        return {"status": "ok", "data": polling}

    # No configuration row exists for the requested unit.
    raise HTTPException(status_code=404, detail="Device configuration not found")
|
||||
|
||||
|
||||
@router.put("/{unit_id}/polling/config")
def update_polling_config(
    unit_id: str,
    payload: PollingConfigPayload,
    db: Session = Depends(get_db)
):
    """
    Update background polling configuration for a device.

    Allows configuring the polling interval (10-3600 seconds) and
    enabling/disabling automatic background polling per device.

    Changes take effect on the next polling cycle.
    """
    cfg = db.query(NL43Config).filter_by(unit_id=unit_id).first()
    if not cfg:
        raise HTTPException(status_code=404, detail="Device configuration not found")

    # Fields left as None were not supplied and keep their stored value.
    # The 10-3600 range is enforced by PollingConfigPayload's field
    # constraints before this handler runs, so no manual bounds check is
    # repeated here.
    if payload.poll_interval_seconds is not None:
        cfg.poll_interval_seconds = payload.poll_interval_seconds

    if payload.poll_enabled is not None:
        cfg.poll_enabled = payload.poll_enabled

    db.commit()

    logger.info(
        f"Updated polling config for {unit_id}: "
        f"interval={cfg.poll_interval_seconds}s, enabled={cfg.poll_enabled}"
    )

    return {
        "status": "ok",
        "data": {
            "unit_id": unit_id,
            "poll_interval_seconds": cfg.poll_interval_seconds,
            "poll_enabled": cfg.poll_enabled
        }
    }
|
||||
|
||||
Reference in New Issue
Block a user