v0.2.0: async status polling added.

This commit is contained in:
serversdwn
2026-01-16 06:24:13 +00:00
parent d2b47156d8
commit d43ef7427f
8 changed files with 963 additions and 17 deletions

264
app/background_poller.py Normal file
View File

@@ -0,0 +1,264 @@
"""
Background polling service for NL43 devices.
This module provides continuous, automatic polling of configured NL43 devices
at configurable intervals. Status snapshots are persisted to the database
for fast API access without querying devices on every request.
"""
import asyncio
import logging
from datetime import datetime, timedelta
from typing import Optional
from sqlalchemy.orm import Session
from app.database import SessionLocal
from app.models import NL43Config, NL43Status
from app.services import NL43Client, persist_snapshot
logger = logging.getLogger(__name__)
class BackgroundPoller:
"""
Background task that continuously polls NL43 devices and updates status cache.
Features:
- Per-device configurable poll intervals (10-3600 seconds)
- Automatic offline detection (marks unreachable after 3 consecutive failures)
- Dynamic sleep intervals based on device configurations
- Graceful shutdown on application stop
- Respects existing rate limiting (1-second minimum between commands)
"""
def __init__(self):
self._task: Optional[asyncio.Task] = None
self._running = False
self._logger = logger
async def start(self):
"""Start the background polling task."""
if self._running:
self._logger.warning("Background poller already running")
return
self._running = True
self._task = asyncio.create_task(self._poll_loop())
self._logger.info("Background poller task created")
async def stop(self):
"""Gracefully stop the background polling task."""
if not self._running:
return
self._logger.info("Stopping background poller...")
self._running = False
if self._task:
try:
await asyncio.wait_for(self._task, timeout=5.0)
except asyncio.TimeoutError:
self._logger.warning("Background poller task did not stop gracefully, cancelling...")
self._task.cancel()
try:
await self._task
except asyncio.CancelledError:
pass
self._logger.info("Background poller stopped")
async def _poll_loop(self):
"""Main polling loop that runs continuously."""
self._logger.info("Background polling loop started")
while self._running:
try:
await self._poll_all_devices()
except Exception as e:
self._logger.error(f"Error in poll loop: {e}", exc_info=True)
# Calculate dynamic sleep interval
sleep_time = self._calculate_sleep_interval()
self._logger.debug(f"Sleeping for {sleep_time} seconds until next poll cycle")
# Sleep in small intervals to allow graceful shutdown
for _ in range(int(sleep_time)):
if not self._running:
break
await asyncio.sleep(1)
self._logger.info("Background polling loop exited")
async def _poll_all_devices(self):
"""Poll all configured devices that are due for polling."""
db: Session = SessionLocal()
try:
# Get all devices with TCP and polling enabled
configs = db.query(NL43Config).filter_by(
tcp_enabled=True,
poll_enabled=True
).all()
if not configs:
self._logger.debug("No devices configured for polling")
return
self._logger.debug(f"Checking {len(configs)} devices for polling")
now = datetime.utcnow()
polled_count = 0
for cfg in configs:
if not self._running:
break
# Get current status
status = db.query(NL43Status).filter_by(unit_id=cfg.unit_id).first()
# Check if device should be polled
if self._should_poll(cfg, status, now):
await self._poll_device(cfg, db)
polled_count += 1
else:
self._logger.debug(f"Skipping {cfg.unit_id} - interval not elapsed")
if polled_count > 0:
self._logger.info(f"Polled {polled_count}/{len(configs)} devices")
finally:
db.close()
def _should_poll(self, cfg: NL43Config, status: Optional[NL43Status], now: datetime) -> bool:
"""
Determine if a device should be polled based on interval and last poll time.
Args:
cfg: Device configuration
status: Current device status (may be None if never polled)
now: Current UTC timestamp
Returns:
True if device should be polled, False otherwise
"""
# If never polled before, poll now
if not status or not status.last_poll_attempt:
self._logger.debug(f"Device {cfg.unit_id} never polled, polling now")
return True
# Calculate elapsed time since last poll attempt
interval = cfg.poll_interval_seconds or 60
elapsed = (now - status.last_poll_attempt).total_seconds()
should_poll = elapsed >= interval
if should_poll:
self._logger.debug(
f"Device {cfg.unit_id} due for polling: {elapsed:.1f}s elapsed, interval={interval}s"
)
return should_poll
async def _poll_device(self, cfg: NL43Config, db: Session):
"""
Poll a single device and update its status in the database.
Args:
cfg: Device configuration
db: Database session
"""
unit_id = cfg.unit_id
self._logger.info(f"Polling device {unit_id} at {cfg.host}:{cfg.tcp_port}")
# Get or create status record
status = db.query(NL43Status).filter_by(unit_id=unit_id).first()
if not status:
status = NL43Status(unit_id=unit_id)
db.add(status)
# Update last_poll_attempt immediately
status.last_poll_attempt = datetime.utcnow()
db.commit()
# Create client and attempt to poll
client = NL43Client(
cfg.host,
cfg.tcp_port,
timeout=5.0,
ftp_username=cfg.ftp_username,
ftp_password=cfg.ftp_password,
ftp_port=cfg.ftp_port or 21
)
try:
# Send DOD? command to get device status
snap = await client.request_dod()
snap.unit_id = unit_id
# Success - persist snapshot and reset failure counter
persist_snapshot(snap, db)
status.is_reachable = True
status.consecutive_failures = 0
status.last_success = datetime.utcnow()
status.last_error = None
db.commit()
self._logger.info(f"✓ Successfully polled {unit_id}")
except Exception as e:
# Failure - increment counter and potentially mark offline
status.consecutive_failures += 1
error_msg = str(e)[:500] # Truncate to prevent bloat
status.last_error = error_msg
# Mark unreachable after 3 consecutive failures
if status.consecutive_failures >= 3:
if status.is_reachable: # Only log transition
self._logger.warning(
f"Device {unit_id} marked unreachable after {status.consecutive_failures} failures: {error_msg}"
)
status.is_reachable = False
else:
self._logger.warning(
f"Poll failed for {unit_id} (attempt {status.consecutive_failures}/3): {error_msg}"
)
db.commit()
def _calculate_sleep_interval(self) -> int:
"""
Calculate the next sleep interval based on all device poll intervals.
Returns a dynamic sleep time that ensures responsive polling:
- Minimum 10 seconds (prevents tight loops)
- Maximum 30 seconds (ensures responsiveness)
- Generally half the minimum device interval
Returns:
Sleep interval in seconds
"""
db: Session = SessionLocal()
try:
configs = db.query(NL43Config).filter_by(
tcp_enabled=True,
poll_enabled=True
).all()
if not configs:
return 30 # Default sleep when no devices configured
# Get all intervals
intervals = [cfg.poll_interval_seconds or 60 for cfg in configs]
min_interval = min(intervals)
# Use half the minimum interval, but cap between 10-30 seconds
sleep_time = max(10, min(30, min_interval // 2))
return sleep_time
finally:
db.close()
# Global singleton instance
poller = BackgroundPoller()

View File

@@ -1,5 +1,6 @@
import os
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
@@ -7,6 +8,7 @@ from fastapi.templating import Jinja2Templates
from app.database import Base, engine
from app import routers
from app.background_poller import poller
# Configure logging
logging.basicConfig(
@@ -23,10 +25,28 @@ logger = logging.getLogger(__name__)
Base.metadata.create_all(bind=engine)
logger.info("Database tables initialized")
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Manage application lifecycle - startup and shutdown events."""
# Startup
logger.info("Starting background poller...")
await poller.start()
logger.info("Background poller started")
yield # Application runs
# Shutdown
logger.info("Stopping background poller...")
await poller.stop()
logger.info("Background poller stopped")
app = FastAPI(
title="SLMM NL43 Addon",
description="Standalone module for NL43 configuration and status APIs",
version="0.1.0",
description="Standalone module for NL43 configuration and status APIs with background polling",
version="0.2.0",
lifespan=lifespan,
)
# CORS configuration - use environment variable for allowed origins

View File

@@ -19,6 +19,10 @@ class NL43Config(Base):
ftp_password = Column(String, nullable=True) # FTP login password
web_enabled = Column(Boolean, default=False)
# Background polling configuration
poll_interval_seconds = Column(Integer, nullable=True, default=60) # Polling interval (10-3600 seconds)
poll_enabled = Column(Boolean, default=True) # Enable/disable background polling for this device
class NL43Status(Base):
"""
@@ -42,3 +46,10 @@ class NL43Status(Base):
sd_remaining_mb = Column(String, nullable=True)
sd_free_ratio = Column(String, nullable=True)
raw_payload = Column(Text, nullable=True)
# Background polling status
is_reachable = Column(Boolean, default=True) # Device reachability status
consecutive_failures = Column(Integer, default=0) # Count of consecutive poll failures
last_poll_attempt = Column(DateTime, nullable=True) # Last time background poller attempted to poll
last_success = Column(DateTime, nullable=True) # Last successful poll timestamp
last_error = Column(Text, nullable=True) # Last error message (truncated to 500 chars)

View File

@@ -2,7 +2,7 @@ from fastapi import APIRouter, Depends, HTTPException, WebSocket, WebSocketDisco
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from datetime import datetime
from pydantic import BaseModel, field_validator
from pydantic import BaseModel, field_validator, Field
import logging
import ipaddress
import json
@@ -77,6 +77,64 @@ class ConfigPayload(BaseModel):
return v
class PollingConfigPayload(BaseModel):
"""Payload for updating device polling configuration."""
poll_interval_seconds: int | None = Field(None, ge=10, le=3600, description="Polling interval in seconds (10-3600)")
poll_enabled: bool | None = Field(None, description="Enable or disable background polling for this device")
# ============================================================================
# GLOBAL POLLING STATUS ENDPOINT (must be before /{unit_id} routes)
# ============================================================================
@router.get("/_polling/status")
def get_global_polling_status(db: Session = Depends(get_db)):
"""
Get global background polling status for all devices.
Returns information about which devices are being polled, their
reachability status, failure counts, and last poll times.
Useful for monitoring the health of the background polling system.
Note: Must be defined before /{unit_id} routes to avoid routing conflicts.
"""
from app.background_poller import poller
configs = db.query(NL43Config).filter_by(
tcp_enabled=True,
poll_enabled=True
).all()
device_statuses = []
for cfg in configs:
status = db.query(NL43Status).filter_by(unit_id=cfg.unit_id).first()
device_statuses.append({
"unit_id": cfg.unit_id,
"poll_interval_seconds": cfg.poll_interval_seconds,
"poll_enabled": cfg.poll_enabled,
"is_reachable": status.is_reachable if status else None,
"consecutive_failures": status.consecutive_failures if status else 0,
"last_poll_attempt": status.last_poll_attempt.isoformat() if status and status.last_poll_attempt else None,
"last_success": status.last_success.isoformat() if status and status.last_success else None,
"last_error": status.last_error if status else None
})
return {
"status": "ok",
"data": {
"poller_running": poller._running,
"total_devices": len(configs),
"devices": device_statuses
}
}
# ============================================================================
# DEVICE-SPECIFIC ENDPOINTS
# ============================================================================
@router.get("/{unit_id}/config")
def get_config(unit_id: str, db: Session = Depends(get_db)):
cfg = db.query(NL43Config).filter_by(unit_id=unit_id).first()
@@ -167,6 +225,12 @@ def get_status(unit_id: str, db: Session = Depends(get_db)):
"sd_remaining_mb": status.sd_remaining_mb,
"sd_free_ratio": status.sd_free_ratio,
"raw_payload": status.raw_payload,
# Background polling status
"is_reachable": status.is_reachable,
"consecutive_failures": status.consecutive_failures,
"last_poll_attempt": status.last_poll_attempt.isoformat() if status.last_poll_attempt else None,
"last_success": status.last_success.isoformat() if status.last_success else None,
"last_error": status.last_error,
},
}
@@ -1480,3 +1544,77 @@ async def run_diagnostics(unit_id: str, db: Session = Depends(get_db)):
# All tests passed
diagnostics["overall_status"] = "pass"
return diagnostics
# ============================================================================
# BACKGROUND POLLING CONFIGURATION ENDPOINTS
# ============================================================================
@router.get("/{unit_id}/polling/config")
def get_polling_config(unit_id: str, db: Session = Depends(get_db)):
"""
Get background polling configuration for a device.
Returns the current polling interval and enabled status for automatic
background status polling.
"""
cfg = db.query(NL43Config).filter_by(unit_id=unit_id).first()
if not cfg:
raise HTTPException(status_code=404, detail="Device configuration not found")
return {
"status": "ok",
"data": {
"unit_id": unit_id,
"poll_interval_seconds": cfg.poll_interval_seconds,
"poll_enabled": cfg.poll_enabled
}
}
@router.put("/{unit_id}/polling/config")
def update_polling_config(
unit_id: str,
payload: PollingConfigPayload,
db: Session = Depends(get_db)
):
"""
Update background polling configuration for a device.
Allows configuring the polling interval (10-3600 seconds) and
enabling/disabling automatic background polling per device.
Changes take effect on the next polling cycle.
"""
cfg = db.query(NL43Config).filter_by(unit_id=unit_id).first()
if not cfg:
raise HTTPException(status_code=404, detail="Device configuration not found")
# Update interval if provided
if payload.poll_interval_seconds is not None:
if payload.poll_interval_seconds < 10:
raise HTTPException(
status_code=400,
detail="Polling interval must be at least 10 seconds"
)
cfg.poll_interval_seconds = payload.poll_interval_seconds
# Update enabled status if provided
if payload.poll_enabled is not None:
cfg.poll_enabled = payload.poll_enabled
db.commit()
logger.info(
f"Updated polling config for {unit_id}: "
f"interval={cfg.poll_interval_seconds}s, enabled={cfg.poll_enabled}"
)
return {
"status": "ok",
"data": {
"unit_id": unit_id,
"poll_interval_seconds": cfg.poll_interval_seconds,
"poll_enabled": cfg.poll_enabled
}
}