add: device communication lock. To send a TCP command, slmm must now acquire a per-device connection lock, which prevents the unit from being flooded with concurrent connections.

fixed: Background poller intervals increased (poll interval now 30 seconds to 6 hours; poller sleep now 30 to 300 seconds).
2026-01-29 06:08:55 +00:00
parent 67d63b4173
commit eb39a9d1d0
3 changed files with 143 additions and 107 deletions
--- a/app/background_poller.py
+++ b/app/background_poller.py
@@ -25,7 +25,7 @@ class BackgroundPoller:
    Background task that continuously polls NL43 devices and updates status cache.

    Features:
-    - Per-device configurable poll intervals (10-3600 seconds)
+    - Per-device configurable poll intervals (30 seconds to 6 hours)
    - Automatic offline detection (marks unreachable after 3 consecutive failures)
    - Dynamic sleep intervals based on device configurations
    - Graceful shutdown on application stop
@@ -230,8 +230,8 @@ class BackgroundPoller:
        Calculate the next sleep interval based on all device poll intervals.

        Returns a dynamic sleep time that ensures responsive polling:
-        - Minimum 10 seconds (prevents tight loops)
-        - Maximum 30 seconds (ensures responsiveness)
+        - Minimum 30 seconds (prevents tight loops)
+        - Maximum 300 seconds / 5 minutes (ensures reasonable responsiveness for long intervals)
        - Generally half the minimum device interval

        Returns:
@@ -245,14 +245,15 @@ class BackgroundPoller:
            ).all()

            if not configs:
-                return 30  # Default sleep when no devices configured
+                return 60  # Default sleep when no devices configured

            # Get all intervals
            intervals = [cfg.poll_interval_seconds or 60 for cfg in configs]
            min_interval = min(intervals)

-            # Use half the minimum interval, but cap between 10-30 seconds
-            sleep_time = max(10, min(30, min_interval // 2))
+            # Use half the minimum interval, but cap between 30-300 seconds
+            # This allows longer sleep times when polling intervals are long (e.g., hourly)
+            sleep_time = max(30, min(300, min_interval // 2))

            return sleep_time

--- a/app/routers.py
+++ b/app/routers.py
@@ -81,14 +81,14 @@ class ConfigPayload(BaseModel):
    @field_validator("poll_interval_seconds")
    @classmethod
    def validate_poll_interval(cls, v):
-        if v is not None and not (10 <= v <= 3600):
-            raise ValueError("Poll interval must be between 10 and 3600 seconds")
+        if v is not None and not (30 <= v <= 21600):
+            raise ValueError("Poll interval must be between 30 and 21600 seconds (30s to 6 hours)")
        return v


 class PollingConfigPayload(BaseModel):
    """Payload for updating device polling configuration."""
-    poll_interval_seconds: int | None = Field(None, ge=10, le=3600, description="Polling interval in seconds (10-3600)")
+    poll_interval_seconds: int | None = Field(None, ge=30, le=21600, description="Polling interval in seconds (30s to 6 hours)")
    poll_enabled: bool | None = Field(None, description="Enable or disable background polling for this device")


@@ -233,8 +233,8 @@ class RosterCreatePayload(BaseModel):
    @field_validator("poll_interval_seconds")
    @classmethod
    def validate_poll_interval(cls, v):
-        if v is not None and not (10 <= v <= 3600):
-            raise ValueError("Poll interval must be between 10 and 3600 seconds")
+        if v is not None and not (30 <= v <= 21600):
+            raise ValueError("Poll interval must be between 30 and 21600 seconds (30s to 6 hours)")
        return v


@@ -1880,7 +1880,7 @@ def update_polling_config(
    """
    Update background polling configuration for a device.

-    Allows configuring the polling interval (10-3600 seconds) and
+    Allows configuring the polling interval (30-21600 seconds, i.e. 30s to 6 hours) and
    enabling/disabling automatic background polling per device.

    Changes take effect on the next polling cycle.
@@ -1891,10 +1891,15 @@ def update_polling_config(

    # Update interval if provided
    if payload.poll_interval_seconds is not None:
-        if payload.poll_interval_seconds < 10:
+        if payload.poll_interval_seconds < 30:
            raise HTTPException(
                status_code=400,
-                detail="Polling interval must be at least 10 seconds"
+                detail="Polling interval must be at least 30 seconds"
+            )
+        if payload.poll_interval_seconds > 21600:
+            raise HTTPException(
+                status_code=400,
+                detail="Polling interval must be at most 21600 seconds (6 hours)"
            )
        cfg.poll_interval_seconds = payload.poll_interval_seconds

--- a/app/services.py
+++ b/app/services.py
@@ -14,7 +14,7 @@ import zipfile
 import tempfile
 from dataclasses import dataclass
 from datetime import datetime, timezone, timedelta
-from typing import Optional, List
+from typing import Optional, List, Dict
 from sqlalchemy.orm import Session
 from ftplib import FTP
 from pathlib import Path
@@ -105,6 +105,19 @@ def persist_snapshot(s: NL43Snapshot, db: Session):
 _last_command_time = {}
 _rate_limit_lock = asyncio.Lock()

+# Per-device connection locks: NL43 devices only support one TCP connection at a time
+# This prevents concurrent connections from fighting for the device
+_device_locks: Dict[str, asyncio.Lock] = {}
+_device_locks_lock = asyncio.Lock()
+
+
+async def _get_device_lock(device_key: str) -> asyncio.Lock:
+    """Get or create a lock for a specific device."""
+    async with _device_locks_lock:
+        if device_key not in _device_locks:
+            _device_locks[device_key] = asyncio.Lock()
+        return _device_locks[device_key]
+

 class NL43Client:
    def __init__(self, host: str, port: int, timeout: float = 5.0, ftp_username: str = None, ftp_password: str = None, ftp_port: int = 21):
@@ -133,7 +146,17 @@ class NL43Client:
        NL43 protocol returns two lines for query commands:
        Line 1: Result code (R+0000 for success, error codes otherwise)
        Line 2: Actual data (for query commands ending with '?')
+
+        This method acquires a per-device lock to ensure only one TCP connection
+        is active at a time (NL43 devices only support single connections).
        """
+        # Acquire per-device lock to prevent concurrent connections
+        device_lock = await _get_device_lock(self.device_key)
+        async with device_lock:
+            return await self._send_command_unlocked(cmd)
+
+    async def _send_command_unlocked(self, cmd: str) -> str:
+        """Internal: send command without acquiring device lock (lock must be held by caller)."""
        await self._enforce_rate_limit()

        logger.info(f"Sending command to {self.device_key}: {cmd.strip()}")
@@ -429,7 +452,14 @@ class NL43Client:

        The stream continues until an exception occurs or the connection is closed.
        Send SUB character (0x1A) to stop the stream.
+
+        NOTE: This method holds the device lock for the entire duration of streaming,
+        blocking other commands to this device. This is intentional since NL43 devices
+        only support one TCP connection at a time.
        """
+        # Acquire per-device lock - held for entire streaming session
+        device_lock = await _get_device_lock(self.device_key)
+        async with device_lock:
            await self._enforce_rate_limit()

            logger.info(f"Starting DRD stream for {self.device_key}")