- DB cache dump on diagnostics request.
- Individual device logs (DB and files).
- Device logs API endpoints and diagnostics UI.

Fix:
- SLMM standalone now uses the local TZ (was UTC-only before).
- Fixed measurement start time logic.
This commit is contained in:
serversdwn
2026-01-29 18:50:47 +00:00
parent eb39a9d1d0
commit bf5f222511
8 changed files with 877 additions and 1 deletion


@@ -15,7 +15,8 @@ from sqlalchemy.orm import Session
from app.database import SessionLocal
from app.models import NL43Config, NL43Status
from app.services import NL43Client, persist_snapshot
from app.services import NL43Client, persist_snapshot, sync_measurement_start_time_from_ftp
from app.device_logger import log_device_event, cleanup_old_logs
logger = logging.getLogger(__name__)
@@ -36,6 +37,7 @@ class BackgroundPoller:
self._task: Optional[asyncio.Task] = None
self._running = False
self._logger = logger
self._last_cleanup = None # Track last log cleanup time
async def start(self):
"""Start the background polling task."""
@@ -78,6 +80,15 @@ class BackgroundPoller:
except Exception as e:
self._logger.error(f"Error in poll loop: {e}", exc_info=True)
# Run log cleanup once per hour
try:
now = datetime.utcnow()
if self._last_cleanup is None or (now - self._last_cleanup).total_seconds() > 3600:
cleanup_old_logs()
self._last_cleanup = now
except Exception as e:
self._logger.warning(f"Log cleanup failed: {e}")
# Calculate dynamic sleep interval
sleep_time = self._calculate_sleep_interval()
self._logger.debug(f"Sleeping for {sleep_time} seconds until next poll cycle")
@@ -205,6 +216,71 @@ class BackgroundPoller:
db.commit()
self._logger.info(f"✓ Successfully polled {unit_id}")
# Log to device log
log_device_event(
unit_id, "INFO", "POLL",
f"Poll success: state={snap.measurement_state}, Leq={snap.leq}, Lp={snap.lp}",
db
)
# Check if device is measuring but has no start time recorded
# This happens if measurement was started before SLMM began polling
# or after a service restart
status = db.query(NL43Status).filter_by(unit_id=unit_id).first()
# Reset the sync flag when measurement stops (so next measurement can sync)
if status and status.measurement_state != "Start":
if status.start_time_sync_attempted:
status.start_time_sync_attempted = False
db.commit()
self._logger.debug(f"Reset FTP sync flag for {unit_id} (measurement stopped)")
log_device_event(unit_id, "DEBUG", "STATE", "Measurement stopped, reset FTP sync flag", db)
# Attempt FTP sync if:
# - Device is measuring
# - No start time recorded
# - FTP sync not already attempted for this measurement
# - FTP is configured
if (status and
status.measurement_state == "Start" and
status.measurement_start_time is None and
not status.start_time_sync_attempted and
cfg.ftp_enabled and
cfg.ftp_username and
cfg.ftp_password):
self._logger.info(
f"Device {unit_id} is measuring but has no start time - "
f"attempting FTP sync"
)
log_device_event(unit_id, "INFO", "SYNC", "Attempting FTP sync for measurement start time", db)
# Mark that we attempted sync (prevents repeated attempts on failure)
status.start_time_sync_attempted = True
db.commit()
try:
synced = await sync_measurement_start_time_from_ftp(
unit_id=unit_id,
host=cfg.host,
tcp_port=cfg.tcp_port,
ftp_port=cfg.ftp_port or 21,
ftp_username=cfg.ftp_username,
ftp_password=cfg.ftp_password,
db=db
)
if synced:
self._logger.info(f"✓ FTP sync succeeded for {unit_id}")
log_device_event(unit_id, "INFO", "SYNC", "FTP sync succeeded - measurement start time updated", db)
else:
self._logger.warning(f"FTP sync returned False for {unit_id}")
log_device_event(unit_id, "WARNING", "SYNC", "FTP sync returned False", db)
except Exception as sync_err:
self._logger.warning(
f"FTP sync failed for {unit_id}: {sync_err}"
)
log_device_event(unit_id, "ERROR", "SYNC", f"FTP sync failed: {sync_err}", db)
except Exception as e:
# Failure - increment counter and potentially mark offline
status.consecutive_failures += 1
@@ -217,11 +293,13 @@ class BackgroundPoller:
self._logger.warning(
f"Device {unit_id} marked unreachable after {status.consecutive_failures} failures: {error_msg}"
)
log_device_event(unit_id, "ERROR", "POLL", f"Device marked UNREACHABLE after {status.consecutive_failures} failures: {error_msg}", db)
status.is_reachable = False
else:
self._logger.warning(
f"Poll failed for {unit_id} (attempt {status.consecutive_failures}/3): {error_msg}"
)
log_device_event(unit_id, "WARNING", "POLL", f"Poll failed (attempt {status.consecutive_failures}/3): {error_msg}", db)
db.commit()

app/device_logger.py Normal file

@@ -0,0 +1,277 @@
"""
Per-device logging system.
Provides dual output: database entries for structured queries and file logs for backup.
Each device gets its own log file in data/logs/{unit_id}.log with rotation.
"""
import logging
import os
from datetime import datetime, timedelta
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Optional
from sqlalchemy.orm import Session
from app.database import SessionLocal
from app.models import DeviceLog
# Configure base logger
logger = logging.getLogger(__name__)
# Log directory (persisted in Docker volume)
LOG_DIR = Path(os.path.dirname(os.path.dirname(__file__))) / "data" / "logs"
LOG_DIR.mkdir(parents=True, exist_ok=True)
# Per-device file loggers (cached)
_device_file_loggers: dict = {}
# Log retention (days)
LOG_RETENTION_DAYS = int(os.getenv("LOG_RETENTION_DAYS", "7"))
def _get_file_logger(unit_id: str) -> logging.Logger:
"""Get or create a file logger for a specific device."""
if unit_id in _device_file_loggers:
return _device_file_loggers[unit_id]
# Create device-specific logger
device_logger = logging.getLogger(f"device.{unit_id}")
device_logger.setLevel(logging.DEBUG)
# Avoid duplicate handlers
if not device_logger.handlers:
# Create rotating file handler (5 MB max, keep 3 backups)
log_file = LOG_DIR / f"{unit_id}.log"
handler = RotatingFileHandler(
log_file,
maxBytes=5 * 1024 * 1024, # 5 MB
backupCount=3,
encoding="utf-8"
)
handler.setLevel(logging.DEBUG)
# Format: timestamp [LEVEL] [CATEGORY] message
formatter = logging.Formatter(
"%(asctime)s [%(levelname)s] [%(category)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
handler.setFormatter(formatter)
device_logger.addHandler(handler)
# Don't propagate to root logger
device_logger.propagate = False
_device_file_loggers[unit_id] = device_logger
return device_logger
def log_device_event(
unit_id: str,
level: str,
category: str,
message: str,
db: Optional[Session] = None
):
"""
Log an event for a specific device.
Writes to both:
1. Database (DeviceLog table) for structured queries
2. File (data/logs/{unit_id}.log) for backup/debugging
Args:
unit_id: Device identifier
level: Log level (DEBUG, INFO, WARNING, ERROR)
category: Event category (TCP, FTP, POLL, COMMAND, STATE, SYNC)
message: Log message
db: Optional database session (creates one if not provided)
"""
timestamp = datetime.utcnow()
# Write to file log
try:
file_logger = _get_file_logger(unit_id)
log_func = getattr(file_logger, level.lower(), file_logger.info)
# Pass category as extra for formatter
log_func(message, extra={"category": category})
except Exception as e:
logger.warning(f"Failed to write file log for {unit_id}: {e}")
# Write to database
close_db = False
try:
if db is None:
db = SessionLocal()
close_db = True
log_entry = DeviceLog(
unit_id=unit_id,
timestamp=timestamp,
level=level.upper(),
category=category.upper(),
message=message
)
db.add(log_entry)
db.commit()
except Exception as e:
logger.warning(f"Failed to write DB log for {unit_id}: {e}")
if db:
db.rollback()
finally:
if close_db and db:
db.close()
def cleanup_old_logs(retention_days: Optional[int] = None, db: Optional[Session] = None):
"""
Delete log entries older than retention period.
Args:
retention_days: Days to retain (default: LOG_RETENTION_DAYS env var or 7)
db: Optional database session
"""
if retention_days is None:
retention_days = LOG_RETENTION_DAYS
cutoff = datetime.utcnow() - timedelta(days=retention_days)
close_db = False
try:
if db is None:
db = SessionLocal()
close_db = True
deleted = db.query(DeviceLog).filter(DeviceLog.timestamp < cutoff).delete()
db.commit()
if deleted > 0:
logger.info(f"Cleaned up {deleted} log entries older than {retention_days} days")
except Exception as e:
logger.error(f"Failed to cleanup old logs: {e}")
if db:
db.rollback()
finally:
if close_db and db:
db.close()
def get_device_logs(
unit_id: str,
limit: int = 100,
offset: int = 0,
level: Optional[str] = None,
category: Optional[str] = None,
since: Optional[datetime] = None,
db: Optional[Session] = None
) -> list:
"""
Query log entries for a specific device.
Args:
unit_id: Device identifier
limit: Max entries to return (default: 100)
offset: Number of entries to skip (default: 0)
level: Filter by level (DEBUG, INFO, WARNING, ERROR)
category: Filter by category (TCP, FTP, POLL, COMMAND, STATE, SYNC)
since: Filter entries after this timestamp
db: Optional database session
Returns:
List of log entries as dicts
"""
close_db = False
try:
if db is None:
db = SessionLocal()
close_db = True
query = db.query(DeviceLog).filter(DeviceLog.unit_id == unit_id)
if level:
query = query.filter(DeviceLog.level == level.upper())
if category:
query = query.filter(DeviceLog.category == category.upper())
if since:
query = query.filter(DeviceLog.timestamp >= since)
# Order by newest first
query = query.order_by(DeviceLog.timestamp.desc())
# Apply pagination
entries = query.offset(offset).limit(limit).all()
return [
{
"id": e.id,
"timestamp": e.timestamp.isoformat() + "Z",
"level": e.level,
"category": e.category,
"message": e.message
}
for e in entries
]
finally:
if close_db and db:
db.close()
def get_log_stats(unit_id: str, db: Optional[Session] = None) -> dict:
"""
Get log statistics for a device.
Returns:
Dict with counts by level and category
"""
close_db = False
try:
if db is None:
db = SessionLocal()
close_db = True
total = db.query(DeviceLog).filter(DeviceLog.unit_id == unit_id).count()
# Count by level
level_counts = {}
for level in ["DEBUG", "INFO", "WARNING", "ERROR"]:
count = db.query(DeviceLog).filter(
DeviceLog.unit_id == unit_id,
DeviceLog.level == level
).count()
if count > 0:
level_counts[level] = count
# Count by category
category_counts = {}
for category in ["TCP", "FTP", "POLL", "COMMAND", "STATE", "SYNC", "GENERAL"]:
count = db.query(DeviceLog).filter(
DeviceLog.unit_id == unit_id,
DeviceLog.category == category
).count()
if count > 0:
category_counts[category] = count
# Get oldest and newest
oldest = db.query(DeviceLog).filter(
DeviceLog.unit_id == unit_id
).order_by(DeviceLog.timestamp.asc()).first()
newest = db.query(DeviceLog).filter(
DeviceLog.unit_id == unit_id
).order_by(DeviceLog.timestamp.desc()).first()
return {
"total": total,
"by_level": level_counts,
"by_category": category_counts,
"oldest": oldest.timestamp.isoformat() + "Z" if oldest else None,
"newest": newest.timestamp.isoformat() + "Z" if newest else None
}
finally:
if close_db and db:
db.close()
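
For orientation, a minimal usage sketch of the helpers above (the unit id and message are invented; when no session is passed, each call opens and closes its own):

from app.device_logger import log_device_event, get_device_logs, get_log_stats

# Record an event for a device (hypothetical unit id and message)
log_device_event("NL43-0001", "INFO", "COMMAND", "Manual start requested via UI")

# Fetch the 20 most recent ERROR entries plus the per-level/per-category counts
recent_errors = get_device_logs("NL43-0001", limit=20, level="ERROR")
stats = get_log_stats("NL43-0001")
print(stats["total"], stats.get("by_level"))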


@@ -53,3 +53,22 @@ class NL43Status(Base):
last_poll_attempt = Column(DateTime, nullable=True) # Last time background poller attempted to poll
last_success = Column(DateTime, nullable=True) # Last successful poll timestamp
last_error = Column(Text, nullable=True) # Last error message (truncated to 500 chars)
# FTP start time sync tracking
start_time_sync_attempted = Column(Boolean, default=False) # True if FTP sync was attempted for current measurement
class DeviceLog(Base):
"""
Per-device log entries for debugging and audit trail.
Stores events like commands, state changes, errors, and FTP operations.
"""
__tablename__ = "device_logs"
id = Column(Integer, primary_key=True, autoincrement=True)
unit_id = Column(String, index=True, nullable=False)
timestamp = Column(DateTime, default=func.now(), index=True)
level = Column(String, default="INFO") # DEBUG, INFO, WARNING, ERROR
category = Column(String, default="GENERAL") # TCP, FTP, POLL, COMMAND, STATE, SYNC
message = Column(Text, nullable=False)
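
The diff does not include a migration for the new device_logs table, so presumably it is created at application startup. A minimal sketch of a one-off creation, assuming app.database exposes the engine and that Base is the declarative base DeviceLog inherits from (both import paths are assumptions):

from app.database import engine            # assumed location of the engine
from app.models import Base, DeviceLog     # Base import path is an assumption

# Create only the new table; existing tables are left untouched
Base.metadata.create_all(bind=engine, tables=[DeviceLog.__table__])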


@@ -3,6 +3,7 @@ from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from datetime import datetime
from pydantic import BaseModel, field_validator, Field
from typing import Optional
import logging
import ipaddress
import json
@@ -1842,9 +1843,134 @@ async def run_diagnostics(unit_id: str, db: Session = Depends(get_db)):
# All tests passed
diagnostics["overall_status"] = "pass"
# Add database dump: config and status cache
diagnostics["database_dump"] = {
"config": {
"unit_id": cfg.unit_id,
"host": cfg.host,
"tcp_port": cfg.tcp_port,
"tcp_enabled": cfg.tcp_enabled,
"ftp_enabled": cfg.ftp_enabled,
"ftp_port": cfg.ftp_port,
"ftp_username": cfg.ftp_username,
"ftp_password": "***" if cfg.ftp_password else None, # Mask password
"web_enabled": cfg.web_enabled,
"poll_interval_seconds": cfg.poll_interval_seconds,
"poll_enabled": cfg.poll_enabled
},
"status_cache": None
}
# Get cached status if available
status = db.query(NL43Status).filter_by(unit_id=unit_id).first()
if status:
# Helper to format datetime as ISO with Z suffix to indicate UTC
def to_utc_iso(dt):
return dt.isoformat() + 'Z' if dt else None
diagnostics["database_dump"]["status_cache"] = {
"unit_id": status.unit_id,
"last_seen": to_utc_iso(status.last_seen),
"measurement_state": status.measurement_state,
"measurement_start_time": to_utc_iso(status.measurement_start_time),
"counter": status.counter,
"lp": status.lp,
"leq": status.leq,
"lmax": status.lmax,
"lmin": status.lmin,
"lpeak": status.lpeak,
"battery_level": status.battery_level,
"power_source": status.power_source,
"sd_remaining_mb": status.sd_remaining_mb,
"sd_free_ratio": status.sd_free_ratio,
"is_reachable": status.is_reachable,
"consecutive_failures": status.consecutive_failures,
"last_poll_attempt": to_utc_iso(status.last_poll_attempt),
"last_success": to_utc_iso(status.last_success),
"last_error": status.last_error,
"raw_payload": status.raw_payload
}
return diagnostics
# ============================================================================
# DEVICE LOGS ENDPOINTS
# ============================================================================
@router.get("/{unit_id}/logs")
def get_device_logs(
unit_id: str,
limit: int = 100,
offset: int = 0,
level: Optional[str] = None,
category: Optional[str] = None,
db: Session = Depends(get_db)
):
"""
Get log entries for a specific device.
Query parameters:
- limit: Max entries to return (default: 100, max: 1000)
- offset: Number of entries to skip (for pagination)
- level: Filter by level (DEBUG, INFO, WARNING, ERROR)
- category: Filter by category (TCP, FTP, POLL, COMMAND, STATE, SYNC)
Returns newest entries first.
"""
from app.device_logger import get_device_logs as fetch_logs, get_log_stats
# Validate limit
limit = min(limit, 1000)
logs = fetch_logs(
unit_id=unit_id,
limit=limit,
offset=offset,
level=level,
category=category,
db=db
)
stats = get_log_stats(unit_id, db)
return {
"status": "ok",
"unit_id": unit_id,
"logs": logs,
"count": len(logs),
"stats": stats,
"filters": {
"level": level,
"category": category
},
"pagination": {
"limit": limit,
"offset": offset
}
}
@router.delete("/{unit_id}/logs")
def clear_device_logs(unit_id: str, db: Session = Depends(get_db)):
"""
Clear all log entries for a specific device.
"""
from app.models import DeviceLog
deleted = db.query(DeviceLog).filter(DeviceLog.unit_id == unit_id).delete()
db.commit()
logger.info(f"Cleared {deleted} log entries for device {unit_id}")
return {
"status": "ok",
"message": f"Cleared {deleted} log entries for {unit_id}",
"deleted_count": deleted
}
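
A rough client-side sketch of the two endpoints above; only the "/{unit_id}/logs" suffix is visible in this diff, so the base URL and router prefix are assumptions:

import httpx

BASE = "http://localhost:8000/api/nl43"    # assumed mount point for the router
unit = "NL43-0001"                         # hypothetical unit id

# Newest-first ERROR entries, plus the aggregated stats returned alongside them
resp = httpx.get(f"{BASE}/{unit}/logs", params={"limit": 50, "level": "ERROR"})
resp.raise_for_status()
payload = resp.json()
print(payload["count"], payload["stats"].get("by_level"))

# Clear the device's log history
resp = httpx.delete(f"{BASE}/{unit}/logs")
resp.raise_for_status()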
# ============================================================================
# BACKGROUND POLLING CONFIGURATION ENDPOINTS
# ============================================================================


@@ -76,10 +76,22 @@ def persist_snapshot(s: NL43Snapshot, db: Session):
# Measurement just started - record the start time
row.measurement_start_time = datetime.utcnow()
logger.info(f"✓ Measurement started on {s.unit_id} at {row.measurement_start_time}")
# Log state change (lazy import to avoid circular dependency)
try:
from app.device_logger import log_device_event
log_device_event(s.unit_id, "INFO", "STATE", f"Measurement STARTED at {row.measurement_start_time}", db)
except Exception:
pass
elif was_measuring and not is_measuring:
# Measurement stopped - clear the start time
row.measurement_start_time = None
logger.info(f"✓ Measurement stopped on {s.unit_id}")
# Log state change
try:
from app.device_logger import log_device_event
log_device_event(s.unit_id, "INFO", "STATE", "Measurement STOPPED", db)
except Exception:
pass
row.measurement_state = new_state
row.counter = s.counter
@@ -101,6 +113,109 @@ def persist_snapshot(s: NL43Snapshot, db: Session):
raise
async def sync_measurement_start_time_from_ftp(
unit_id: str,
host: str,
tcp_port: int,
ftp_port: int,
ftp_username: str,
ftp_password: str,
db: Session
) -> bool:
"""
Sync measurement start time from the FTP folder timestamp.
This is called when SLMM detects a device is already measuring but doesn't
have a recorded start time (e.g., after service restart or if measurement
was started before SLMM began polling).
The workflow:
1. Disable FTP (reset)
2. Enable FTP
3. List NL-43 folder to get measurement folder timestamps
4. Use the most recent folder's timestamp as the start time
5. Update the database
Args:
unit_id: Device identifier
host: Device IP/hostname
tcp_port: TCP control port
ftp_port: FTP port (usually 21)
ftp_username: FTP username (usually "USER")
ftp_password: FTP password (usually "0000")
db: Database session
Returns:
True if sync succeeded, False otherwise
"""
logger.info(f"[FTP-SYNC] Attempting to sync measurement start time for {unit_id} via FTP")
client = NL43Client(
host, tcp_port,
ftp_username=ftp_username,
ftp_password=ftp_password,
ftp_port=ftp_port
)
try:
# Step 1: Disable FTP to reset it
logger.info(f"[FTP-SYNC] Step 1: Disabling FTP on {unit_id}")
await client.disable_ftp()
await asyncio.sleep(1.5) # Wait for device to process
# Step 2: Enable FTP
logger.info(f"[FTP-SYNC] Step 2: Enabling FTP on {unit_id}")
await client.enable_ftp()
await asyncio.sleep(2.0) # Wait for FTP server to start
# Step 3: List NL-43 folder
logger.info(f"[FTP-SYNC] Step 3: Listing /NL-43 folder on {unit_id}")
files = await client.list_ftp_files("/NL-43")
# Filter for directories only (measurement folders)
folders = [f for f in files if f.get('is_dir', False)]
if not folders:
logger.warning(f"[FTP-SYNC] No measurement folders found on {unit_id}")
return False
# Sort by modified timestamp (newest first)
folders.sort(key=lambda f: f.get('modified_timestamp', ''), reverse=True)
latest_folder = folders[0]
folder_name = latest_folder['name']
logger.info(f"[FTP-SYNC] Found latest measurement folder: {folder_name}")
# Step 4: Parse timestamp
if 'modified_timestamp' in latest_folder and latest_folder['modified_timestamp']:
timestamp_str = latest_folder['modified_timestamp']
# Parse ISO format timestamp (already in UTC from SLMM FTP listing)
start_time = datetime.fromisoformat(timestamp_str.replace('Z', ''))
# Step 5: Update database
status = db.query(NL43Status).filter_by(unit_id=unit_id).first()
if status:
old_time = status.measurement_start_time
status.measurement_start_time = start_time
db.commit()
logger.info(f"[FTP-SYNC] ✓ Successfully synced start time for {unit_id}")
logger.info(f"[FTP-SYNC] Folder: {folder_name}")
logger.info(f"[FTP-SYNC] Old start time: {old_time}")
logger.info(f"[FTP-SYNC] New start time: {start_time}")
return True
else:
logger.warning(f"[FTP-SYNC] Status record not found for {unit_id}")
return False
else:
logger.warning(f"[FTP-SYNC] Could not parse timestamp from folder {folder_name}")
return False
except Exception as e:
logger.error(f"[FTP-SYNC] Failed to sync start time for {unit_id}: {e}")
return False
# Rate limiting: NL43 requires ≥1 second between commands
_last_command_time = {}
_rate_limit_lock = asyncio.Lock()
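
The enforcement code sits outside this hunk; a hedged sketch of how these two module-level objects could implement the ≥1 second per-device gap (the helper name and exact policy are assumptions):

import time  # asyncio is already imported at the top of this module

async def _respect_rate_limit(unit_id: str, min_gap: float = 1.0) -> None:
    """Sleep just long enough to keep >=1 s between commands to the same unit."""
    async with _rate_limit_lock:
        last = _last_command_time.get(unit_id)
        now = time.monotonic()
        if last is not None and now - last < min_gap:
            await asyncio.sleep(min_gap - (now - last))
        _last_command_time[unit_id] = time.monotonic()

Holding the lock while sleeping serializes commands across all devices; releasing it before sleeping would trade strict ordering for more concurrency.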