v0.3.0, persistent polling update. Persistent TCP connection pool with all features; connection pool diagnostics (API + UI); all 6 new environment variables; changes to health check, diagnostics, and DRD streaming; technical architecture details and cellular #2

Merged
serversdown merged 3 commits from dev-persistent into main 2026-02-16 21:57:38 -05:00
4 changed files with 352 additions and 99 deletions
Showing only changes of commit a5f8d1b2c7 - Show all commits

View File

@@ -92,10 +92,14 @@ async def health():
@app.get("/health/devices") @app.get("/health/devices")
async def health_devices(): async def health_devices():
"""Enhanced health check that tests device connectivity.""" """Enhanced health check that tests device connectivity.
Uses the connection pool to avoid unnecessary TCP handshakes — if a
cached connection exists and is alive, the device is reachable.
"""
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.database import SessionLocal from app.database import SessionLocal
from app.services import NL43Client from app.services import _connection_pool
from app.models import NL43Config from app.models import NL43Config
db: Session = SessionLocal() db: Session = SessionLocal()
@@ -105,7 +109,7 @@ async def health_devices():
configs = db.query(NL43Config).filter_by(tcp_enabled=True).all() configs = db.query(NL43Config).filter_by(tcp_enabled=True).all()
for cfg in configs: for cfg in configs:
client = NL43Client(cfg.host, cfg.tcp_port, timeout=2.0, ftp_username=cfg.ftp_username, ftp_password=cfg.ftp_password) device_key = f"{cfg.host}:{cfg.tcp_port}"
status = { status = {
"unit_id": cfg.unit_id, "unit_id": cfg.unit_id,
"host": cfg.host, "host": cfg.host,
@@ -115,14 +119,22 @@ async def health_devices():
} }
try: try:
# Try to connect (don't send command to avoid rate limiting issues) # Check if pool already has a live connection (zero-cost check)
import asyncio pool_stats = _connection_pool.get_stats()
reader, writer = await asyncio.wait_for( conn_info = pool_stats["connections"].get(device_key)
asyncio.open_connection(cfg.host, cfg.tcp_port), timeout=2.0 if conn_info and conn_info["alive"]:
) status["reachable"] = True
writer.close() status["source"] = "pool"
await writer.wait_closed() else:
status["reachable"] = True # No cached connection — do a lightweight acquire/release
# This opens a connection if needed but keeps it in the pool
import asyncio
reader, writer, from_cache = await _connection_pool.acquire(
device_key, cfg.host, cfg.tcp_port, timeout=2.0
)
await _connection_pool.release(device_key, reader, writer, cfg.host, cfg.tcp_port)
status["reachable"] = True
status["source"] = "cached" if from_cache else "new"
except Exception as e: except Exception as e:
status["error"] = str(type(e).__name__) status["error"] = str(type(e).__name__)
logger.warning(f"Device {cfg.unit_id} health check failed: {e}") logger.warning(f"Device {cfg.unit_id} health check failed: {e}")

View File

@@ -1755,74 +1755,38 @@ async def run_diagnostics(unit_id: str, db: Session = Depends(get_db)):
"message": "TCP communication enabled" "message": "TCP communication enabled"
} }
# Test 3: Modem/Router reachable (check port 443 HTTPS) # Test 3: TCP connection reachable (device port) — uses connection pool
# This avoids extra TCP handshakes over cellular. If a cached connection
# exists and is alive, we skip the handshake entirely.
from app.services import _connection_pool
device_key = f"{cfg.host}:{cfg.tcp_port}"
try: try:
reader, writer = await asyncio.wait_for( pool_stats = _connection_pool.get_stats()
asyncio.open_connection(cfg.host, 443), timeout=3.0 conn_info = pool_stats["connections"].get(device_key)
) if conn_info and conn_info["alive"]:
writer.close() # Pool already has a live connection — device is reachable
await writer.wait_closed() diagnostics["tests"]["tcp_connection"] = {
diagnostics["tests"]["modem_reachable"] = { "status": "pass",
"status": "pass", "message": f"TCP connection alive in pool for {cfg.host}:{cfg.tcp_port}"
"message": f"Modem/router reachable at {cfg.host}" }
} else:
except asyncio.TimeoutError: # Acquire through the pool (opens new if needed, keeps it cached)
diagnostics["tests"]["modem_reachable"] = { reader, writer, from_cache = await _connection_pool.acquire(
"status": "fail", device_key, cfg.host, cfg.tcp_port, timeout=3.0
"message": f"Modem/router timeout at {cfg.host} (network issue)" )
} await _connection_pool.release(device_key, reader, writer, cfg.host, cfg.tcp_port)
diagnostics["overall_status"] = "fail" diagnostics["tests"]["tcp_connection"] = {
return diagnostics "status": "pass",
except ConnectionRefusedError: "message": f"TCP connection successful to {cfg.host}:{cfg.tcp_port}"
# Connection refused means host is up but port 443 closed - that's ok }
diagnostics["tests"]["modem_reachable"] = {
"status": "pass",
"message": f"Modem/router reachable at {cfg.host} (HTTPS closed)"
}
except Exception as e:
diagnostics["tests"]["modem_reachable"] = {
"status": "fail",
"message": f"Cannot reach modem/router at {cfg.host}: {str(e)}"
}
diagnostics["overall_status"] = "fail"
return diagnostics
# Test 4: TCP connection reachable (device port)
try:
reader, writer = await asyncio.wait_for(
asyncio.open_connection(cfg.host, cfg.tcp_port), timeout=3.0
)
writer.close()
await writer.wait_closed()
diagnostics["tests"]["tcp_connection"] = {
"status": "pass",
"message": f"TCP connection successful to {cfg.host}:{cfg.tcp_port}"
}
except asyncio.TimeoutError:
diagnostics["tests"]["tcp_connection"] = {
"status": "fail",
"message": f"Connection timeout to {cfg.host}:{cfg.tcp_port}"
}
diagnostics["overall_status"] = "fail"
return diagnostics
except ConnectionRefusedError:
diagnostics["tests"]["tcp_connection"] = {
"status": "fail",
"message": f"Connection refused by {cfg.host}:{cfg.tcp_port}"
}
diagnostics["overall_status"] = "fail"
return diagnostics
except Exception as e: except Exception as e:
diagnostics["tests"]["tcp_connection"] = { diagnostics["tests"]["tcp_connection"] = {
"status": "fail", "status": "fail",
"message": f"Connection error: {str(e)}" "message": f"Connection error to {cfg.host}:{cfg.tcp_port}: {str(e)}"
} }
diagnostics["overall_status"] = "fail" diagnostics["overall_status"] = "fail"
return diagnostics return diagnostics
# Wait a bit after connection test to let device settle
await asyncio.sleep(1.5)
# Test 5: Device responds to commands # Test 5: Device responds to commands
# Use longer timeout to account for rate limiting (device requires ≥1s between commands) # Use longer timeout to account for rate limiting (device requires ≥1s between commands)
client = NL43Client(cfg.host, cfg.tcp_port, timeout=10.0, ftp_username=cfg.ftp_username, ftp_password=cfg.ftp_password) client = NL43Client(cfg.host, cfg.tcp_port, timeout=10.0, ftp_username=cfg.ftp_username, ftp_password=cfg.ftp_password)

View File

@@ -242,8 +242,8 @@ async def _get_device_lock(device_key: str) -> asyncio.Lock:
# Configuration via environment variables # Configuration via environment variables
TCP_PERSISTENT_ENABLED = os.getenv("TCP_PERSISTENT_ENABLED", "true").lower() == "true" TCP_PERSISTENT_ENABLED = os.getenv("TCP_PERSISTENT_ENABLED", "true").lower() == "true"
TCP_IDLE_TTL = float(os.getenv("TCP_IDLE_TTL", "120")) # Close idle connections after N seconds TCP_IDLE_TTL = float(os.getenv("TCP_IDLE_TTL", "300")) # Close idle connections after N seconds
TCP_MAX_AGE = float(os.getenv("TCP_MAX_AGE", "300")) # Force reconnect after N seconds TCP_MAX_AGE = float(os.getenv("TCP_MAX_AGE", "1800")) # Force reconnect after N seconds
TCP_KEEPALIVE_IDLE = int(os.getenv("TCP_KEEPALIVE_IDLE", "15")) # Seconds idle before probes TCP_KEEPALIVE_IDLE = int(os.getenv("TCP_KEEPALIVE_IDLE", "15")) # Seconds idle before probes
TCP_KEEPALIVE_INTERVAL = int(os.getenv("TCP_KEEPALIVE_INTERVAL", "10")) # Seconds between probes TCP_KEEPALIVE_INTERVAL = int(os.getenv("TCP_KEEPALIVE_INTERVAL", "10")) # Seconds between probes
TCP_KEEPALIVE_COUNT = int(os.getenv("TCP_KEEPALIVE_COUNT", "3")) # Failed probes before dead TCP_KEEPALIVE_COUNT = int(os.getenv("TCP_KEEPALIVE_COUNT", "3")) # Failed probes before dead

View File

@@ -3,7 +3,7 @@
<head> <head>
<meta charset="UTF-8" /> <meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>SLMM Roster - Sound Level Meter Configuration</title> <title>SLMM - Device Roster &amp; Connections</title>
<style> <style>
* { box-sizing: border-box; } * { box-sizing: border-box; }
body { body {
@@ -227,41 +227,165 @@
} }
.toast-success { background: #2da44e; } .toast-success { background: #2da44e; }
.toast-error { background: #cf222e; } .toast-error { background: #cf222e; }
/* Tabs */
.tabs {
display: flex;
gap: 0;
margin-bottom: 0;
border-bottom: 2px solid #d0d7de;
}
.tab-btn {
padding: 10px 20px;
border: none;
background: none;
cursor: pointer;
font-size: 14px;
font-weight: 600;
color: #57606a;
border-bottom: 2px solid transparent;
margin-bottom: -2px;
transition: color 0.2s, border-color 0.2s;
}
.tab-btn:hover { color: #24292f; }
.tab-btn.active {
color: #24292f;
border-bottom-color: #fd8c73;
}
.tab-panel { display: none; }
.tab-panel.active { display: block; }
/* Connection pool panel */
.pool-config {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
gap: 12px;
margin-bottom: 20px;
}
.pool-config-card {
background: #f6f8fa;
border: 1px solid #d0d7de;
border-radius: 6px;
padding: 12px;
}
.pool-config-card .label {
font-size: 11px;
color: #57606a;
text-transform: uppercase;
font-weight: 600;
margin-bottom: 4px;
}
.pool-config-card .value {
font-size: 18px;
font-weight: 600;
color: #24292f;
}
.conn-card {
background: white;
border: 1px solid #d0d7de;
border-radius: 6px;
padding: 16px;
margin-bottom: 12px;
}
.conn-card-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 12px;
}
.conn-card-header strong { font-size: 15px; }
.conn-card-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(140px, 1fr));
gap: 8px;
}
.conn-stat .label {
font-size: 11px;
color: #57606a;
text-transform: uppercase;
font-weight: 600;
}
.conn-stat .value {
font-size: 14px;
font-weight: 600;
color: #24292f;
}
.conn-empty {
text-align: center;
padding: 32px;
color: #57606a;
}
.pool-actions {
display: flex;
gap: 8px;
margin-bottom: 16px;
}
</style> </style>
</head> </head>
<body> <body>
<div class="container"> <div class="container">
<div class="header"> <div class="header">
<h1>📊 Sound Level Meter Roster</h1> <h1>SLMM - Roster &amp; Connections</h1>
<div class="nav"> <div class="nav">
<a href="/" class="btn"> Back to Control Panel</a> <a href="/" class="btn">&larr; Back to Control Panel</a>
<button class="btn btn-primary" onclick="openAddModal()">+ Add Device</button> <button class="btn btn-primary" onclick="openAddModal()">+ Add Device</button>
</div> </div>
</div> </div>
<div class="table-container"> <div class="tabs">
<table id="rosterTable"> <button class="tab-btn active" onclick="switchTab('roster')">Device Roster</button>
<thead> <button class="tab-btn" onclick="switchTab('connections')">Connections</button>
<tr> </div>
<th>Unit ID</th>
<th>Host / IP</th> <!-- Roster Tab -->
<th>TCP Port</th> <div id="tab-roster" class="tab-panel active">
<th>FTP Port</th> <div class="table-container" style="border-top-left-radius: 0; border-top-right-radius: 0;">
<th class="checkbox-cell">TCP</th> <table id="rosterTable">
<th class="checkbox-cell">FTP</th> <thead>
<th class="checkbox-cell">Polling</th> <tr>
<th>Status</th> <th>Unit ID</th>
<th class="actions-cell">Actions</th> <th>Host / IP</th>
</tr> <th>TCP Port</th>
</thead> <th>FTP Port</th>
<tbody id="rosterBody"> <th class="checkbox-cell">TCP</th>
<tr> <th class="checkbox-cell">FTP</th>
<td colspan="9" style="text-align: center; padding: 24px;"> <th class="checkbox-cell">Polling</th>
Loading... <th>Status</th>
</td> <th class="actions-cell">Actions</th>
</tr> </tr>
</tbody> </thead>
</table> <tbody id="rosterBody">
<tr>
<td colspan="9" style="text-align: center; padding: 24px;">
Loading...
</td>
</tr>
</tbody>
</table>
</div>
</div>
<!-- Connections Tab -->
<div id="tab-connections" class="tab-panel">
<div class="table-container" style="padding: 20px; border-top-left-radius: 0; border-top-right-radius: 0;">
<div class="pool-actions">
<button class="btn" onclick="loadConnections()">Refresh</button>
<button class="btn btn-danger" onclick="flushConnections()">Flush All Connections</button>
</div>
<h3 style="margin: 0 0 12px 0; font-size: 16px;">Pool Configuration</h3>
<div id="poolConfig" class="pool-config">
<div class="pool-config-card">
<div class="label">Status</div>
<div class="value" id="poolEnabled">--</div>
</div>
</div>
<h3 style="margin: 20px 0 12px 0; font-size: 16px;">Active Connections</h3>
<div id="connectionsList">
<div class="conn-empty">Loading...</div>
</div>
</div>
</div> </div>
</div> </div>
@@ -619,6 +743,159 @@
closeModal(); closeModal();
} }
}); });
// ========== Tab Switching ==========
/**
 * Show the tab identified by `tabName` and hide the others.
 *
 * Clears the `active` class from every `.tab-btn` and `.tab-panel`, then
 * re-applies it to the button whose inline onclick handler targets this tab
 * and to the panel with id `tab-<tabName>`. Selecting the `connections` tab
 * also triggers a refresh of the connection pool view.
 */
function switchTab(tabName) {
  // Reset: nothing is active until the requested tab is marked below.
  for (const el of document.querySelectorAll('.tab-btn, .tab-panel')) {
    el.classList.remove('active');
  }

  // The tab buttons carry no id; they are located via their onclick attribute.
  const buttonSelector = `.tab-btn[onclick="switchTab('${tabName}')"]`;
  const button = document.querySelector(buttonSelector);
  button.classList.add('active');

  const panel = document.getElementById(`tab-${tabName}`);
  panel.classList.add('active');

  if (tabName !== 'connections') {
    return;
  }
  loadConnections();
}
// ========== Connection Pool ==========
// Handle of the pending auto-refresh timeout; null until one is scheduled.
let connectionsRefreshTimer = null;

/**
 * Fetch the connection pool status and re-render the Connections tab.
 *
 * On success, renders the pool configuration cards and the per-connection
 * cards, then schedules another refresh in 5 seconds if the Connections tab
 * is still the active panel. On a non-OK response or any thrown error, a
 * toast is shown and no further refresh is scheduled by this call.
 */
async function loadConnections() {
  try {
    const res = await fetch('/api/nl43/_connections/status');

    // Check the status before parsing the body: an error response is not
    // guaranteed to be JSON, and parsing it first would replace the intended
    // failure message with a JSON syntax error.
    if (!res.ok) {
      showToast('Failed to load connection pool status', 'error');
      return;
    }

    const data = await res.json();
    const pool = data.pool;
    renderPoolConfig(pool);
    renderConnections(pool.connections);

    // Auto-refresh while tab is active. Any previously scheduled refresh is
    // cancelled first so that only one timer is pending at a time.
    clearTimeout(connectionsRefreshTimer);
    if (document.getElementById('tab-connections').classList.contains('active')) {
      connectionsRefreshTimer = setTimeout(loadConnections, 5000);
    }
  } catch (err) {
    showToast('Error loading connections: ' + err.message, 'error');
    console.error('Load connections error:', err);
  }
}
/**
 * Render the pool summary cards into the #poolConfig element.
 *
 * Replaces the element's innerHTML with seven cards built from `pool`:
 * Persistent (pool.enabled, shown as Enabled/Disabled in green/red),
 * Active (pool.active_connections), Idle TTL (pool.idle_ttl),
 * Max Age (pool.max_age), KA Idle (pool.keepalive_idle),
 * KA Interval (pool.keepalive_interval) and KA Probes (pool.keepalive_count).
 * The TTL, age and keepalive idle/interval values are rendered with an "s" suffix.
 */
function renderPoolConfig(pool) {
// NOTE(review): pool values are interpolated into the markup without escaping;
// presumably they are numbers/booleans from the status endpoint — confirm.
document.getElementById('poolConfig').innerHTML = `
<div class="pool-config-card">
<div class="label">Persistent</div>
<div class="value" style="color: ${pool.enabled ? '#1a7f37' : '#cf222e'}">${pool.enabled ? 'Enabled' : 'Disabled'}</div>
</div>
<div class="pool-config-card">
<div class="label">Active</div>
<div class="value">${pool.active_connections}</div>
</div>
<div class="pool-config-card">
<div class="label">Idle TTL</div>
<div class="value">${pool.idle_ttl}s</div>
</div>
<div class="pool-config-card">
<div class="label">Max Age</div>
<div class="value">${pool.max_age}s</div>
</div>
<div class="pool-config-card">
<div class="label">KA Idle</div>
<div class="value">${pool.keepalive_idle}s</div>
</div>
<div class="pool-config-card">
<div class="label">KA Interval</div>
<div class="value">${pool.keepalive_interval}s</div>
</div>
<div class="pool-config-card">
<div class="label">KA Probes</div>
<div class="value">${pool.keepalive_count}</div>
</div>
`;
}
/**
 * Render one card per pooled connection into the #connectionsList element.
 *
 * `connections` is an object keyed by connection key; each value is read for
 * `alive`, `host`, `port`, `age_seconds` and `idle_seconds`. When the object
 * has no keys, an explanatory "No active connections" placeholder is rendered
 * instead.
 */
function renderConnections(connections) {
const container = document.getElementById('connectionsList');
const keys = Object.keys(connections);
// Empty pool: show the placeholder and stop.
if (keys.length === 0) {
container.innerHTML = `
<div class="conn-empty">
<div style="font-size: 32px; margin-bottom: 8px;">~</div>
<div><strong>No active connections</strong></div>
<div style="margin-top: 4px; font-size: 13px;">
Connections appear here when devices are actively being polled and the connection is cached between commands.
</div>
</div>
`;
return;
}
// Build the markup for every connection and assign it in one innerHTML write.
container.innerHTML = keys.map(key => {
const conn = connections[key];
// NOTE(review): aliveColor is computed but never referenced in the template
// below; the badge colour comes from the status-ok / status-error classes.
const aliveColor = conn.alive ? '#1a7f37' : '#cf222e';
const aliveText = conn.alive ? 'Alive' : 'Stale';
// The key and host are passed through escapeHtml; the port is interpolated as-is.
return `
<div class="conn-card">
<div class="conn-card-header">
<strong>${escapeHtml(key)}</strong>
<span class="status-badge ${conn.alive ? 'status-ok' : 'status-error'}">${aliveText}</span>
</div>
<div class="conn-card-grid">
<div class="conn-stat">
<div class="label">Host</div>
<div class="value">${escapeHtml(conn.host)}</div>
</div>
<div class="conn-stat">
<div class="label">Port</div>
<div class="value">${conn.port}</div>
</div>
<div class="conn-stat">
<div class="label">Age</div>
<div class="value">${formatSeconds(conn.age_seconds)}</div>
</div>
<div class="conn-stat">
<div class="label">Idle</div>
<div class="value">${formatSeconds(conn.idle_seconds)}</div>
</div>
</div>
</div>
`;
}).join('');
}
/**
 * Format a duration in seconds as a short human-readable string.
 *
 * Output forms: "45s" below one minute, "2m 5s" below one hour, and
 * "1h 2m" from one hour upward.
 *
 * The input is rounded to whole seconds BEFORE the unit is chosen, so a
 * fractional value near a boundary carries into the next unit (59.6 becomes
 * "1m 0s", not "60s"; 119.6 becomes "2m 0s", not "1m 60s").
 *
 * @param {number} s Duration in seconds; may be fractional.
 * @returns {string} Formatted duration, or "--" when `s` is not a finite number.
 */
function formatSeconds(s) {
  const total = Math.round(s);
  // Missing or malformed values would otherwise render as "NaNh NaNm".
  if (!Number.isFinite(total)) return '--';
  if (total < 60) return total + 's';
  if (total < 3600) return Math.floor(total / 60) + 'm ' + (total % 60) + 's';
  return Math.floor(total / 3600) + 'h ' + Math.floor((total % 3600) / 60) + 'm';
}
/**
 * Ask for confirmation, then close every cached TCP connection in the pool.
 *
 * Sends a POST to the flush endpoint. On a non-OK response the server's
 * `detail` message (or a generic fallback) is shown as an error toast. On
 * success a confirmation toast is shown and the connection list is reloaded.
 * Any thrown error is reported through an error toast.
 */
async function flushConnections() {
  const confirmed = confirm('Close all cached TCP connections?\n\nDevices will reconnect on the next poll cycle.');
  if (!confirmed) {
    return;
  }

  try {
    const response = await fetch('/api/nl43/_connections/flush', { method: 'POST' });
    const payload = await response.json();

    if (response.ok) {
      showToast('All connections flushed', 'success');
      // Refresh the view so the flushed connections disappear from the list.
      await loadConnections();
    } else {
      showToast(payload.detail || 'Failed to flush connections', 'error');
    }
  } catch (error) {
    showToast('Error flushing connections: ' + error.message, 'error');
  }
}
</script> </script>
</body> </body>
</html> </html>