diff --git a/.gitignore b/.gitignore index 8458942..abd3d05 100644 --- a/.gitignore +++ b/.gitignore @@ -220,7 +220,6 @@ marimo/_static/ marimo/_lsp/ __marimo__/ -<<<<<<< HEAD # Seismo Fleet Manager # SQLite database files *.db @@ -228,6 +227,3 @@ __marimo__/ /data/ /data-dev/ .aider* -.aider* -======= ->>>>>>> 0c2186f5d89d948b0357d674c0773a67a67d8027 diff --git a/REPORT_PIPELINE_BRIEF.md b/REPORT_PIPELINE_BRIEF.md new file mode 100644 index 0000000..78674fc --- /dev/null +++ b/REPORT_PIPELINE_BRIEF.md @@ -0,0 +1,59 @@ +# FTP Report Pipeline — session brief + +**Branch:** `feat/ftp-report-pipeline` (off `dev`), worktree `/home/serversdown/terra-view-reports`. +**Scope:** Terra-View only. Do NOT touch SLMM — the SLMM alert/monitor work is live in a +parallel session on `slmm` branch `feat/drd-fix`. Pull device data through the **existing** +SLMM FTP proxy endpoints; add no SLMM code (for v1). + +See memory note `client_sound_monitoring_job_2026-07` for the client requirements + timeline. + +## Goal +Automated **daily morning report** for the John Myler 3-location sound job: each AM, last +night's noise levels vs the **baseline week**, per location. Data pulled from the meters via +FTP (the meter records 24/7 to SD regardless of TCP wedges). Alerts are a *separate* workstream +(SLMM, real-time DOD) — not in scope here. + +## The big realization (why this is small) +The hard parts already exist: +- **SLMM (use as-is, via the `/api/slmm/...` proxy):** + - `GET /api/slmm/{unit}/ftp/files?path=/NL-43` → list files/folders + - `POST /api/slmm/{unit}/ftp/download-folder` → returns the `Auto_####` folder as a **ZIP** +- **Terra-View ingest (reuse):** `backend/routers/project_locations.py:1743` `upload_nrl_data` + already accepts a **ZIP**, extracts, keeps `.rnh` + `_Leq_ .rnd` (drops `_Lp_`/junk via + `_is_wanted`), runs `_parse_rnh` (line 1687) → creates `MonitoringSession` + `DataFile`. +- **Report generator (reuse, source-agnostic):** `backend/routers/projects.py`. 
The `.rnd` + file reads funnel through 3 helpers — `_peek_rnd_headers` (~135), `_is_leq_file` (~147), + `_read_rnd_file_rows` (~256). `.rnd` files live on disk under `data/{file_path}` (DataFile + holds the path, not a BLOB). The stats/Excel/formatting logic doesn't care where bytes come from. + +## Build (Terra-View) +1. **Refactor** `upload_nrl_data`'s core into a callable `ingest_nrl_zip(location_id, zip_bytes, db)` + so it can be invoked programmatically (not only via HTTP UploadFile). +2. **Scheduled pull job** (reuse the existing scheduler): per project location/unit → + `GET /ftp/files` to find new `Auto_####` folders → `POST /ftp/download-folder` (zip) → + `ingest_nrl_zip(...)`. **Dedup** so repeated pulls don't duplicate sessions/files + (track ingested folder names per location). +3. **Baseline aggregation:** aggregate the baseline-week `_Leq_` intervals per location → + reference values (nighttime Leq, L90 floor, typical Lmax). +4. **Nightly report + email:** compute last night's metrics per location, compare to baseline + (deltas), render (reuse the Excel/report machinery), email each morning. + +## Data-location decision (light version, agreed) +Keep `MonitoringSession`/`DataFile` **metadata in TV** for now; reuse the existing on-disk file +store. Optional refinement (later): have SLMM keep the pulled files and TV read them through a +SLMM file-serve endpoint (avoids the copy-into-TV step). Don't do that refinement under the +deadline unless trivial — the report logic is identical either way. + +## Open questions to resolve early +1. **What's actually in a `_Leq_ .rnd`** — Leq only, or Leq + Lmax + Ln per 15-min interval? + Decides whether the night-vs-baseline report can show L90/Lmax or just Leq. Inspect a real file. +2. **Session rollover / dedup** — does a 2-week run write one growing `Auto_####` folder or new + folders? Drives the "what's new" logic. +3. **`download-folder` over a multi-day run** — confirm it zips cleanly (size/time). 
def _new_sound_report_config_id() -> str:
    """Return a fresh random UUID string for a SoundReportConfig primary key."""
    # Local import: this block does not assume the module already imports uuid.
    import uuid

    return str(uuid.uuid4())


class SoundReportConfig(Base):
    """
    Per-project configuration for the automated nightly sound report
    (FTP report pipeline). One row per project (enforced by the unique
    constraint on ``project_id``). Read by the morning tick in
    SchedulerService and by the manual /reports endpoints (as defaults).

    New table → created by Base.metadata.create_all() on startup; no migration
    needed (only a rebuild/restart).
    """
    __tablename__ = "sound_report_configs"

    # Primary key: generated server-side when the caller does not supply one.
    id = Column(String, primary_key=True, default=_new_sound_report_config_id)
    project_id = Column(String, nullable=False, index=True, unique=True)  # FK to projects.id

    enabled = Column(Boolean, default=False, nullable=False)  # run the daily report?
    report_time = Column(String, default="08:00", nullable=False)  # local HH:MM to run/send
    metric_keys = Column(String, default="lmax,l01,l10,l90", nullable=False)  # csv of metric keys
    # Baseline source: "captured" = compute from recorded nights in the date range below;
    # "reference" = use fixed values typed per location (old-report averages or a spec limit).
    baseline_mode = Column(String, default="captured", nullable=False)
    baseline_start = Column(Date, nullable=True)  # captured-mode range
    baseline_end = Column(Date, nullable=True)
    recipients = Column(Text, nullable=True)  # csv; falls back to REPORT_SMTP_RECIPIENTS env
    last_run_date = Column(Date, nullable=True)  # evening-date of the last reported night (dedup)

    # Naive UTC timestamps; updated_at is refreshed on every UPDATE via onupdate.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+ slot = key[len("Percentile "):].strip() + result.setdefault("percentiles", {})[slot] = value except Exception: pass return result @@ -1740,6 +1753,270 @@ def _classify_file(filename: str) -> str: return "data" +def _is_wanted_nrl_file(fname: str) -> bool: + """Keep only the files an NRL ingest cares about: .rnh metadata + the + averaged Leq .rnd. Drops the 1-second _Lp_ files and everything else. + + - NL-43 writes two .rnd types: _Leq_ (15-min averages, wanted) and + _Lp_ (1-second granular, skipped). + - AU2 (NL-23/older Rion) writes a single Au2_####.rnd — always keep. + + Note this is purely about which *files* to store, not which *metrics* to + report: the kept Leq file carries every column (Leq, Lmax, L1/L10/L50/ + L90/L95, Lpeak, …), so the report layer can select any metric later. + """ + n = fname.lower() + if n.endswith(".rnh"): + return True + if n.endswith(".rnd"): + if "_leq_" in n: # NL-43 Leq file + return True + if n.startswith("au2_"): # AU2 format (NL-23) — Leq equivalent + return True + if "_lp" not in n and "_leq_" not in n: + # Unknown .rnd format — include it so we don't silently drop data + return True + return False + + +class IngestError(Exception): + """Raised when an NRL upload/ZIP has no usable data or an invalid target. + + Kept HTTP-agnostic so the ingest core can be driven programmatically (the + scheduled FTP pull) as well as from the HTTP upload endpoint. Callers + translate it: the endpoint → HTTP 400, the scheduler → logged failure. + """ + pass + + +def _find_existing_session( + db: Session, + location_id: str, + store_name: str, + started_at, + start_time_str: str, +): + """Return an already-ingested session for this location that represents the + same measurement, or None. + + Used to make FTP re-pulls idempotent: a daily cycle closes one Auto_#### + folder per day, so a session is uniquely identified within a location by + (store_name + measurement start time). 
def _ingest_file_entries(
    location: MonitoringLocation,
    file_entries: list[tuple[str, bytes]],
    db: Session,
    *,
    source: str = "manual_upload",
    dedupe: bool = False,
) -> dict:
    """Core NRL ingest, shared by the HTTP upload and the programmatic FTP pull.

    Takes (filename, bytes) entries, keeps the wanted files (.rnh + Leq .rnd),
    parses the first .rnh, and creates a MonitoringSession + one DataFile row
    per kept file under the location's project. Files are written to
    ``data/Projects/<project_id>/<session_id>/``.

    Args:
        location: Target monitoring location; its ``id``, ``project_id`` and
            ``name`` are used.
        file_entries: ``(filename, content)`` pairs. Filenames are reduced to
            their basename before use.
        db: SQLAlchemy session; this function commits on it.
        source: Stored in the session and file metadata under ``"source"``.
        dedupe: When True, return the already-ingested session (no writes) if
            ``_find_existing_session`` finds a match.

    Returns:
        A dict with ``success``, ``deduped``, ``session_id``, the file counts,
        ``store_name`` and ISO ``started_at`` / ``stopped_at``.

    Raises:
        IngestError: If no usable files are present.
    """
    # Filenames from a manual upload are client-supplied. Reduce each one to
    # its basename *before* it is ever joined onto a directory, so a name like
    # "../../x.rnh" cannot be written outside the session folder.
    file_entries = [(Path(fname).name, fbytes) for fname, fbytes in file_entries]

    # --- Filter to the files we keep (.rnh + Leq .rnd) ---
    file_entries = [(f, b) for f, b in file_entries if _is_wanted_nrl_file(f)]
    if not file_entries:
        raise IngestError(
            "No usable .rnd or .rnh files found. Expected NL-43 _Leq_ files or AU2 format .rnd files."
        )

    # --- Parse .rnh metadata (first one wins) ---
    rnh_meta = next(
        (_parse_rnh(fbytes) for fname, fbytes in file_entries if fname.lower().endswith(".rnh")),
        {},
    )

    # RNH stores local time (no UTC offset). Use local for period/label, then
    # convert to UTC for storage so the local_datetime filter displays correctly.
    # NOTE(review): the fallback when no start time is parsed is a *UTC* "now"
    # that is then passed through local_to_utc — confirm that is intended.
    started_at_local = _parse_rnh_datetime(rnh_meta.get("start_time_str")) or datetime.utcnow()
    stopped_at_local = _parse_rnh_datetime(rnh_meta.get("stop_time_str"))
    started_at = local_to_utc(started_at_local)
    stopped_at = local_to_utc(stopped_at_local) if stopped_at_local else None
    duration_seconds = (
        int((stopped_at - started_at).total_seconds())
        if (started_at and stopped_at) else None
    )

    store_name = rnh_meta.get("store_name", "")
    serial_number = rnh_meta.get("serial_number", "")
    index_number = rnh_meta.get("index_number", "")
    start_time_str = rnh_meta.get("start_time_str", "")

    def _result(result_session_id, *, deduped, files_imported=0, leq=0, lp=0, meta=0) -> dict:
        # Single place that defines the response shape for both outcomes.
        return {
            "success": True,
            "deduped": deduped,
            "session_id": result_session_id,
            "files_imported": files_imported,
            "leq_files": leq,
            "lp_files": lp,
            "metadata_files": meta,
            "store_name": store_name,
            "started_at": started_at.isoformat() if started_at else None,
            "stopped_at": stopped_at.isoformat() if stopped_at else None,
        }

    # --- Dedupe: skip if this exact measurement is already ingested ---
    if dedupe:
        existing = _find_existing_session(db, location.id, store_name, started_at, start_time_str)
        if existing:
            return _result(existing.id, deduped=True)

    # --- Create MonitoringSession (local times drive period/label) ---
    period_type = _derive_period_type(started_at_local) if started_at_local else None
    session_label = (
        _build_session_label(started_at_local, location.name, period_type)
        if started_at_local else None
    )

    session_id = str(uuid.uuid4())
    monitoring_session = MonitoringSession(
        id=session_id,
        project_id=location.project_id,
        location_id=location.id,
        unit_id=None,
        session_type="sound",
        started_at=started_at,
        stopped_at=stopped_at,
        duration_seconds=duration_seconds,
        status="completed",
        session_label=session_label,
        period_type=period_type,
        session_metadata=json.dumps({
            "source": source,
            "store_name": store_name,
            "serial_number": serial_number,
            "index_number": index_number,
            "start_time_str": start_time_str,
            # Captured from the .rnh so the report can label metrics from the
            # device's own config (which LN slot is L90, the weightings, etc.).
            "percentiles": rnh_meta.get("percentiles", {}),
            "frequency_weighting": rnh_meta.get("frequency_weighting", ""),
            "time_weighting": rnh_meta.get("time_weighting", ""),
            "leq_interval": rnh_meta.get("leq_interval", ""),
        }),
    )
    # NOTE(review): the session is committed before any file is written, so a
    # failed disk write below leaves a session row without files.
    db.add(monitoring_session)
    db.commit()
    db.refresh(monitoring_session)

    # --- Write files to disk + create DataFile records ---
    output_dir = Path("data/Projects") / location.project_id / session_id
    output_dir.mkdir(parents=True, exist_ok=True)

    leq_count = lp_count = metadata_count = files_imported = 0
    for fname, fbytes in file_entries:
        fname_lower = fname.lower()
        if fname_lower.endswith(".rnd"):
            if "_leq_" in fname_lower:
                leq_count += 1
            elif "_lp" in fname_lower:
                lp_count += 1
        elif fname_lower.endswith(".rnh"):
            metadata_count += 1

        dest = output_dir / fname
        dest.write_bytes(fbytes)
        checksum = hashlib.sha256(fbytes).hexdigest()
        # DataFile paths are stored relative to the data/ directory.
        rel_path = str(dest.relative_to("data"))

        db.add(DataFile(
            id=str(uuid.uuid4()),
            session_id=session_id,
            file_path=rel_path,
            file_type=_classify_file(fname),
            file_size_bytes=len(fbytes),
            downloaded_at=datetime.utcnow(),
            checksum=checksum,
            file_metadata=json.dumps({
                "source": source,
                "original_filename": fname,
                "store_name": store_name,
            }),
        ))
        files_imported += 1

    db.commit()

    return _result(
        session_id,
        deduped=False,
        files_imported=files_imported,
        leq=leq_count,
        lp=lp_count,
        meta=metadata_count,
    )
"ftp_pull", + dedupe: bool = True, +) -> dict: + """Programmatically ingest an Auto_#### ZIP (e.g. a scheduled FTP pull). + + Extracts the ZIP (flattening any nested Auto_Leq/Auto_Lp_ folders), keeps + the .rnh + Leq .rnd, parses the header, and creates a MonitoringSession + + DataFile rows for `location_id`. Defaults to dedupe=True so repeated daily + pulls of the same closed folder don't create duplicate sessions. + + Returns the same dict shape as the HTTP upload, plus a `deduped` flag. + Raises IngestError on a bad ZIP, no usable files, or unknown location. + """ + location = db.query(MonitoringLocation).filter_by(id=location_id).first() + if not location: + raise IngestError(f"Location {location_id} not found") + + try: + with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf: + file_entries: list[tuple[str, bytes]] = [] + for info in zf.infolist(): + if info.is_dir(): + continue + name = Path(info.filename).name # strip nested folder paths + if not name: + continue + file_entries.append((name, zf.read(info))) + except zipfile.BadZipFile: + raise IngestError("Downloaded data is not a valid ZIP archive.") + + return _ingest_file_entries(location, file_entries, db, source=source, dedupe=dedupe) + + @router.post("/nrl/{location_id}/upload-data") async def upload_nrl_data( project_id: str, @@ -1754,11 +2031,13 @@ async def upload_nrl_data( - A single .zip file (the Auto_#### folder zipped) — auto-extracted - Multiple .rnd / .rnh files selected directly from the SD card folder - Creates a MonitoringSession from .rnh metadata and DataFile records - for each measurement file. No unit assignment required. + Normalizes the upload to (filename, bytes) entries, then hands off to the + shared ingest core (`_ingest_file_entries`) — the same path the scheduled + FTP pull uses via `ingest_nrl_zip`. Creates a MonitoringSession from the + .rnh metadata and DataFile records for each measurement file. No unit + assignment required. 
dedupe=False here preserves the prior manual-upload + behaviour (re-uploading creates a fresh session). """ - from datetime import datetime - # Verify project and location exist project = db.query(Project).filter_by(id=project_id).first() _require_module(project, "sound_monitoring", db) @@ -1769,7 +2048,7 @@ async def upload_nrl_data( if not location: raise HTTPException(status_code=404, detail="Location not found") - # --- Step 1: Normalize to (filename, bytes) list --- + # --- Normalize upload to (filename, bytes) entries --- file_entries: list[tuple[str, bytes]] = [] if len(files) == 1 and files[0].filename.lower().endswith(".zip"): @@ -1793,145 +2072,11 @@ async def upload_nrl_data( if not file_entries: raise HTTPException(status_code=400, detail="No usable files found in upload.") - # --- Step 1b: Filter to only relevant files --- - # Keep: .rnh (metadata) and measurement .rnd files - # NL-43 generates two .rnd types: _Leq_ (15-min averages, wanted) and _Lp_ (1-sec granular, skip) - # AU2 (NL-23/older Rion) generates a single Au2_####.rnd per session — always keep those - # Drop: _Lp_ .rnd, .xlsx, .mp3, and anything else - def _is_wanted(fname: str) -> bool: - n = fname.lower() - if n.endswith(".rnh"): - return True - if n.endswith(".rnd"): - if "_leq_" in n: # NL-43 Leq file - return True - if n.startswith("au2_"): # AU2 format (NL-23) — always Leq equivalent - return True - if "_lp" not in n and "_leq_" not in n: - # Unknown .rnd format — include it so we don't silently drop data - return True - return False - - file_entries = [(fname, fbytes) for fname, fbytes in file_entries if _is_wanted(fname)] - - if not file_entries: - raise HTTPException(status_code=400, detail="No usable .rnd or .rnh files found. 
Expected NL-43 _Leq_ files or AU2 format .rnd files.") - - # --- Step 2: Find and parse .rnh metadata --- - rnh_meta = {} - for fname, fbytes in file_entries: - if fname.lower().endswith(".rnh"): - rnh_meta = _parse_rnh(fbytes) - break - - # RNH files store local time (no UTC offset). Use local values for period - # classification / label generation, then convert to UTC for DB storage so - # the local_datetime Jinja filter displays the correct time. - started_at_local = _parse_rnh_datetime(rnh_meta.get("start_time_str")) or datetime.utcnow() - stopped_at_local = _parse_rnh_datetime(rnh_meta.get("stop_time_str")) - - started_at = local_to_utc(started_at_local) - stopped_at = local_to_utc(stopped_at_local) if stopped_at_local else None - - duration_seconds = None - if started_at and stopped_at: - duration_seconds = int((stopped_at - started_at).total_seconds()) - - store_name = rnh_meta.get("store_name", "") - serial_number = rnh_meta.get("serial_number", "") - index_number = rnh_meta.get("index_number", "") - - # --- Step 3: Create MonitoringSession --- - # Use local times for period/label so classification reflects the clock at the site. 
- period_type = _derive_period_type(started_at_local) if started_at_local else None - session_label = _build_session_label(started_at_local, location.name, period_type) if started_at_local else None - - session_id = str(uuid.uuid4()) - monitoring_session = MonitoringSession( - id=session_id, - project_id=project_id, - location_id=location_id, - unit_id=None, - session_type="sound", - started_at=started_at, - stopped_at=stopped_at, - duration_seconds=duration_seconds, - status="completed", - session_label=session_label, - period_type=period_type, - session_metadata=json.dumps({ - "source": "manual_upload", - "store_name": store_name, - "serial_number": serial_number, - "index_number": index_number, - }), - ) - db.add(monitoring_session) - db.commit() - db.refresh(monitoring_session) - - # --- Step 4: Write files to disk and create DataFile records --- - output_dir = Path("data/Projects") / project_id / session_id - output_dir.mkdir(parents=True, exist_ok=True) - - leq_count = 0 - lp_count = 0 - metadata_count = 0 - files_imported = 0 - - for fname, fbytes in file_entries: - file_type = _classify_file(fname) - fname_lower = fname.lower() - - # Track counts for summary - if fname_lower.endswith(".rnd"): - if "_leq_" in fname_lower: - leq_count += 1 - elif "_lp" in fname_lower: - lp_count += 1 - elif fname_lower.endswith(".rnh"): - metadata_count += 1 - - # Write to disk - dest = output_dir / fname - dest.write_bytes(fbytes) - - # Compute checksum - checksum = hashlib.sha256(fbytes).hexdigest() - - # Store relative path from data/ dir - rel_path = str(dest.relative_to("data")) - - data_file = DataFile( - id=str(uuid.uuid4()), - session_id=session_id, - file_path=rel_path, - file_type=file_type, - file_size_bytes=len(fbytes), - downloaded_at=datetime.utcnow(), - checksum=checksum, - file_metadata=json.dumps({ - "source": "manual_upload", - "original_filename": fname, - "store_name": store_name, - }), - ) - db.add(data_file) - files_imported += 1 - - db.commit() - - 
return { - "success": True, - "session_id": session_id, - "files_imported": files_imported, - "leq_files": leq_count, - "lp_files": lp_count, - "metadata_files": metadata_count, - "store_name": store_name, - "started_at": started_at.isoformat() if started_at else None, - "stopped_at": stopped_at.isoformat() if stopped_at else None, - } + # --- Hand off to the shared ingest core --- + try: + return _ingest_file_entries(location, file_entries, db, source="manual_upload", dedupe=False) + except IngestError as e: + raise HTTPException(status_code=400, detail=str(e)) # ============================================================================ diff --git a/backend/routers/reports.py b/backend/routers/reports.py new file mode 100644 index 0000000..1f2ca49 --- /dev/null +++ b/backend/routers/reports.py @@ -0,0 +1,434 @@ +""" +Nightly Report Router. + +Manual triggers for the night-vs-baseline sound report — the same entry point +the scheduled morning tick will reuse. Two endpoints: + + GET …/reports/nightly/view → render and return the HTML inline (preview). + No write, no email. Browser-friendly. + POST …/reports/nightly/run → full run: build → write report.html/json to + disk → (dry-run) email. Returns JSON result. + +Dates are the *evening* date of the night being reported (the 7/7 in "night of +7/7 → morning 7/8"). Defaults to last night. Baseline is optional; pass the +baseline-week range to populate the comparison. 
+""" + +from __future__ import annotations + +import json +import logging +import re +import uuid +from datetime import datetime, timedelta, date +from html import escape +from pathlib import Path +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Query, Request +from fastapi.responses import HTMLResponse +from sqlalchemy.orm import Session + +from backend.database import get_db +from backend.models import Project, SoundReportConfig, MonitoringLocation +from backend.services.report_pipeline import ( + METRIC_REGISTRY, DEFAULT_METRICS, DEFAULT_WINDOWS, _location_reference_baseline, +) +from backend.services.report_orchestrator import run_nightly_report +from backend.utils.timezone import utc_to_local + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/api/projects/{project_id}/reports", tags=["reports"]) + + +def _default_night_date() -> date: + """Last night = yesterday in the user's local timezone.""" + return (utc_to_local(datetime.utcnow()) - timedelta(days=1)).date() + + +def _parse_date(s: Optional[str], field: str) -> Optional[date]: + if not s: + return None + try: + return datetime.strptime(s, "%Y-%m-%d").date() + except ValueError: + raise HTTPException(status_code=400, detail=f"{field} must be YYYY-MM-DD (got {s!r})") + + +def _parse_metrics(s: Optional[str]) -> list[str]: + if not s: + return list(DEFAULT_METRICS) + keys = [k.strip().lower() for k in s.split(",") if k.strip()] + unknown = [k for k in keys if k not in METRIC_REGISTRY] + if unknown: + raise HTTPException( + status_code=400, + detail=f"Unknown metric(s): {unknown}. 
Known: {sorted(METRIC_REGISTRY)}", + ) + return keys or list(DEFAULT_METRICS) + + +def _validate_hhmm(s) -> str: + """Validate a local HH:MM (24h) time string.""" + try: + hh, mm = str(s).split(":") + h, m = int(hh), int(mm) + if 0 <= h < 24 and 0 <= m < 60: + return f"{h:02d}:{m:02d}" + except (ValueError, AttributeError): + pass + raise HTTPException(status_code=400, detail=f"report_time must be HH:MM 24-hour (got {s!r})") + + +def _config_dict(cfg: Optional[SoundReportConfig], project_id: str) -> dict: + """Serialise a config row (or defaults if none yet) to JSON.""" + return { + "project_id": project_id, + "exists": cfg is not None, + "enabled": cfg.enabled if cfg else False, + "report_time": cfg.report_time if cfg else "08:00", + "metric_keys": cfg.metric_keys if cfg else ",".join(DEFAULT_METRICS), + "baseline_mode": cfg.baseline_mode if cfg else "captured", + "baseline_start": cfg.baseline_start.isoformat() if cfg and cfg.baseline_start else None, + "baseline_end": cfg.baseline_end.isoformat() if cfg and cfg.baseline_end else None, + "recipients": (cfg.recipients if cfg and cfg.recipients else ""), + "last_run_date": cfg.last_run_date.isoformat() if cfg and cfg.last_run_date else None, + } + + +@router.get("/config") +async def get_report_config(project_id: str, db: Session = Depends(get_db)): + """Return the project's nightly-report config (or defaults if not set yet).""" + if not db.query(Project).filter_by(id=project_id).first(): + raise HTTPException(status_code=404, detail="Project not found") + cfg = db.query(SoundReportConfig).filter_by(project_id=project_id).first() + return _config_dict(cfg, project_id) + + +@router.put("/config") +async def put_report_config(project_id: str, request: Request, db: Session = Depends(get_db)): + """Create or update the project's nightly-report config (JSON body).""" + if not db.query(Project).filter_by(id=project_id).first(): + raise HTTPException(status_code=404, detail="Project not found") + data = await 
def _resolve_params(project_id, db, night_date, baseline_start, baseline_end, metrics):
    """Validate the request inputs and work out which baseline and metrics apply.

    Precedence:
      * Baseline — explicit start/end dates in the query win and force
        "captured" mode; otherwise the project's saved SoundReportConfig
        supplies both the mode and the dates; with neither, the mode is
        "captured" and the dates stay unset.
      * Metrics — the explicit ``metrics`` value wins, then the saved config's
        ``metric_keys``, then DEFAULT_METRICS.

    Returns (night_date, baseline_mode, baseline_start, baseline_end, metric_keys).

    Raises HTTPException: 404 for an unknown project; 400 for malformed dates,
    a half-specified baseline range, or a start date after the end date.
    """
    project = db.query(Project).filter_by(id=project_id).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    night = _parse_date(night_date, "night_date")
    if not night:
        night = _default_night_date()
    range_start = _parse_date(baseline_start, "baseline_start")
    range_end = _parse_date(baseline_end, "baseline_end")

    # Exactly one of the two dates supplied is an error.
    if bool(range_start) != bool(range_end):
        raise HTTPException(status_code=400, detail="Provide both baseline_start and baseline_end, or neither.")
    have_explicit_range = bool(range_start and range_end)
    if have_explicit_range and range_start > range_end:
        raise HTTPException(status_code=400, detail="baseline_start must be on or before baseline_end.")

    saved = db.query(SoundReportConfig).filter_by(project_id=project_id).first()

    mode = "captured"  # used for explicit dates and when nothing is saved
    if not have_explicit_range and saved:
        mode = saved.baseline_mode
        range_start = saved.baseline_start
        range_end = saved.baseline_end

    raw_metrics = metrics or (saved.metric_keys if saved else None)
    chosen_metrics = _parse_metrics(raw_metrics) if raw_metrics else list(DEFAULT_METRICS)

    return night, mode, range_start, range_end, chosen_metrics
@router.post("/nightly/run")
async def run_nightly_report_endpoint(
    project_id: str,
    night_date: Optional[str] = Query(None, description="Evening date of the night (YYYY-MM-DD). Default: last night."),
    baseline_start: Optional[str] = Query(None, description="Baseline range start (YYYY-MM-DD)."),
    baseline_end: Optional[str] = Query(None, description="Baseline range end (YYYY-MM-DD)."),
    metrics: Optional[str] = Query(None, description="Comma list, e.g. lmax,l01,l10,l90. Default: house set."),
    send: bool = Query(True, description="Attempt email (dry-run until SMTP is configured)."),
    db: Session = Depends(get_db),
):
    """Run the night report end to end and return its JSON result.

    Resolves the parameters via ``_resolve_params``, calls
    ``run_nightly_report`` with ``send`` passed through, then drops the large
    ``html`` field from the result and adds a ``view_url`` pointing at the
    /nightly/view preview for the same night, baseline and metrics.

    Raises HTTPException: whatever ``_resolve_params`` / the run raise as
    HTTPException is passed through; any other failure becomes a 500.
    """
    night, mode, range_start, range_end, metric_keys = _resolve_params(
        project_id, db, night_date, baseline_start, baseline_end, metrics
    )
    # NOTE(review): run_nightly_report is called synchronously inside an async
    # endpoint — confirm it is quick enough not to stall the event loop.
    try:
        result = run_nightly_report(
            db,
            project_id,
            night,
            metric_keys=metric_keys,
            baseline_mode=mode,
            baseline_start=range_start,
            baseline_end=range_end,
            send=send,
        )
    except HTTPException:
        raise
    except Exception as e:  # noqa: BLE001
        logger.error("nightly/run failed for %s (%s): %s", project_id, night, e, exc_info=True)
        raise HTTPException(status_code=500, detail=f"Report generation failed: {e}")

    # Keep the JSON response lean — the HTML is viewable via /view or the file.
    result.pop("html", None)

    # Rebuild the equivalent preview link; the baseline pair appears only when set.
    query_parts = [f"night_date={night:%Y-%m-%d}"]
    if range_start and range_end:
        query_parts.append(f"baseline_start={range_start:%Y-%m-%d}")
        query_parts.append(f"baseline_end={range_end:%Y-%m-%d}")
    query_parts.append(f"metrics={','.join(metric_keys)}")
    result["view_url"] = f"/api/projects/{project_id}/reports/nightly/view" + "?" + "&".join(query_parts)
    return result
+ """ + project = db.query(Project).filter_by(id=project_id).first() + if not project: + raise HTTPException(status_code=404, detail="Project not found") + try: + data = await request.json() + except Exception: + data = {} + + raw = (data or {}).get("recipients") + if not raw: + cfg = db.query(SoundReportConfig).filter_by(project_id=project_id).first() + raw = cfg.recipients if cfg else None + recipients = None + if raw: + if isinstance(raw, list): + raw = ",".join(raw) + recipients = [r.strip() for r in raw.split(",") if r.strip()] + + from backend.services.report_email import send_report_email + body = ( + "