feat(reports): reference-baseline mode (typed limits / prior averages)

Baseline can now come from fixed values typed per location, not just captured data — for a spec limit ("L10 = 85") or a prior report's averages when the raw data isn't available. - SoundReportConfig.baseline_mode ("captured" | "reference"). - report_pipeline: _location_reference_baseline() reads per-location values from location_metadata; build_*_night_report honor baseline_mode (reference cells use the typed value; unset metrics compare against nothing). - reports router: GET/PUT /reports/baseline (mode on config + per-location values in location_metadata); config carries baseline_mode; manual view/run fall back to the saved config's baseline when no explicit dates are given. - orchestrator + scheduler tick thread baseline_mode through. Verified end-to-end: PUT/GET /baseline, reference deltas (L10 66.6 vs 85 -> -18.4), unset metrics compare against nothing, captured-mode regression intact. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-11 20:26:23 +00:00
parent 7fb4ba0343
commit c1b5efae56
5 changed files with 184 additions and 16 deletions
@@ -235,7 +235,10 @@ class SoundReportConfig(Base):
    enabled = Column(Boolean, default=False, nullable=False)        # run the daily report?
    report_time = Column(String, default="08:00", nullable=False)   # local HH:MM to run/send
    metric_keys = Column(String, default="lmax,l01,l10,l90", nullable=False)  # csv of metric keys
-    baseline_start = Column(Date, nullable=True)                    # baseline-week range
+    # Baseline source: "captured" = compute from recorded nights in the date range below;
    # "reference" = use fixed values typed per location (old-report averages or a spec limit).
    baseline_mode = Column(String, default="captured", nullable=False)
    baseline_start = Column(Date, nullable=True)                    # captured-mode range
    baseline_end = Column(Date, nullable=True)
    recipients = Column(Text, nullable=True)                        # csv; falls back to REPORT_SMTP_RECIPIENTS env
    last_run_date = Column(Date, nullable=True)                     # evening-date of the last reported night (dedup)
@@ -16,6 +16,7 @@ baseline-week range to populate the comparison.
 from __future__ import annotations
 import json
 import logging
 import re
 import uuid
@@ -29,8 +30,10 @@ from fastapi.responses import HTMLResponse
 from sqlalchemy.orm import Session
 from backend.database import get_db
-from backend.models import Project, SoundReportConfig
+from backend.models import Project, SoundReportConfig, MonitoringLocation
-from backend.services.report_pipeline import METRIC_REGISTRY, DEFAULT_METRICS
+from backend.services.report_pipeline import (
    METRIC_REGISTRY, DEFAULT_METRICS, DEFAULT_WINDOWS, _location_reference_baseline,
 )
 from backend.services.report_orchestrator import run_nightly_report
 from backend.utils.timezone import utc_to_local
@@ -86,6 +89,7 @@ def _config_dict(cfg: Optional[SoundReportConfig], project_id: str) -> dict:
        "enabled": cfg.enabled if cfg else False,
        "report_time": cfg.report_time if cfg else "08:00",
        "metric_keys": cfg.metric_keys if cfg else ",".join(DEFAULT_METRICS),
        "baseline_mode": cfg.baseline_mode if cfg else "captured",
        "baseline_start": cfg.baseline_start.isoformat() if cfg and cfg.baseline_start else None,
        "baseline_end": cfg.baseline_end.isoformat() if cfg and cfg.baseline_end else None,
        "recipients": (cfg.recipients if cfg and cfg.recipients else ""),
@@ -123,6 +127,11 @@ async def put_report_config(project_id: str, request: Request, db: Session = Dep
        mk = data["metric_keys"]
        mk = mk if isinstance(mk, str) else ",".join(mk or [])
        cfg.metric_keys = ",".join(_parse_metrics(mk))
    if "baseline_mode" in data:
        bm = str(data["baseline_mode"]).lower()
        if bm not in ("captured", "reference"):
            raise HTTPException(status_code=400, detail="baseline_mode must be 'captured' or 'reference'")
        cfg.baseline_mode = bm
    if "baseline_start" in data or "baseline_end" in data:
        bs = _parse_date(data.get("baseline_start") or None, "baseline_start")
        be = _parse_date(data.get("baseline_end") or None, "baseline_end")
@@ -143,7 +152,14 @@ async def put_report_config(project_id: str, request: Request, db: Session = Dep
 def _resolve_params(project_id, db, night_date, baseline_start, baseline_end, metrics):
-    """Shared validation/parsing for both endpoints."""
+    """Validate inputs and resolve the baseline source.
    Explicit baseline dates in the query override (captured mode with those
    dates). Otherwise the project's saved config supplies the baseline (its
    mode + dates) and the default metric set — so the manual view/run match
    what the scheduled report does.
    Returns (night_date, baseline_mode, baseline_start, baseline_end, metric_keys).
    """
    if not db.query(Project).filter_by(id=project_id).first():
        raise HTTPException(status_code=404, detail="Project not found")
    nd = _parse_date(night_date, "night_date") or _default_night_date()
@@ -153,7 +169,24 @@ def _resolve_params(project_id, db, night_date, baseline_start, baseline_end, me
        raise HTTPException(status_code=400, detail="Provide both baseline_start and baseline_end, or neither.")
    if bs and be and bs > be:
        raise HTTPException(status_code=400, detail="baseline_start must be on or before baseline_end.")
-    return nd, bs, be, _parse_metrics(metrics)
+
    cfg = db.query(SoundReportConfig).filter_by(project_id=project_id).first()
    if bs and be:
        baseline_mode = "captured"                       # explicit dates win
    elif cfg:
        baseline_mode = cfg.baseline_mode                # fall back to saved config
        bs, be = cfg.baseline_start, cfg.baseline_end
    else:
        baseline_mode = "captured"
    if metrics:
        metric_keys = _parse_metrics(metrics)
    elif cfg and cfg.metric_keys:
        metric_keys = _parse_metrics(cfg.metric_keys)
    else:
        metric_keys = list(DEFAULT_METRICS)
    return nd, baseline_mode, bs, be, metric_keys
@router.get("/nightly/view", response_class=HTMLResponse)
@@ -166,10 +199,10 @@ async def view_nightly_report(
    db: Session = Depends(get_db),
 ):
    """Render the night report and return the HTML inline (preview — no write, no email)."""
-    nd, bs, be, metric_keys = _resolve_params(project_id, db, night_date, baseline_start, baseline_end, metrics)
+    nd, bmode, bs, be, metric_keys = _resolve_params(project_id, db, night_date, baseline_start, baseline_end, metrics)
    result = run_nightly_report(
        db, project_id, nd,
-        metric_keys=metric_keys, baseline_start=bs, baseline_end=be,
+        metric_keys=metric_keys, baseline_mode=bmode, baseline_start=bs, baseline_end=be,
        send=False,           # preview: no email
    )
    return HTMLResponse(result["html"])
@@ -190,10 +223,10 @@ async def run_nightly_report_endpoint(
    This is the same path the scheduled morning tick will call.  The `html` field
    is omitted from the JSON response (it's large and on disk); use /view to see it.
    """
-    nd, bs, be, metric_keys = _resolve_params(project_id, db, night_date, baseline_start, baseline_end, metrics)
+    nd, bmode, bs, be, metric_keys = _resolve_params(project_id, db, night_date, baseline_start, baseline_end, metrics)
    result = run_nightly_report(
        db, project_id, nd,
-        metric_keys=metric_keys, baseline_start=bs, baseline_end=be,
+        metric_keys=metric_keys, baseline_mode=bmode, baseline_start=bs, baseline_end=be,
        send=send,
    )
    result.pop("html", None)  # keep the JSON response lean — view it via /view or the file
@@ -280,3 +313,89 @@ async def view_archived_report(project_id: str, night_date: str, db: Session = D
    if not path.exists():
        raise HTTPException(status_code=404, detail="No saved report for that date")
    return HTMLResponse(path.read_text(encoding="utf-8"))
 # ============================================================================
 # Reference baseline (fixed values typed per location — limits / prior averages)
 # ============================================================================
@router.get("/baseline")
async def get_baseline(project_id: str, db: Session = Depends(get_db)):
    """Describe the project's baseline setup for the reference-value editor.

    Responds with the saved baseline mode ("captured" when the project has no
    config row), the window and metric grid to render, and — for every sound
    location that has no ``removed_at`` set — the reference values currently
    stored in that location's metadata.

    Raises:
        HTTPException: 404 when the project does not exist.
    """
    project = db.query(Project).filter_by(id=project_id).first()
    if not project:
        raise HTTPException(status_code=404, detail="Project not found")

    saved = db.query(SoundReportConfig).filter_by(project_id=project_id).first()
    if saved:
        baseline_mode = saved.baseline_mode
    else:
        baseline_mode = "captured"

    # Use the project's configured metric set when there is one, else the defaults.
    if saved and saved.metric_keys:
        selected_keys = _parse_metrics(saved.metric_keys)
    else:
        selected_keys = list(DEFAULT_METRICS)

    sound_locations = (
        db.query(MonitoringLocation)
        .filter_by(project_id=project_id, location_type="sound")
        .order_by(MonitoringLocation.sort_order, MonitoringLocation.name)
        .all()
    )
    # Keep only locations without a removal timestamp (attribute may be absent).
    active = []
    for candidate in sound_locations:
        if getattr(candidate, "removed_at", None) is None:
            active.append(candidate)

    window_grid = [{"key": win.key, "label": win.label} for win in DEFAULT_WINDOWS]
    metric_grid = [
        {"key": key, "label": METRIC_REGISTRY[key].label} for key in selected_keys
    ]
    location_rows = []
    for loc in active:
        location_rows.append(
            {"id": loc.id, "name": loc.name, "values": _location_reference_baseline(loc)}
        )

    return {
        "mode": baseline_mode,
        "windows": window_grid,
        "metrics": metric_grid,
        "locations": location_rows,
    }
def _clean_reference_values(windows: dict) -> dict:
    """Normalize one location's typed reference values.

    Returns ``{window_key: {metric_key: float}}`` containing only values that
    parse as a finite number, rounded to one decimal.  Blank (``None``/``""``),
    non-numeric, out-of-range and non-finite (NaN / ±inf) values are dropped,
    and so are windows left without any metric.
    """
    import math  # function-scope import: the module's import block is not edited here

    cleaned: dict = {}
    for wkey, mvals in windows.items():
        if not isinstance(mvals, dict):
            continue
        window_clean = {}
        for mkey, val in mvals.items():
            if val in (None, ""):
                continue
            try:
                number = float(val)
            except (ValueError, TypeError, OverflowError):
                # OverflowError: an integer too large to convert to float.
                continue
            # NaN/inf are not representable in strict JSON and make no sense
            # as a limit, so they are treated like an unset value.
            if math.isfinite(number):
                window_clean[mkey] = round(number, 1)
        if window_clean:
            cleaned[wkey] = window_clean
    return cleaned


@router.put("/baseline")
async def put_baseline(project_id: str, request: Request, db: Session = Depends(get_db)):
    """Save the baseline mode (on config) and per-location reference values
    (on each location's metadata).  Body:

      {"mode": "reference",
       "locations": {"<loc_id>": {"nighttime": {"l10": 85}, "evening": {...}}}}

    Both keys are optional.  A location entry replaces that location's stored
    ``report_baseline``; an entry with no usable values removes it.  Unknown
    location ids (or ids from another project) and non-object entries are
    skipped.

    Returns:
        ``{"ok": True, "locations_updated": <count>}``.

    Raises:
        HTTPException: 404 when the project does not exist; 400 when the body
            is not a JSON object, ``locations`` is not an object, or ``mode``
            is not "captured"/"reference".
    """
    if not db.query(Project).filter_by(id=project_id).first():
        raise HTTPException(status_code=404, detail="Project not found")

    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError / UnicodeDecodeError are ValueError subclasses.
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from None
    if not isinstance(data, dict):
        raise HTTPException(status_code=400, detail="Request body must be a JSON object")

    # Validate the whole payload before touching any row, so a bad request
    # never leaves half-applied changes pending on the session.
    loc_values = data.get("locations") or {}
    if not isinstance(loc_values, dict):
        raise HTTPException(status_code=400, detail="locations must be an object keyed by location id")

    if "mode" in data:
        bm = str(data["mode"]).lower()
        if bm not in ("captured", "reference"):
            raise HTTPException(status_code=400, detail="mode must be 'captured' or 'reference'")
        cfg = db.query(SoundReportConfig).filter_by(project_id=project_id).first()
        if cfg is None:
            cfg = SoundReportConfig(id=str(uuid.uuid4()), project_id=project_id)
            db.add(cfg)
        cfg.baseline_mode = bm

    updated = 0
    for loc_id, windows in loc_values.items():
        if not isinstance(windows, dict):
            continue
        loc = db.query(MonitoringLocation).filter_by(id=loc_id, project_id=project_id).first()
        if not loc:
            continue
        try:
            meta = json.loads(loc.location_metadata or "{}")
        except (json.JSONDecodeError, TypeError):
            meta = {}
        if not isinstance(meta, dict):
            # Valid JSON that is not an object ("null", "[]") cannot hold keys;
            # treat it like unreadable metadata.
            meta = {}

        clean = _clean_reference_values(windows)
        if clean:
            meta["report_baseline"] = clean
        else:
            meta.pop("report_baseline", None)
        loc.location_metadata = json.dumps(meta)
        updated += 1

    db.commit()
    return {"ok": True, "locations_updated": updated}
@@ -72,6 +72,7 @@ def run_nightly_report(
    *,
    metric_keys: Optional[list[str]] = None,
    windows: Optional[list[Window]] = None,
    baseline_mode: str = "captured",
    baseline_start: Optional[date] = None,
    baseline_end: Optional[date] = None,
    recipients: Optional[list[str]] = None,
@@ -86,6 +87,7 @@ def run_nightly_report(
    report = build_project_night_report(
        db, project_id, night_date,
        metric_keys=metric_keys, windows=windows,
        baseline_mode=baseline_mode,
        baseline_start=baseline_start, baseline_end=baseline_end,
    )
@@ -266,6 +266,36 @@ class LocationNightReport:
    notes: list[str] = field(default_factory=list)
 def _location_reference_baseline(loc) -> dict:
     """Return a location's manually-entered reference baseline from its metadata.

     Shape: {window_key: {metric_key: float}} e.g. {"nighttime": {"l10": 85.0}}.
     Used when baseline_mode == "reference" — fixed targets/limits or prior-report
     averages typed in, rather than computed from captured nights.

     Never raises on bad stored data: a missing location, unreadable metadata,
     metadata that is not a JSON object, a non-object ``report_baseline``, and
     values that are not finite numbers all yield "nothing set" (the entry is
     omitted), so the affected cells simply have no baseline.
     """
     import math  # function-scope import: the module's import block is not edited here

     if not loc:
         return {}
     try:
         meta = json.loads(loc.location_metadata or "{}")
     except (json.JSONDecodeError, TypeError):
         return {}
     # Valid JSON is not necessarily an object ("null", "[]", "3").
     if not isinstance(meta, dict):
         return {}
     ref = meta.get("report_baseline")
     if not isinstance(ref, dict):
         return {}

     out: dict[str, dict[str, float]] = {}
     for wkey, mvals in ref.items():
         if not isinstance(mvals, dict):
             continue
         clean = {}
         for mkey, val in mvals.items():
             try:
                 number = float(val)
             except (ValueError, TypeError, OverflowError):
                 # OverflowError: an integer too large to convert to float.
                 continue
             # NaN/inf cannot serve as a comparison target and are not
             # representable in strict JSON responses.
             if math.isfinite(number):
                 clean[mkey] = number
         if clean:
             out[wkey] = clean
     return out
 def build_location_night_report(
    db: Session,
    location_id: str,
@@ -273,15 +303,18 @@ def build_location_night_report(
    *,
    metric_keys: Optional[list[str]] = None,
    windows: Optional[list[Window]] = None,
    baseline_mode: str = "captured",
    baseline_start: Optional[date] = None,
    baseline_end: Optional[date] = None,
 ) -> LocationNightReport:
    """Build the night-vs-baseline data model for one location.
    `night_date` is the *evening* date of the night being reported (e.g. the
-    7/7 in "night of 7/7 → morning 7/8").  Baseline is the typical-night value
+    7/7 in "night of 7/7 → morning 7/8").  Baseline comes from one of:
-    across the eligible nights in [baseline_start, baseline_end]; pass neither
+      - "captured": the typical-night value across eligible nights in
-    to skip the comparison (baseline cells become None).
+        [baseline_start, baseline_end] (computed from recorded data);
      - "reference": fixed values typed per location (a spec limit like
        "L10 = 85", or a prior report's averages).
    """
    metric_keys = metric_keys or DEFAULT_METRICS
    metrics = [METRIC_REGISTRY[k] for k in metric_keys]
@@ -293,8 +326,10 @@ def build_location_night_report(
    all_rows = _location_leq_rows(db, location_id)
    night_rows = _rows_in_night(all_rows, night_date)
    reference = _location_reference_baseline(loc) if baseline_mode == "reference" else {}
    baseline_nights: list[date] = []
-    if baseline_start and baseline_end:
+    if baseline_mode != "reference" and baseline_start and baseline_end:
        baseline_nights = _eligible_nights(all_rows, baseline_start, baseline_end)
        # Don't let the reported night double as its own baseline.
        baseline_nights = [n for n in baseline_nights if n != night_date]
@@ -304,13 +339,16 @@ def build_location_night_report(
        table[w.key] = {}
        for m in metrics:
            last_night_val = _window_value(night_rows, m, w)
-            baseline_val = None
+            if baseline_mode == "reference":
-            if baseline_nights:
+                baseline_val = reference.get(w.key, {}).get(m.key)
            elif baseline_nights:
                per_night = [
                    _window_value(_rows_in_night(all_rows, nd), m, w)
                    for nd in baseline_nights
                ]
                baseline_val = _combine_across_nights(per_night, m.agg)
            else:
                baseline_val = None
            table[w.key][m.key] = CellPair(last_night_val, baseline_val)
    interval_series = []
@@ -325,7 +363,10 @@ def build_location_night_report(
    notes: list[str] = []
    if not night_rows:
        notes.append(f"No data found for the night of {night_date:%m/%d/%y}.")
-    if (baseline_start or baseline_end) and not baseline_nights:
+    if baseline_mode == "reference":
        if not any(reference.values()):
            notes.append("Reference-baseline mode is on but no reference values are set for this location.")
    elif (baseline_start or baseline_end) and not baseline_nights:
        notes.append("No baseline nights with data in the configured range.")
    return LocationNightReport(
@@ -358,6 +399,7 @@ def build_project_night_report(
    *,
    metric_keys: Optional[list[str]] = None,
    windows: Optional[list[Window]] = None,
    baseline_mode: str = "captured",
    baseline_start: Optional[date] = None,
    baseline_end: Optional[date] = None,
 ) -> ProjectNightReport:
@@ -375,6 +417,7 @@ def build_project_night_report(
        build_location_night_report(
            db, loc.id, night_date,
            metric_keys=metric_keys, windows=windows,
            baseline_mode=baseline_mode,
            baseline_start=baseline_start, baseline_end=baseline_end,
        )
        for loc in locations
@@ -828,6 +828,7 @@ class SchedulerService:
                    result = run_nightly_report(
                        db, cfg.project_id, night_date,
                        metric_keys=metric_keys,
                        baseline_mode=cfg.baseline_mode,
                        baseline_start=cfg.baseline_start,
                        baseline_end=cfg.baseline_end,
                        recipients=recipients,