feat(reports): FTP night-report pipeline foundation
Terra-View side of the daily night-vs-baseline sound report for the John Myler 24/7 job. Engine is built and verified end-to-end against real meter data; SMTP send + scheduler/capture wiring still pending.

- ingest: refactor upload_nrl_data into a callable ingest_nrl_zip(location_id, zip_bytes, db) sharing one core with the HTTP endpoint. Capture the .rnh percentile map + weightings into session metadata; dedup on store-name + start time. Ingest stays metric-agnostic (every Leq column preserved).
- report_pipeline.py: metric registry, Evening/Nighttime windows, correct aggregation (Lmax=max, Ln=arithmetic, Leq=logarithmic), baseline = typical night, per-location + per-project builders.
- report_renderers.py: HTML email-body renderer (Last/Base/delta layout).
- report_email.py: config-driven SMTP via stdlib (env vars) with a dry-run fallback so the pipeline runs without credentials.
- report_orchestrator.py: compute -> render -> always write report.html + report.json to disk -> best-effort email.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,389 @@
|
||||
"""
|
||||
Nightly Report Pipeline — computation core.
|
||||
|
||||
Builds the data model for the John-Myler-style "last night vs. baseline" sound
|
||||
report. Source-agnostic: it reads the same on-disk Leq `.rnd` files the manual
|
||||
upload + FTP-pull ingest produce (see `project_locations.ingest_nrl_zip`).
|
||||
|
||||
Design notes
|
||||
------------
|
||||
* **Ingest everything, report selectively.** Ingest preserves every column of
|
||||
the Leq file; this layer chooses which *metrics* to surface via `metric_keys`
|
||||
(a future report wizard is just a UI over that list).
|
||||
* **House format match.** Defaults reproduce the existing Excel report:
|
||||
LAmax (max of interval maxima), LA01 / LA10 (arithmetic average), split into
|
||||
Evening (7–10PM) and Nighttime (10PM–7AM) windows. L90 (background) is added
|
||||
for the baseline comparison.
|
||||
* **Metric labelling from the device.** The LN→percentile assignment is
|
||||
reconfigurable per job; we resolve which `LNx(Main)` column is L90/L10/etc.
|
||||
from the percentile map captured in the session metadata at ingest, falling
|
||||
back to the NL-43 default order.
|
||||
* **Correct averaging.** Within a night, Leq is energy-averaged (logarithmic),
  percentiles are arithmetically averaged, and Lmax is the maximum of the
  interval maxima. Baseline references then combine the per-night values into a
  "typical night" (arithmetic mean of per-night values, logarithmic mean for
  Leq — so baseline Lmax is the typical nightly peak, not the worst-of-week).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, date
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from backend.models import MonitoringSession, DataFile, MonitoringLocation, Project
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Metric registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass(frozen=True)
class Metric:
    """Immutable description of one reportable metric.

    Fields:
        key: Short identifier; also the key used in `METRIC_REGISTRY`.
        label: Display name for the metric (e.g. "LAmax").
        agg: Within-night aggregation that collapses a window's intervals
            into a single value:
              * "max"   - loudest interval (LAmax)
              * "arith" - arithmetic mean (percentiles: L01/L10/L90...)
              * "log"   - energy/logarithmic mean (Leq only)
        column: Fixed .rnd column name, when the metric always lives in the
            same column.
        percentile: Percentile to look up instead; the matching LNx column is
            resolved from the session's captured percentile map.
    """

    key: str
    label: str
    agg: str
    column: Optional[str] = None
    percentile: Optional[float] = None
|
||||
|
||||
|
||||
# Every metric the report layer knows about, indexed by its own `key` so the
# dictionary key and `Metric.key` cannot drift apart.
METRIC_REGISTRY: dict[str, Metric] = {
    metric.key: metric
    for metric in (
        Metric("lmax", "LAmax", "max", column="Lmax(Main)"),
        Metric("leq", "LAeq", "log", column="Leq(Main)"),
        Metric("lmin", "LAmin", "arith", column="Lmin(Main)"),
        Metric("l01", "LA01", "arith", percentile=1.0),
        Metric("l10", "LA10", "arith", percentile=10.0),
        Metric("l50", "LA50", "arith", percentile=50.0),
        Metric("l90", "LA90", "arith", percentile=90.0),
        Metric("l95", "LA95", "arith", percentile=95.0),
    )
}

# Metrics shown when the caller does not pick any: the house report set plus
# L90 (background) for the baseline comparison.
DEFAULT_METRICS: list[str] = ["lmax", "l01", "l10", "l90"]

# NL-43 default percentile -> LN slot assignment (slots count from 1), used
# when a session has no captured percentile map.
_DEFAULT_SLOT_FOR_PCT: dict[float, int] = {
    pct: slot
    for slot, pct in enumerate((1.0, 10.0, 50.0, 90.0, 95.0), start=1)
}
|
||||
|
||||
|
||||
def _resolve_column(metric: Metric, pct_map: dict) -> Optional[str]:
|
||||
"""Resolve the .rnd column for a metric, using the session's percentile map."""
|
||||
if metric.column:
|
||||
return metric.column
|
||||
if metric.percentile is None:
|
||||
return None
|
||||
# pct_map: {"1": "1.0", "2": "10.0", "4": "90.0", ...} → slot : percentile
|
||||
if pct_map:
|
||||
for slot, pval in pct_map.items():
|
||||
try:
|
||||
if float(pval) == metric.percentile:
|
||||
return f"LN{int(slot)}(Main)"
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
slot = _DEFAULT_SLOT_FOR_PCT.get(metric.percentile)
|
||||
return f"LN{slot}(Main)" if slot else None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Time windows
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass(frozen=True)
class Window:
    """A named hour-of-day range used to group intervals.

    The range is half-open: `start_hour` is included, `end_hour` is not.
    When `start_hour` is not below `end_hour` the window is treated as
    wrapping past midnight (e.g. 22 -> 7).
    """

    key: str
    label: str
    start_hour: int
    end_hour: int

    def contains(self, hour: int) -> bool:
        """Return True when `hour` falls inside this window."""
        wraps_midnight = self.start_hour >= self.end_hour
        if wraps_midnight:
            # Late-evening part or early-morning part.
            return hour >= self.start_hour or hour < self.end_hour
        return self.start_hour <= hour < self.end_hour
|
||||
|
||||
|
||||
# Windows of the stats table; these mirror the existing Excel report.
DEFAULT_WINDOWS: list[Window] = [
    Window(key="evening", label="Evening (7PM–10PM)", start_hour=19, end_hour=22),
    Window(key="nighttime", label="Nighttime (10PM–7AM)", start_hour=22, end_hour=7),
]

# Extent of one full "night": it starts at NIGHT_START_HOUR on the evening
# date and lasts NIGHT_LENGTH_HOURS. Used to pick the intervals that belong to
# "last night".
NIGHT_START_HOUR = 19
NIGHT_LENGTH_HOURS = 12
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Aggregation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _aggregate(values: list, method: str) -> Optional[float]:
|
||||
"""Collapse a window's interval values into one number per `method`."""
|
||||
vals = [v for v in values if isinstance(v, (int, float))]
|
||||
if not vals:
|
||||
return None
|
||||
if method == "max":
|
||||
return round(max(vals), 1)
|
||||
if method == "log":
|
||||
return round(10 * math.log10(sum(10 ** (v / 10.0) for v in vals) / len(vals)), 1)
|
||||
return round(sum(vals) / len(vals), 1) # arithmetic
|
||||
|
||||
|
||||
def _combine_across_nights(per_night: list, method: str) -> Optional[float]:
|
||||
"""Combine per-night window values into a baseline 'typical night' value.
|
||||
|
||||
Arithmetic mean for max/arith metrics (so baseline Lmax = typical nightly
|
||||
peak, the agreed default), logarithmic mean for Leq.
|
||||
"""
|
||||
vals = [v for v in per_night if v is not None]
|
||||
if not vals:
|
||||
return None
|
||||
if method == "log":
|
||||
return round(10 * math.log10(sum(10 ** (v / 10.0) for v in vals) / len(vals)), 1)
|
||||
return round(sum(vals) / len(vals), 1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Row gathering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _parse_dt(s: str) -> Optional[datetime]:
|
||||
try:
|
||||
return datetime.strptime(s, "%Y/%m/%d %H:%M:%S")
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def _location_leq_rows(db: Session, location_id: str) -> list[tuple[datetime, dict, dict]]:
    """All Leq intervals at a location as (interval_dt, row, percentile_map).

    Walks every "sound" monitoring session of the location, keeps the `.rnd`
    data files that `_is_leq_file` accepts, and emits one tuple per row whose
    "Start Time" parses. Each tuple carries the percentile map read from its
    session's metadata ({} when the metadata is missing or malformed).

    Reuses the same .rnd readers as the report endpoints so parsing stays
    identical. Times are the meter's local clock (as written in the file).

    Returns:
        The tuples sorted by interval start time.
    """
    # Lazy import avoids a service→router import cycle at module load.
    from backend.routers.projects import (
        _read_rnd_file_rows, _normalize_rnd_rows, _is_leq_file, _peek_rnd_headers,
    )
    from pathlib import Path

    out: list[tuple[datetime, dict, dict]] = []
    sessions = db.query(MonitoringSession).filter_by(
        location_id=location_id, session_type="sound",
    ).all()
    for s in sessions:
        try:
            meta = json.loads(s.session_metadata or "{}")
        except (json.JSONDecodeError, TypeError):
            meta = {}
        # json.loads can return any JSON value (null, list, string, number);
        # only an object is usable here.
        if not isinstance(meta, dict):
            meta = {}
        pct_map = meta.get("percentiles")
        if not isinstance(pct_map, dict):
            # Missing or malformed map: downstream falls back to default slots.
            pct_map = {}

        for f in db.query(DataFile).filter_by(session_id=s.id).all():
            if not f.file_path or not f.file_path.lower().endswith(".rnd"):
                continue
            # NOTE(review): the header peek receives Path("data") / file_path
            # while the row reader receives the bare file_path; presumably
            # the reader resolves the data root itself. Confirm against
            # backend.routers.projects.
            peek = _peek_rnd_headers(Path("data") / f.file_path)
            if not _is_leq_file(f.file_path, peek):
                continue
            rows = _read_rnd_file_rows(f.file_path)
            rows, _ = _normalize_rnd_rows(rows)
            for r in rows:
                dt = _parse_dt(r.get("Start Time", ""))
                if dt:
                    out.append((dt, r, pct_map))

    out.sort(key=lambda t: t[0])
    return out
|
||||
|
||||
|
||||
def _rows_in_night(rows: list, night_date: date) -> list:
    """Select the rows belonging to the night that *starts* on `night_date`.

    The night runs from NIGHT_START_HOUR on `night_date` for
    NIGHT_LENGTH_HOURS; the start is inclusive and the end exclusive. Input
    order is preserved.
    """
    night_open = datetime(
        night_date.year, night_date.month, night_date.day, NIGHT_START_HOUR, 0,
    )
    night_close = night_open + timedelta(hours=NIGHT_LENGTH_HOURS)

    selected = []
    for dt, r, p in rows:
        if night_open <= dt < night_close:
            selected.append((dt, r, p))
    return selected
|
||||
|
||||
|
||||
def _eligible_nights(rows: list, start_date: date, end_date: date) -> list[date]:
    """List the evening-dates in [start_date, end_date] that have night data.

    Both bounds are inclusive. A date qualifies when `_rows_in_night` finds at
    least one row for the night starting on it. An inverted range yields an
    empty list.
    """
    span_days = (end_date - start_date).days
    candidates = (
        start_date + timedelta(days=offset) for offset in range(span_days + 1)
    )
    return [night for night in candidates if _rows_in_night(rows, night)]
|
||||
|
||||
|
||||
def _window_value(rows: list, metric: Metric, window: Window) -> Optional[float]:
    """Aggregate one metric over the rows whose hour falls inside `window`.

    The metric's column is resolved per row, because each row carries the
    percentile map of its own session. Rows for which no column can be
    resolved contribute nothing. The collected cell values are then collapsed
    with the metric's within-night aggregation.
    """
    in_window = (
        (r, pct_map) for dt, r, pct_map in rows if window.contains(dt.hour)
    )
    samples = []
    for r, pct_map in in_window:
        col = _resolve_column(metric, pct_map)
        if col:
            samples.append(r.get(col))
    return _aggregate(samples, metric.agg)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Report data model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class CellPair:
    """One table cell: the reported night's value next to its baseline value.

    Either side may be None when no value could be computed.
    """

    last_night: Optional[float]
    baseline: Optional[float]

    @property
    def delta(self) -> Optional[float]:
        """Last night minus baseline, rounded to one decimal; None if either is missing."""
        have_both = self.last_night is not None and self.baseline is not None
        if not have_both:
            return None
        difference = self.last_night - self.baseline
        return round(difference, 1)
|
||||
|
||||
|
||||
@dataclass
class LocationNightReport:
    """Night-vs-baseline result for a single monitoring location."""

    # Which location and which night (evening date) this report covers.
    location_id: str
    location_name: str
    night_date: date

    # Metrics and windows that were evaluated, in display order.
    metrics: list[Metric]
    windows: list[Window]

    # table[window_key][metric_key] = CellPair
    table: dict[str, dict[str, CellPair]]

    # One dict per interval of the reported night.
    interval_series: list[dict]

    # How many intervals the reported night had, and how many baseline nights
    # fed the baseline values.
    night_interval_count: int
    baseline_nights_used: int

    # Human-readable remarks (e.g. missing data); a fresh list per instance.
    notes: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def build_location_night_report(
    db: Session,
    location_id: str,
    night_date: date,
    *,
    metric_keys: Optional[list[str]] = None,
    windows: Optional[list[Window]] = None,
    baseline_start: Optional[date] = None,
    baseline_end: Optional[date] = None,
) -> LocationNightReport:
    """Build the night-vs-baseline data model for one location.

    `night_date` is the *evening* date of the night being reported (e.g. the
    7/7 in "night of 7/7 → morning 7/8"). Baseline is the typical-night value
    across the eligible nights in [baseline_start, baseline_end]; pass neither
    to skip the comparison (baseline cells become None).

    Args:
        db: Database session used to look up the location and its data.
        location_id: Location to report on; also used as the display name
            when the location row is not found.
        night_date: Evening date of the reported night.
        metric_keys: Keys into `METRIC_REGISTRY`; defaults to
            `DEFAULT_METRICS`.
        windows: Windows to tabulate; defaults to `DEFAULT_WINDOWS`.
        baseline_start: First evening date of the baseline range (inclusive).
        baseline_end: Last evening date of the baseline range (inclusive).

    Returns:
        The populated `LocationNightReport`. Missing night data, an
        incomplete baseline range and an empty baseline are reported through
        its `notes` rather than raised.

    Raises:
        KeyError: If `metric_keys` contains a key not in `METRIC_REGISTRY`.
    """
    metric_keys = metric_keys or DEFAULT_METRICS
    metrics = [METRIC_REGISTRY[k] for k in metric_keys]
    windows = windows or DEFAULT_WINDOWS

    loc = db.query(MonitoringLocation).filter_by(id=location_id).first()
    loc_name = loc.name if loc else location_id

    all_rows = _location_leq_rows(db, location_id)
    night_rows = _rows_in_night(all_rows, night_date)

    baseline_nights: list[date] = []
    if baseline_start and baseline_end:
        baseline_nights = _eligible_nights(all_rows, baseline_start, baseline_end)
        # Don't let the reported night double as its own baseline.
        baseline_nights = [n for n in baseline_nights if n != night_date]

    # Slice each baseline night out of the full row list once, instead of
    # re-scanning all rows for every window x metric combination.
    baseline_rows_by_night = [_rows_in_night(all_rows, nd) for nd in baseline_nights]

    table: dict[str, dict[str, CellPair]] = {}
    for w in windows:
        cells: dict[str, CellPair] = {}
        for m in metrics:
            last_night_val = _window_value(night_rows, m, w)
            baseline_val = None
            if baseline_rows_by_night:
                per_night = [
                    _window_value(rows_for_night, m, w)
                    for rows_for_night in baseline_rows_by_night
                ]
                baseline_val = _combine_across_nights(per_night, m.agg)
            cells[m.key] = CellPair(last_night_val, baseline_val)
        table[w.key] = cells

    interval_series = []
    for dt, r, pct_map in night_rows:
        entry = {"dt": dt, "time": dt.strftime("%H:%M")}
        for m in metrics:
            col = _resolve_column(m, pct_map)
            val = r.get(col) if col else None
            # Non-numeric cells are surfaced as None.
            entry[m.key] = val if isinstance(val, (int, float)) else None
        interval_series.append(entry)

    notes: list[str] = []
    if not night_rows:
        notes.append(f"No data found for the night of {night_date:%m/%d/%y}.")
    if bool(baseline_start) != bool(baseline_end):
        # Only one bound supplied: the comparison was skipped, not empty.
        notes.append(
            "Baseline range is incomplete (both start and end dates are "
            "required); baseline comparison skipped."
        )
    elif baseline_start and not baseline_nights:
        notes.append("No baseline nights with data in the configured range.")

    return LocationNightReport(
        location_id=location_id,
        location_name=loc_name,
        night_date=night_date,
        metrics=metrics,
        windows=windows,
        table=table,
        interval_series=interval_series,
        night_interval_count=len(night_rows),
        baseline_nights_used=len(baseline_nights),
        notes=notes,
    )
|
||||
|
||||
|
||||
@dataclass
class ProjectNightReport:
    """Night report for a whole project: one location report per location."""

    # Project identity and the night (evening date) being reported.
    project_id: str
    project_name: str
    night_date: date

    # Metrics the report was built with, in display order.
    metrics: list[Metric]

    # Per-location results, in the order they were built.
    locations: list[LocationNightReport]
|
||||
|
||||
|
||||
def build_project_night_report(
    db: Session,
    project_id: str,
    night_date: date,
    *,
    metric_keys: Optional[list[str]] = None,
    windows: Optional[list[Window]] = None,
    baseline_start: Optional[date] = None,
    baseline_end: Optional[date] = None,
) -> ProjectNightReport:
    """Assemble the night report for each active sound location of a project.

    Locations of type "sound" are taken in `sort_order`, then `name` order;
    any location with a non-None `removed_at` is left out. Every remaining
    location gets its own `build_location_night_report` with the same
    metrics, windows and baseline range. When the project row is missing,
    `project_id` doubles as the project name.
    """
    metric_keys = metric_keys or DEFAULT_METRICS

    project = db.query(Project).filter_by(id=project_id).first()
    project_name = project_id if not project else project.name

    candidates = (
        db.query(MonitoringLocation)
        .filter_by(project_id=project_id, location_type="sound")
        .order_by(MonitoringLocation.sort_order, MonitoringLocation.name)
        .all()
    )
    active = [
        candidate
        for candidate in candidates
        if getattr(candidate, "removed_at", None) is None
    ]

    reports = []
    for loc in active:
        location_report = build_location_night_report(
            db,
            loc.id,
            night_date,
            metric_keys=metric_keys,
            windows=windows,
            baseline_start=baseline_start,
            baseline_end=baseline_end,
        )
        reports.append(location_report)

    resolved_metrics = [METRIC_REGISTRY[k] for k in metric_keys]
    return ProjectNightReport(
        project_id=project_id,
        project_name=project_name,
        night_date=night_date,
        metrics=resolved_metrics,
        locations=reports,
    )
|
||||
Reference in New Issue
Block a user