feat(reports): FTP night-report pipeline foundation
Terra-View side of the daily night-vs-baseline sound report for the John Myler 24/7 job. The engine is built and verified end-to-end against real meter data; the SMTP send and the scheduler/capture wiring are still pending.

- ingest: refactor upload_nrl_data into a callable ingest_nrl_zip(location_id, zip_bytes, db) that shares one core with the HTTP endpoint. Capture the .rnh percentile map + weightings into session metadata; dedup on store-name + start time. Ingest stays metric-agnostic (every Leq column preserved).
- report_pipeline.py: metric registry, Evening/Nighttime windows, correct aggregation (Lmax=max, Ln=arithmetic, Leq=logarithmic), baseline = typical night, per-location + per-project builders.
- report_renderers.py: HTML email-body renderer (Last/Base/delta layout).
- report_email.py: config-driven SMTP via stdlib (env vars) with a dry-run fallback so the pipeline runs without credentials.
- report_orchestrator.py: compute -> render -> always write report.html + report.json to disk -> best-effort email.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1712,6 +1712,19 @@ def _parse_rnh(content: bytes) -> dict:
|
||||
result["stop_time_str"] = value
|
||||
elif key == "Total Measurement Time":
|
||||
result["total_time_str"] = value
|
||||
elif key == "Frequency Weighting (Main)":
|
||||
result["frequency_weighting"] = value
|
||||
elif key == "Time Weighting (Main)":
|
||||
result["time_weighting"] = value
|
||||
elif key == "Leq Calculation Interval":
|
||||
result["leq_interval"] = value
|
||||
elif key.startswith("Percentile "):
|
||||
# e.g. "Percentile 4,90.0" → percentiles["4"] = "90.0".
|
||||
# Lets the report label the LN slots (here LN4 = L90) from the
|
||||
# device's own config instead of hardcoding which slot is which —
|
||||
# the percentile assignment is reconfigurable per job.
|
||||
slot = key[len("Percentile "):].strip()
|
||||
result.setdefault("percentiles", {})[slot] = value
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
@@ -1740,6 +1753,270 @@ def _classify_file(filename: str) -> str:
|
||||
return "data"
|
||||
|
||||
|
||||
def _is_wanted_nrl_file(fname: str) -> bool:
|
||||
"""Keep only the files an NRL ingest cares about: .rnh metadata + the
|
||||
averaged Leq .rnd. Drops the 1-second _Lp_ files and everything else.
|
||||
|
||||
- NL-43 writes two .rnd types: _Leq_ (15-min averages, wanted) and
|
||||
_Lp_ (1-second granular, skipped).
|
||||
- AU2 (NL-23/older Rion) writes a single Au2_####.rnd — always keep.
|
||||
|
||||
Note this is purely about which *files* to store, not which *metrics* to
|
||||
report: the kept Leq file carries every column (Leq, Lmax, L1/L10/L50/
|
||||
L90/L95, Lpeak, …), so the report layer can select any metric later.
|
||||
"""
|
||||
n = fname.lower()
|
||||
if n.endswith(".rnh"):
|
||||
return True
|
||||
if n.endswith(".rnd"):
|
||||
if "_leq_" in n: # NL-43 Leq file
|
||||
return True
|
||||
if n.startswith("au2_"): # AU2 format (NL-23) — Leq equivalent
|
||||
return True
|
||||
if "_lp" not in n and "_leq_" not in n:
|
||||
# Unknown .rnd format — include it so we don't silently drop data
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class IngestError(Exception):
    """Signals an NRL upload/ZIP with no usable data or an invalid target.

    Deliberately free of any HTTP concepts so the ingest core can be driven
    programmatically (the scheduled FTP pull) as well as from the HTTP upload
    endpoint. Each caller translates it for its own context: the endpoint
    turns it into an HTTP 400, the scheduler into a logged failure.
    """
|
||||
|
||||
|
||||
def _find_existing_session(
    db: Session,
    location_id: str,
    store_name: str,
    started_at,
    start_time_str: str,
):
    """Look up a previously ingested sound session for the same measurement.

    This is what makes FTP re-pulls idempotent. A daily cycle closes one
    Auto_#### folder per day, so within a location a session is identified by
    (store_name + measurement start time). Store names recycle across jobs,
    which is why a store-name match alone is never accepted.

    Args:
        db: Database session used to query ``MonitoringSession`` rows.
        location_id: Location whose sound sessions are searched.
        store_name: Store name from the .rnh header; when empty, the
            store-name comparison is skipped.
        started_at: Start timestamp compared against the stored
            ``started_at`` column for sessions whose metadata has no
            ``start_time_str``.
        start_time_str: Raw start-time string from the .rnh header, compared
            against the ``start_time_str`` kept in session metadata.

    Returns:
        The first matching ``MonitoringSession``, or ``None`` if nothing
        matches (or if neither ``store_name`` nor ``started_at`` is given).
    """
    # With neither identifier there is nothing to match on.
    if not (store_name or started_at):
        return None

    sound_sessions = (
        db.query(MonitoringSession)
        .filter(
            MonitoringSession.location_id == location_id,
            MonitoringSession.session_type == "sound",
        )
        .all()
    )

    for candidate in sound_sessions:
        # Unreadable metadata is treated as empty rather than aborting the scan.
        try:
            stored = json.loads(candidate.session_metadata or "{}")
        except (json.JSONDecodeError, TypeError):
            stored = {}

        if store_name and stored.get("store_name") != store_name:
            continue

        # Store name is compatible — now confirm via the start time.
        stored_start = stored.get("start_time_str")
        if start_time_str and stored_start == start_time_str:
            return candidate
        # Sessions without a stored start string fall back to the timestamp column.
        if not stored_start and started_at and candidate.started_at == started_at:
            return candidate

    return None
|
||||
|
||||
|
||||
def _ingest_file_entries(
    location: MonitoringLocation,
    file_entries: list[tuple[str, bytes]],
    db: Session,
    *,
    source: str = "manual_upload",
    dedupe: bool = False,
) -> dict:
    """Core NRL ingest, shared by the HTTP upload and the programmatic FTP pull.

    Takes already-normalized (filename, bytes) entries, keeps the wanted files,
    parses the .rnh, and creates a MonitoringSession + DataFile rows under the
    location's project. Metric-agnostic: the full Leq file is written to disk
    and every column preserved; metric selection happens in the report layer.

    The session row, the DataFile rows and the files on disk are produced as
    one unit: the database is committed only after every file has been
    written. If anything fails part-way, the transaction is rolled back and
    the partially written session directory is removed, so a later re-pull
    with ``dedupe=True`` is not fooled by a half-ingested session.

    Args:
        location: Target location; its ``id``, ``name`` and ``project_id``
            are used for the session row and the output directory.
        file_entries: ``(filename, content)`` pairs to ingest.
        db: Database session used for lookups and inserts.
        source: Label stored in session and file metadata.
        dedupe: When true, return the existing session instead of creating a
            new one if this measurement was already ingested.

    Returns:
        A summary dict with ``success``, ``deduped``, ``session_id``,
        ``files_imported``, ``leq_files``, ``lp_files``, ``metadata_files``,
        ``store_name``, ``started_at`` and ``stopped_at``.

    Raises:
        IngestError: If no usable files are present.
    """
    # Function-scope imports keep this core self-contained. The HTTP endpoint
    # it was extracted from imports ``datetime`` locally, so a module-level
    # import cannot be assumed from here; the import is harmless if one exists.
    import shutil
    from datetime import datetime

    # --- Filter to the files we keep (.rnh + Leq .rnd) ---
    file_entries = [(f, b) for f, b in file_entries if _is_wanted_nrl_file(f)]
    if not file_entries:
        raise IngestError(
            "No usable .rnd or .rnh files found. Expected NL-43 _Leq_ files or AU2 format .rnd files."
        )

    # --- Parse .rnh metadata (first one wins) ---
    rnh_meta = {}
    for fname, fbytes in file_entries:
        if fname.lower().endswith(".rnh"):
            rnh_meta = _parse_rnh(fbytes)
            break

    # RNH stores local time (no UTC offset). Use local for period/label, then
    # convert to UTC for storage so the local_datetime filter displays correctly.
    # NOTE(review): the fallback is a UTC "now" that is then treated as local
    # time by local_to_utc — pre-existing behaviour, left unchanged here.
    started_at_local = _parse_rnh_datetime(rnh_meta.get("start_time_str")) or datetime.utcnow()
    stopped_at_local = _parse_rnh_datetime(rnh_meta.get("stop_time_str"))
    started_at = local_to_utc(started_at_local)
    stopped_at = local_to_utc(stopped_at_local) if stopped_at_local else None
    duration_seconds = (
        int((stopped_at - started_at).total_seconds())
        if (started_at and stopped_at) else None
    )

    store_name = rnh_meta.get("store_name", "")
    serial_number = rnh_meta.get("serial_number", "")
    index_number = rnh_meta.get("index_number", "")
    start_time_str = rnh_meta.get("start_time_str", "")

    # --- Dedupe: skip if this exact measurement is already ingested ---
    if dedupe:
        existing = _find_existing_session(db, location.id, store_name, started_at, start_time_str)
        if existing:
            return {
                "success": True,
                "deduped": True,
                "session_id": existing.id,
                "files_imported": 0,
                "leq_files": 0,
                "lp_files": 0,
                "metadata_files": 0,
                "store_name": store_name,
                "started_at": started_at.isoformat() if started_at else None,
                "stopped_at": stopped_at.isoformat() if stopped_at else None,
            }

    # --- Build the MonitoringSession (local times drive period/label) ---
    # started_at_local is always set (it falls back to "now" above).
    period_type = _derive_period_type(started_at_local)
    session_label = _build_session_label(started_at_local, location.name, period_type)

    session_id = str(uuid.uuid4())
    monitoring_session = MonitoringSession(
        id=session_id,
        project_id=location.project_id,
        location_id=location.id,
        unit_id=None,
        session_type="sound",
        started_at=started_at,
        stopped_at=stopped_at,
        duration_seconds=duration_seconds,
        status="completed",
        session_label=session_label,
        period_type=period_type,
        session_metadata=json.dumps({
            "source": source,
            "store_name": store_name,
            "serial_number": serial_number,
            "index_number": index_number,
            "start_time_str": start_time_str,
            # Captured from the .rnh so the report can label metrics from the
            # device's own config (which LN slot is L90, the weightings, etc.).
            "percentiles": rnh_meta.get("percentiles", {}),
            "frequency_weighting": rnh_meta.get("frequency_weighting", ""),
            "time_weighting": rnh_meta.get("time_weighting", ""),
            "leq_interval": rnh_meta.get("leq_interval", ""),
        }),
    )

    # --- Write files to disk + create DataFile records (single transaction) ---
    output_dir = Path("data/Projects") / location.project_id / session_id
    leq_count = lp_count = metadata_count = files_imported = 0
    try:
        db.add(monitoring_session)
        # Flush (not commit) so the session row exists for the DataFile rows
        # while still being undoable if a file write fails below.
        db.flush()

        output_dir.mkdir(parents=True, exist_ok=True)

        for fname, fbytes in file_entries:
            fname_lower = fname.lower()
            if fname_lower.endswith(".rnd"):
                if "_leq_" in fname_lower:
                    leq_count += 1
                elif "_lp" in fname_lower:
                    lp_count += 1
            elif fname_lower.endswith(".rnh"):
                metadata_count += 1

            # Filenames can come from an HTTP upload: keep only the final path
            # component so "../" or absolute names cannot escape output_dir.
            safe_name = Path(fname.replace("\\", "/")).name
            dest = output_dir / safe_name
            dest.write_bytes(fbytes)
            checksum = hashlib.sha256(fbytes).hexdigest()
            rel_path = str(dest.relative_to("data"))

            db.add(DataFile(
                id=str(uuid.uuid4()),
                session_id=session_id,
                file_path=rel_path,
                file_type=_classify_file(fname),
                file_size_bytes=len(fbytes),
                downloaded_at=datetime.utcnow(),
                checksum=checksum,
                file_metadata=json.dumps({
                    "source": source,
                    "original_filename": fname,
                    "store_name": store_name,
                }),
            ))
            files_imported += 1

        db.commit()
    except Exception:
        # Leave neither an orphan session row nor stray files behind; an orphan
        # session would make the dedupe check skip this measurement forever.
        db.rollback()
        shutil.rmtree(output_dir, ignore_errors=True)
        raise

    return {
        "success": True,
        "deduped": False,
        "session_id": session_id,
        "files_imported": files_imported,
        "leq_files": leq_count,
        "lp_files": lp_count,
        "metadata_files": metadata_count,
        "store_name": store_name,
        "started_at": started_at.isoformat() if started_at else None,
        "stopped_at": stopped_at.isoformat() if stopped_at else None,
    }
|
||||
|
||||
|
||||
def ingest_nrl_zip(
    location_id: str,
    zip_bytes: bytes,
    db: Session,
    *,
    source: str = "ftp_pull",
    dedupe: bool = True,
) -> dict:
    """Programmatically ingest an Auto_#### ZIP (e.g. a scheduled FTP pull).

    Extracts the ZIP (flattening any nested Auto_Leq/Auto_Lp_ folders), keeps
    the .rnh + Leq .rnd, and hands the entries to ``_ingest_file_entries``,
    which parses the header and creates a MonitoringSession + DataFile rows
    for ``location_id``. Defaults to dedupe=True so repeated daily pulls of
    the same closed folder don't create duplicate sessions.

    Only members that the ingest would keep are decompressed; the 1-second
    _Lp_ files and other unwanted members are skipped without being read
    into memory.

    Args:
        location_id: ID of the MonitoringLocation that receives the data.
        zip_bytes: Raw bytes of the ZIP archive.
        db: Database session used for the location lookup and the ingest.
        source: Label stored in session and file metadata.
        dedupe: Skip ingest when this measurement already exists.

    Returns:
        The same dict shape as the HTTP upload, including a ``deduped`` flag.

    Raises:
        IngestError: On a bad ZIP, no usable files, or unknown location.
    """
    location = db.query(MonitoringLocation).filter_by(id=location_id).first()
    if not location:
        raise IngestError(f"Location {location_id} not found")

    file_entries: list[tuple[str, bytes]] = []
    try:
        with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
            for info in zf.infolist():
                if info.is_dir():
                    continue
                name = Path(info.filename).name  # strip nested folder paths
                # Filter before reading: the ingest core would discard these
                # anyway, so there is no reason to decompress them.
                if not name or not _is_wanted_nrl_file(name):
                    continue
                file_entries.append((name, zf.read(info)))
    except zipfile.BadZipFile as exc:
        # Chain the cause so logs show why the archive was rejected.
        raise IngestError("Downloaded data is not a valid ZIP archive.") from exc

    return _ingest_file_entries(location, file_entries, db, source=source, dedupe=dedupe)
|
||||
|
||||
|
||||
@router.post("/nrl/{location_id}/upload-data")
|
||||
async def upload_nrl_data(
|
||||
project_id: str,
|
||||
@@ -1754,11 +2031,13 @@ async def upload_nrl_data(
|
||||
- A single .zip file (the Auto_#### folder zipped) — auto-extracted
|
||||
- Multiple .rnd / .rnh files selected directly from the SD card folder
|
||||
|
||||
Creates a MonitoringSession from .rnh metadata and DataFile records
|
||||
for each measurement file. No unit assignment required.
|
||||
Normalizes the upload to (filename, bytes) entries, then hands off to the
|
||||
shared ingest core (`_ingest_file_entries`) — the same path the scheduled
|
||||
FTP pull uses via `ingest_nrl_zip`. Creates a MonitoringSession from the
|
||||
.rnh metadata and DataFile records for each measurement file. No unit
|
||||
assignment required. dedupe=False here preserves the prior manual-upload
|
||||
behaviour (re-uploading creates a fresh session).
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
# Verify project and location exist
|
||||
project = db.query(Project).filter_by(id=project_id).first()
|
||||
_require_module(project, "sound_monitoring", db)
|
||||
@@ -1769,7 +2048,7 @@ async def upload_nrl_data(
|
||||
if not location:
|
||||
raise HTTPException(status_code=404, detail="Location not found")
|
||||
|
||||
# --- Step 1: Normalize to (filename, bytes) list ---
|
||||
# --- Normalize upload to (filename, bytes) entries ---
|
||||
file_entries: list[tuple[str, bytes]] = []
|
||||
|
||||
if len(files) == 1 and files[0].filename.lower().endswith(".zip"):
|
||||
@@ -1793,145 +2072,11 @@ async def upload_nrl_data(
|
||||
if not file_entries:
|
||||
raise HTTPException(status_code=400, detail="No usable files found in upload.")
|
||||
|
||||
# --- Step 1b: Filter to only relevant files ---
|
||||
# Keep: .rnh (metadata) and measurement .rnd files
|
||||
# NL-43 generates two .rnd types: _Leq_ (15-min averages, wanted) and _Lp_ (1-sec granular, skip)
|
||||
# AU2 (NL-23/older Rion) generates a single Au2_####.rnd per session — always keep those
|
||||
# Drop: _Lp_ .rnd, .xlsx, .mp3, and anything else
|
||||
def _is_wanted(fname: str) -> bool:
|
||||
n = fname.lower()
|
||||
if n.endswith(".rnh"):
|
||||
return True
|
||||
if n.endswith(".rnd"):
|
||||
if "_leq_" in n: # NL-43 Leq file
|
||||
return True
|
||||
if n.startswith("au2_"): # AU2 format (NL-23) — always Leq equivalent
|
||||
return True
|
||||
if "_lp" not in n and "_leq_" not in n:
|
||||
# Unknown .rnd format — include it so we don't silently drop data
|
||||
return True
|
||||
return False
|
||||
|
||||
file_entries = [(fname, fbytes) for fname, fbytes in file_entries if _is_wanted(fname)]
|
||||
|
||||
if not file_entries:
|
||||
raise HTTPException(status_code=400, detail="No usable .rnd or .rnh files found. Expected NL-43 _Leq_ files or AU2 format .rnd files.")
|
||||
|
||||
# --- Step 2: Find and parse .rnh metadata ---
|
||||
rnh_meta = {}
|
||||
for fname, fbytes in file_entries:
|
||||
if fname.lower().endswith(".rnh"):
|
||||
rnh_meta = _parse_rnh(fbytes)
|
||||
break
|
||||
|
||||
# RNH files store local time (no UTC offset). Use local values for period
|
||||
# classification / label generation, then convert to UTC for DB storage so
|
||||
# the local_datetime Jinja filter displays the correct time.
|
||||
started_at_local = _parse_rnh_datetime(rnh_meta.get("start_time_str")) or datetime.utcnow()
|
||||
stopped_at_local = _parse_rnh_datetime(rnh_meta.get("stop_time_str"))
|
||||
|
||||
started_at = local_to_utc(started_at_local)
|
||||
stopped_at = local_to_utc(stopped_at_local) if stopped_at_local else None
|
||||
|
||||
duration_seconds = None
|
||||
if started_at and stopped_at:
|
||||
duration_seconds = int((stopped_at - started_at).total_seconds())
|
||||
|
||||
store_name = rnh_meta.get("store_name", "")
|
||||
serial_number = rnh_meta.get("serial_number", "")
|
||||
index_number = rnh_meta.get("index_number", "")
|
||||
|
||||
# --- Step 3: Create MonitoringSession ---
|
||||
# Use local times for period/label so classification reflects the clock at the site.
|
||||
period_type = _derive_period_type(started_at_local) if started_at_local else None
|
||||
session_label = _build_session_label(started_at_local, location.name, period_type) if started_at_local else None
|
||||
|
||||
session_id = str(uuid.uuid4())
|
||||
monitoring_session = MonitoringSession(
|
||||
id=session_id,
|
||||
project_id=project_id,
|
||||
location_id=location_id,
|
||||
unit_id=None,
|
||||
session_type="sound",
|
||||
started_at=started_at,
|
||||
stopped_at=stopped_at,
|
||||
duration_seconds=duration_seconds,
|
||||
status="completed",
|
||||
session_label=session_label,
|
||||
period_type=period_type,
|
||||
session_metadata=json.dumps({
|
||||
"source": "manual_upload",
|
||||
"store_name": store_name,
|
||||
"serial_number": serial_number,
|
||||
"index_number": index_number,
|
||||
}),
|
||||
)
|
||||
db.add(monitoring_session)
|
||||
db.commit()
|
||||
db.refresh(monitoring_session)
|
||||
|
||||
# --- Step 4: Write files to disk and create DataFile records ---
|
||||
output_dir = Path("data/Projects") / project_id / session_id
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
leq_count = 0
|
||||
lp_count = 0
|
||||
metadata_count = 0
|
||||
files_imported = 0
|
||||
|
||||
for fname, fbytes in file_entries:
|
||||
file_type = _classify_file(fname)
|
||||
fname_lower = fname.lower()
|
||||
|
||||
# Track counts for summary
|
||||
if fname_lower.endswith(".rnd"):
|
||||
if "_leq_" in fname_lower:
|
||||
leq_count += 1
|
||||
elif "_lp" in fname_lower:
|
||||
lp_count += 1
|
||||
elif fname_lower.endswith(".rnh"):
|
||||
metadata_count += 1
|
||||
|
||||
# Write to disk
|
||||
dest = output_dir / fname
|
||||
dest.write_bytes(fbytes)
|
||||
|
||||
# Compute checksum
|
||||
checksum = hashlib.sha256(fbytes).hexdigest()
|
||||
|
||||
# Store relative path from data/ dir
|
||||
rel_path = str(dest.relative_to("data"))
|
||||
|
||||
data_file = DataFile(
|
||||
id=str(uuid.uuid4()),
|
||||
session_id=session_id,
|
||||
file_path=rel_path,
|
||||
file_type=file_type,
|
||||
file_size_bytes=len(fbytes),
|
||||
downloaded_at=datetime.utcnow(),
|
||||
checksum=checksum,
|
||||
file_metadata=json.dumps({
|
||||
"source": "manual_upload",
|
||||
"original_filename": fname,
|
||||
"store_name": store_name,
|
||||
}),
|
||||
)
|
||||
db.add(data_file)
|
||||
files_imported += 1
|
||||
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"session_id": session_id,
|
||||
"files_imported": files_imported,
|
||||
"leq_files": leq_count,
|
||||
"lp_files": lp_count,
|
||||
"metadata_files": metadata_count,
|
||||
"store_name": store_name,
|
||||
"started_at": started_at.isoformat() if started_at else None,
|
||||
"stopped_at": stopped_at.isoformat() if stopped_at else None,
|
||||
}
|
||||
# --- Hand off to the shared ingest core ---
|
||||
try:
|
||||
return _ingest_file_entries(location, file_entries, db, source="manual_upload", dedupe=False)
|
||||
except IngestError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
||||
Reference in New Issue
Block a user