""" project_merge.py — consolidate a duplicate project into another. Use case: the metadata-backfill parser (and operators) create projects with slight name variations ("SR81" vs "SR 81", "Swank-Karns Crossing" vs "Swank-Karns Crossings", "Trumbull-Bryman Mont.Dam" vs "Trumbull-Brayman-Mont Dam"). Operator picks a SOURCE project to merge into a TARGET project; everything attached to source moves to target, same-named locations consolidate, and source is soft-deleted. Public API: preview(db, source_id, target_id) → MergePreview execute(db, source_id, target_id, *, decided_by="operator") → MergeResult Both raise HTTPException with appropriate 4xx codes for validation failures. """ from __future__ import annotations import logging from dataclasses import dataclass, field from datetime import datetime from typing import Optional from fastapi import HTTPException from sqlalchemy.orm import Session from backend.models import ( Project, ProjectModule, MonitoringLocation, UnitAssignment, UnitHistory, MonitoringSession, DataFile, ) log = logging.getLogger("backend.services.project_merge") # ── Dataclasses ─────────────────────────────────────────────────────────────── @dataclass class LocationMergePlan: source_id: str source_name: str target_id: Optional[str] # None = will be inserted as-new under target project target_name: Optional[str] # name in target after merge action: str # "move" | "consolidate" assignments_moving: int sessions_moving: int @dataclass class MergePreview: source_project_id: str source_project_name: str target_project_id: str target_project_name: str location_plans: list[LocationMergePlan] = field(default_factory=list) total_assignments_moving: int = 0 total_sessions_moving: int = 0 total_data_files_moving: int = 0 modules_to_add: list[str] = field(default_factory=list) warnings: list[str] = field(default_factory=list) @dataclass class MergeResult: source_project_id: str target_project_id: str assignments_moved: int locations_moved: int locations_consolidated: int sessions_moved: int data_files_moved: int modules_added: list[str] audit_rows_written: int # ── Helpers ─────────────────────────────────────────────────────────────────── def _normalise_name(s: Optional[str]) -> str: """Case-insensitive, whitespace-collapsing name normalisation. Lighter than metadata_backfill._normalise (no punctuation stripping) — for merging we want "Loc 1" and "Loc 1" to match but NOT "Loc 1" and "Loc-1" (those might be intentionally different). If operators DO want loose matching, they can rename one before merging. """ if not s: return "" import re return re.sub(r"\s+", " ", s.strip()).casefold() def _validate_pair(db: Session, source_id: str, target_id: str) -> tuple[Project, Project]: if source_id == target_id: raise HTTPException(status_code=400, detail="Cannot merge a project into itself.") source = db.query(Project).filter_by(id=source_id).first() target = db.query(Project).filter_by(id=target_id).first() if source is None: raise HTTPException(status_code=404, detail=f"Source project not found.") if target is None: raise HTTPException(status_code=404, detail=f"Target project not found.") if source.status == "deleted": raise HTTPException(status_code=400, detail=f"Source project '{source.name}' is already deleted.") if target.status == "deleted": raise HTTPException(status_code=400, detail=f"Target project '{target.name}' is deleted.") return source, target # ── Preview ─────────────────────────────────────────────────────────────────── def preview(db: Session, source_id: str, target_id: str) -> MergePreview: """Build a preview of what the merge will do. No writes.""" source, target = _validate_pair(db, source_id, target_id) # Locations in source vs target. source_locs = ( db.query(MonitoringLocation) .filter(MonitoringLocation.project_id == source_id) .all() ) target_locs = ( db.query(MonitoringLocation) .filter(MonitoringLocation.project_id == target_id) .all() ) target_by_norm = {_normalise_name(l.name): l for l in target_locs} location_plans: list[LocationMergePlan] = [] total_assignments_moving = 0 total_sessions_moving = 0 for sl in source_locs: n = _normalise_name(sl.name) tl = target_by_norm.get(n) a_count = ( db.query(UnitAssignment) .filter(UnitAssignment.location_id == sl.id) .count() ) s_count = ( db.query(MonitoringSession) .filter(MonitoringSession.location_id == sl.id) .count() ) total_assignments_moving += a_count total_sessions_moving += s_count if tl is not None: location_plans.append(LocationMergePlan( source_id = sl.id, source_name = sl.name, target_id = tl.id, target_name = tl.name, action = "consolidate", assignments_moving = a_count, sessions_moving = s_count, )) else: location_plans.append(LocationMergePlan( source_id = sl.id, source_name = sl.name, target_id = None, target_name = sl.name, action = "move", assignments_moving = a_count, sessions_moving = s_count, )) # DataFiles attached to the source project (if the table exists with a # project_id column). Optional — terra-view's DataFile model may not # always FK to project, so handle gracefully. df_count = 0 try: df_count = ( db.query(DataFile) .filter(DataFile.project_id == source_id) .count() ) except Exception: df_count = 0 total_data_files_moving = df_count # Modules: add anything in source missing from target. src_modules = { m.module_type for m in db.query(ProjectModule) .filter(ProjectModule.project_id == source_id, ProjectModule.enabled.is_(True)) .all() } tgt_modules = { m.module_type for m in db.query(ProjectModule) .filter(ProjectModule.project_id == target_id, ProjectModule.enabled.is_(True)) .all() } modules_to_add = sorted(src_modules - tgt_modules) warnings: list[str] = [] # Surface conditions the operator should think about. consolidations = sum(1 for p in location_plans if p.action == "consolidate") if consolidations: warnings.append( f"{consolidations} location(s) with matching names will be consolidated " f"(source assignments will move to the target's existing location). " f"If your same-named locations are actually different sites, rename one first." ) if source.client_name and target.client_name and source.client_name.strip().casefold() != target.client_name.strip().casefold(): warnings.append( f"Client names differ: source is \"{source.client_name}\", target is " f"\"{target.client_name}\". Target's client name will be kept." ) return MergePreview( source_project_id = source.id, source_project_name = source.name, target_project_id = target.id, target_project_name = target.name, location_plans = location_plans, total_assignments_moving = total_assignments_moving, total_sessions_moving = total_sessions_moving, total_data_files_moving = total_data_files_moving, modules_to_add = modules_to_add, warnings = warnings, ) # ── Execute ─────────────────────────────────────────────────────────────────── def execute( db: Session, source_id: str, target_id: str, *, decided_by: str = "operator", ) -> MergeResult: """Perform the merge in a single transaction. Steps: 1. Re-validate the pair. 2. For each location in source: - if a same-name location exists in target → "consolidate" mode: move source's assignments + sessions to target's location id, delete source's location. - else → "move" mode: just re-point the location's project_id. 3. Move any remaining direct-to-project FK rows (DataFiles). 4. Ensure target has all of source's modules. 5. Soft-delete source project. 6. Write a UnitHistory row per assignment that was moved (change_type='assignment_merged') so the deployment timeline on each affected unit reflects the merge. 7. Commit. """ source, target = _validate_pair(db, source_id, target_id) src_modules = { m.module_type for m in db.query(ProjectModule) .filter(ProjectModule.project_id == source_id, ProjectModule.enabled.is_(True)) .all() } tgt_modules = { m.module_type for m in db.query(ProjectModule) .filter(ProjectModule.project_id == target_id, ProjectModule.enabled.is_(True)) .all() } modules_to_add = sorted(src_modules - tgt_modules) # ── 1. Locations + their dependents ─────────────────────────────── source_locs = ( db.query(MonitoringLocation) .filter(MonitoringLocation.project_id == source_id) .all() ) target_locs = ( db.query(MonitoringLocation) .filter(MonitoringLocation.project_id == target_id) .all() ) target_by_norm = {_normalise_name(l.name): l for l in target_locs} assignments_moved = 0 sessions_moved = 0 locations_moved = 0 locations_consolidated = 0 audit_rows_written = 0 for sl in source_locs: n = _normalise_name(sl.name) tl = target_by_norm.get(n) # Pull this location's assignments + sessions (we'll re-point them). assignments = ( db.query(UnitAssignment) .filter(UnitAssignment.location_id == sl.id) .all() ) sessions = ( db.query(MonitoringSession) .filter(MonitoringSession.location_id == sl.id) .all() ) if tl is not None: # Consolidate: move dependents to target's existing location; # then delete the source location. for a in assignments: old_loc_id = a.location_id a.location_id = tl.id a.project_id = target.id db.add(UnitHistory( unit_id = a.unit_id, change_type = "assignment_merged", field_name = "unit_assignment.project_id", old_value = f"{source.name} / {sl.name}", new_value = f"{target.name} / {tl.name}", changed_at = datetime.utcnow(), source = "project_merge", notes = ( f"Project merge: '{source.name}' → '{target.name}'. " f"Location consolidated by name match. " f"By: {decided_by}." ), )) audit_rows_written += 1 assignments_moved += 1 for s in sessions: s.location_id = tl.id s.project_id = target.id sessions_moved += 1 # Delete the now-empty source location. db.delete(sl) locations_consolidated += 1 else: # Move: just re-point this location to the target project. sl.project_id = target.id for a in assignments: old_proj_id = a.project_id a.project_id = target.id db.add(UnitHistory( unit_id = a.unit_id, change_type = "assignment_merged", field_name = "unit_assignment.project_id", old_value = f"{source.name} / {sl.name}", new_value = f"{target.name} / {sl.name}", changed_at = datetime.utcnow(), source = "project_merge", notes = ( f"Project merge: '{source.name}' → '{target.name}'. " f"Location moved as-is. By: {decided_by}." ), )) audit_rows_written += 1 assignments_moved += 1 for s in sessions: s.project_id = target.id sessions_moved += 1 locations_moved += 1 # ── 2. Direct-to-project rows (DataFiles, ScheduledActions) ────── data_files_moved = 0 try: data_files = ( db.query(DataFile) .filter(DataFile.project_id == source_id) .all() ) for df in data_files: df.project_id = target.id data_files_moved += 1 except Exception as e: log.warning("DataFile move skipped (model may differ): %s", e) # ── 3. UnitAssignments that point directly at source.project_id with # no location (shouldn't happen but be defensive) ────────────── orphan_assignments = ( db.query(UnitAssignment) .filter(UnitAssignment.project_id == source_id) .all() ) for a in orphan_assignments: # Already moved if its location was moved. Catch any stragglers. if a.project_id == source_id: a.project_id = target.id # ── 4. Modules ──────────────────────────────────────────────────── import uuid for mod_type in modules_to_add: db.add(ProjectModule( id = str(uuid.uuid4()), project_id = target.id, module_type = mod_type, enabled = True, )) # Disable source's modules (defensive — source is being soft-deleted # but its modules table rows could still be inspected). for m in db.query(ProjectModule).filter(ProjectModule.project_id == source_id).all(): m.enabled = False # ── 5. Soft-delete source ───────────────────────────────────────── source.status = "deleted" source.deleted_at = datetime.utcnow() # Final audit row on the source project itself (operator-facing). # We don't have a Project-level history table, so log on every # affected unit as a marker. Already done per-assignment above. db.commit() return MergeResult( source_project_id = source.id, target_project_id = target.id, assignments_moved = assignments_moved, locations_moved = locations_moved, locations_consolidated = locations_consolidated, sessions_moved = sessions_moved, data_files_moved = data_files_moved, modules_added = modules_to_add, audit_rows_written = audit_rows_written, )