feat(projects): Tidy page for fuzzy-detecting + bulk-merging duplicate projects
Phase 5b first slice. Surfaces near-duplicate projects (typo variants, abbreviation differences, spacing variations like "SR81" vs "SR 81") as side-by-side pairs the operator can merge with one click. Backend (backend/services/project_tidy.py): - find_duplicate_pairs(db, threshold=0.85) walks all active projects and computes rapidfuzz.fuzz.WRatio similarity for every pair. Pre-filters too-short normalised names (< 4 chars) to avoid noise. Skips soft-deleted projects. Returns pairs sorted by score desc, then by total content (more assignments → review first). - Each pair carries a suggested merge target with a human-readable reason. Priorities (in order): manual source over parser source, populated project_number, more locations, more assignments, shorter name. Operator can override the suggestion by clicking the OTHER direction button. - O(N^2) over project count. Fine up to ~500 projects. Token-prefix blocking is the obvious next optimisation if it becomes slow. Backend (backend/routers/projects.py): - GET /api/projects/admin/duplicate_pairs?threshold=&max_pairs= returns pairs as JSON for the Tidy page. Frontend (templates/admin/project_tidy.html): - New admin page at /settings/developer/project-tidy. Threshold selector (95% / 90% / 85% / 80%) at the top; rescan button next to it; auto-scans on load. - Each pair card shows side-by-side project summaries (name, project_number, client, source-badge, location/assignment counts) with the suggested target visually highlighted (orange border). Three buttons: "Merge A → B", "Merge B → A", "Not a dup" (hide locally). - Click-to-merge opens a native confirm with the preview totals (assignments/sessions/data files moving, consolidations) — same data the project_header.html merge modal shows. On confirm, hits the existing /merge_into endpoint and re-scans automatically. 
- Source badges distinguish parser-created (`metadata_backfill`) from manual projects — at a glance the operator can see "this duplicate is parser-generated; safe to merge into the manual one". Frontend (templates/admin/metadata_backfill.html): - Apply-result handling now surfaces failed[] cluster reasons in a dedicated failure panel (bottom-left, dismissable). Previously a 200 OK with all-failures showed a misleading "1 cluster applied" success toast because the count and the failure list weren't being reconciled. This bit us during the DB-revert recovery earlier — the project_modules table was missing, every apply silently rolled back, user saw success toasts. Fixed. Smoke-verified against current state (10K events, 9 projects, post-merge): tool correctly finds 0 pairs at threshold 0.85 (data is clean), 1 false-positive at 0.70 (two unrelated projects sharing the token "81" — example of why the 0.85 default is correct). Settings link added under Developer → Project Tidy. Phase 5c (swap-detection daily background job + notification inbox) remains deferred to the next session. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -251,6 +251,13 @@ async def metadata_backfill_wizard_page(request: Request):
|
|||||||
return templates.TemplateResponse("admin/metadata_backfill.html", {"request": request})
|
return templates.TemplateResponse("admin/metadata_backfill.html", {"request": request})
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/settings/developer/project-tidy", response_class=HTMLResponse)
async def project_tidy_page(request: Request):
    """Serve the Project Tidy admin page (Phase 5b).

    The page detects duplicate-looking projects by fuzzy name match and
    lets the operator merge them by clicking through pairs.
    """
    context = {"request": request}
    return templates.TemplateResponse("admin/project_tidy.html", context)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/modems", response_class=HTMLResponse)
|
@app.get("/modems", response_class=HTMLResponse)
|
||||||
async def modems_page(request: Request):
|
async def modems_page(request: Request):
|
||||||
"""Field modems management dashboard"""
|
"""Field modems management dashboard"""
|
||||||
|
|||||||
@@ -729,6 +729,49 @@ async def project_merge_preview(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/admin/duplicate_pairs")
async def get_duplicate_pairs(
    threshold: float = 0.85,
    max_pairs: int = 200,
    db: Session = Depends(get_db),
):
    """Return active-project pairs whose names fuzzy-match above a threshold.

    Used by the Tidy page to surface duplicates that would otherwise have
    to be hunted down one at a time. Each pair carries a suggested
    merge target with the reasoning so the operator can decide direction
    with one click.

    Query parameters:
        threshold: similarity floor in the range 0.0-1.0. Values outside
            that range (including NaN) fall back to the service default.
        max_pairs: maximum number of pairs to return; negative values are
            treated as 0.

    Returns:
        ``{"pairs": [...], "threshold": <effective threshold>}``. The
        echoed threshold is the value actually used for the scan.
    """
    from backend.services import project_tidy as pt

    # Both values come straight from the query string. An out-of-range
    # threshold would surface every pair (or none), and a negative
    # max_pairs would be interpreted as a negative slice bound downstream.
    # The chained comparison is also False for NaN, so NaN is caught here.
    if not 0.0 <= threshold <= 1.0:
        threshold = pt.DEFAULT_THRESHOLD
    max_pairs = max(0, max_pairs)

    # NOTE(review): the scan is synchronous and O(N^2) in project count, so
    # it occupies the event loop while it runs. This matches the other
    # async routes visible here; revisit if the project count grows.
    pairs = pt.find_duplicate_pairs(db, threshold=threshold, max_pairs=max_pairs)

    summary_fields = (
        "id",
        "name",
        "project_number",
        "client_name",
        "source",
        "status",
        "location_count",
        "assignment_count",
    )

    def _ps(p):
        # JSON-friendly projection of a ProjectSummary (event_count_total
        # is deliberately not exposed).
        return {field: getattr(p, field) for field in summary_fields}

    return {
        "pairs": [
            {
                "a": _ps(pair.a),
                "b": _ps(pair.b),
                "score": round(pair.score, 3),
                "suggested_target_id": pair.suggested_target_id,
                "reason": pair.reason,
            }
            for pair in pairs
        ],
        "threshold": threshold,
    }
|
||||||
|
|
||||||
|
|
||||||
@router.post("/{source_id}/merge_into")
|
@router.post("/{source_id}/merge_into")
|
||||||
async def project_merge_execute(
|
async def project_merge_execute(
|
||||||
source_id: str,
|
source_id: str,
|
||||||
|
|||||||
@@ -0,0 +1,235 @@
|
|||||||
|
"""
|
||||||
|
project_tidy.py — find duplicate-looking projects + offer bulk merge.
|
||||||
|
|
||||||
|
The metadata-backfill parser is good at clustering events into candidate
|
||||||
|
projects but doesn't compare its proposed project names against EACH OTHER
|
||||||
|
(it only checks against existing terra-view projects). After a bulk
|
||||||
|
apply, you can end up with many near-duplicate projects — typo variants,
|
||||||
|
abbreviation differences, etc. This module surfaces them as pairs the
|
||||||
|
operator can merge.
|
||||||
|
|
||||||
|
Pairs vs clusters: a fully-connected group like (A, B, C) where each pair
|
||||||
|
scores >= threshold becomes 3 pairs. The operator has to do 2 merges to
|
||||||
|
fully consolidate. We don't try to be smarter about transitive grouping —
|
||||||
|
in practice operators want to review the highest-similarity pair first
|
||||||
|
anyway, and the list re-computes after each merge.
|
||||||
|
|
||||||
|
Public API:
|
||||||
|
find_duplicate_pairs(db, *, threshold=0.85, max_pairs=200) → list[DuplicatePair]
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import rapidfuzz
|
||||||
|
from sqlalchemy import func
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from backend.models import (
|
||||||
|
Project,
|
||||||
|
MonitoringLocation,
|
||||||
|
UnitAssignment,
|
||||||
|
)
|
||||||
|
from backend.services.metadata_backfill import _normalise as _meta_normalise
|
||||||
|
|
||||||
|
log = logging.getLogger("backend.services.project_tidy")
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_THRESHOLD = 0.85 # WRatio similarity above which we surface a pair
|
||||||
|
DEFAULT_MAX_PAIRS = 200 # Cap the result list to keep response small
|
||||||
|
MIN_NORMALISED_LENGTH = 4 # Skip projects whose normalised name is too short
|
||||||
|
# to fuzzy-match safely (avoids "1" / "1" pairs).
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ProjectSummary:
    """Flat snapshot of one project plus the counts the tidy scan needs."""

    id: str
    name: str
    project_number: Optional[str]
    client_name: Optional[str]
    # Provenance label: 'manual' | 'metadata_backfill' | ...
    source: str
    status: str
    location_count: int
    assignment_count: int
    # Approximate: sum across assignments.
    event_count_total: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DuplicatePair:
    """Two projects whose names scored at or above the similarity threshold."""

    a: ProjectSummary
    b: ProjectSummary
    score: float
    # The recommended "keep" side of the merge.
    suggested_target_id: str
    # Human-readable explanation of why that target was picked.
    reason: str
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _normalise_project_name(name: str) -> str:
|
||||||
|
"""Project-name normalisation for tidy comparison.
|
||||||
|
|
||||||
|
Reuses the metadata_backfill normaliser (lowercase, punctuation→space,
|
||||||
|
collapse whitespace). Returns "" for None or all-punctuation names.
|
||||||
|
"""
|
||||||
|
return _meta_normalise(name)
|
||||||
|
|
||||||
|
|
||||||
|
def _summarise_projects(db: Session) -> list[ProjectSummary]:
    """Build one ProjectSummary per non-deleted project, with bulk counts.

    Args:
        db: open SQLAlchemy session.

    Returns:
        A summary per project whose status is not "deleted". Each carries
        its location count, assignment count, and dominant assignment
        source ("manual" when the project has no assignments).
        ``event_count_total`` is left at 0 because it is not cheap to
        compute here.
    """
    # `status != 'deleted'` alone evaluates to NULL for NULL-status rows in
    # SQL and would silently exclude them, yet a missing status is treated
    # as "active" below. Include those rows explicitly.
    projects = (
        db.query(Project)
        .filter(Project.status.is_(None) | (Project.status != "deleted"))
        .all()
    )
    if not projects:
        # Nothing to summarise; skip the count queries entirely.
        return []

    project_ids = [p.id for p in projects]

    loc_counts: dict[str, int] = dict(
        db.query(MonitoringLocation.project_id, func.count(MonitoringLocation.id))
        .filter(MonitoringLocation.project_id.in_(project_ids))
        .group_by(MonitoringLocation.project_id)
        .all()
    )

    # One grouped query yields both the per-source breakdown (used to pick
    # the dominant source) and, summed, the total assignment count.
    source_rows = (
        db.query(UnitAssignment.project_id, UnitAssignment.source, func.count(UnitAssignment.id))
        .filter(UnitAssignment.project_id.in_(project_ids))
        .group_by(UnitAssignment.project_id, UnitAssignment.source)
        .all()
    )
    by_proj_src: dict[str, dict[str, int]] = {}
    for proj_id, src, cnt in source_rows:
        per_source = by_proj_src.setdefault(proj_id, {})
        label = src or "manual"
        # NULL/empty and explicit 'manual' arrive as separate GROUP BY rows
        # but share one label, so accumulate instead of overwriting.
        per_source[label] = per_source.get(label, 0) + cnt

    summaries: list[ProjectSummary] = []
    for p in projects:
        per_source = by_proj_src.get(p.id, {})
        # Dominant source = the label with the most assignments. Used to
        # break ties when picking the "keep" target (manual beats parser).
        dominant = max(per_source, key=per_source.get) if per_source else "manual"
        summaries.append(ProjectSummary(
            id=p.id,
            name=p.name,
            project_number=p.project_number,
            client_name=p.client_name,
            source=dominant,
            status=p.status or "active",
            location_count=loc_counts.get(p.id, 0),
            assignment_count=sum(per_source.values()),
            event_count_total=0,  # not cheap to compute here; left 0
        ))

    return summaries
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_target(a: ProjectSummary, b: ProjectSummary) -> tuple[str, str]:
|
||||||
|
"""Decide which project should be the merge target (the one we keep).
|
||||||
|
|
||||||
|
Priorities (in order):
|
||||||
|
1. The one with `source='manual'` over `source='metadata_backfill'`
|
||||||
|
— operator-curated projects beat parser-created ones.
|
||||||
|
2. The one with a populated `project_number`.
|
||||||
|
3. The one with more locations (more curation history).
|
||||||
|
4. The one with more assignments.
|
||||||
|
5. The one with the shorter, cleaner name (tiebreaker).
|
||||||
|
|
||||||
|
Returns (target_id, reason_string).
|
||||||
|
"""
|
||||||
|
# 1. Source provenance.
|
||||||
|
a_manual = a.source == "manual"
|
||||||
|
b_manual = b.source == "manual"
|
||||||
|
if a_manual and not b_manual:
|
||||||
|
return a.id, "A is manually-created; B is parser-created"
|
||||||
|
if b_manual and not a_manual:
|
||||||
|
return b.id, "B is manually-created; A is parser-created"
|
||||||
|
|
||||||
|
# 2. project_number populated.
|
||||||
|
if a.project_number and not b.project_number:
|
||||||
|
return a.id, "A has a project_number; B doesn't"
|
||||||
|
if b.project_number and not a.project_number:
|
||||||
|
return b.id, "B has a project_number; A doesn't"
|
||||||
|
|
||||||
|
# 3. More locations.
|
||||||
|
if a.location_count > b.location_count:
|
||||||
|
return a.id, f"A has more locations ({a.location_count} vs {b.location_count})"
|
||||||
|
if b.location_count > a.location_count:
|
||||||
|
return b.id, f"B has more locations ({b.location_count} vs {a.location_count})"
|
||||||
|
|
||||||
|
# 4. More assignments.
|
||||||
|
if a.assignment_count > b.assignment_count:
|
||||||
|
return a.id, f"A has more assignments ({a.assignment_count} vs {b.assignment_count})"
|
||||||
|
if b.assignment_count > a.assignment_count:
|
||||||
|
return b.id, f"B has more assignments ({b.assignment_count} vs {a.assignment_count})"
|
||||||
|
|
||||||
|
# 5. Shorter name (less likely to have baked-in junk).
|
||||||
|
if len(a.name) <= len(b.name):
|
||||||
|
return a.id, "A has the shorter / cleaner name"
|
||||||
|
return b.id, "B has the shorter / cleaner name"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Public ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def find_duplicate_pairs(
    db: Session,
    *,
    threshold: float = DEFAULT_THRESHOLD,
    max_pairs: int = DEFAULT_MAX_PAIRS,
) -> list[DuplicatePair]:
    """Compute all project-pair similarities at or above `threshold`.

    O(N^2) over the project count — fine up to ~500 projects; beyond that
    we'd want a blocked / token-indexed approach. In practice
    `metadata_backfill` projects tend to share tokens, so a simple
    pre-filter (skip pairs that share NO tokens) would cheaply cut the
    inner loop. Deferred until profiling motivates it.

    Args:
        db: open SQLAlchemy session.
        threshold: similarity floor on a 0.0-1.0 scale (WRatio / 100).
        max_pairs: cap on the number of pairs returned; zero or negative
            yields an empty list.

    Returns:
        Pairs sorted by score descending, then by combined assignment
        count descending, truncated to `max_pairs`.
    """
    # A negative cap would otherwise act as a negative slice bound and
    # silently drop the *last* |max_pairs| results instead of capping.
    if max_pairs <= 0:
        return []

    # Pair every usable summary with its normalised name once, up front.
    # Names that normalise to fewer than MIN_NORMALISED_LENGTH characters
    # are too short to fuzzy-match safely and are skipped.
    candidates: list[tuple[ProjectSummary, str]] = []
    for summary in _summarise_projects(db):
        normalised = _normalise_project_name(summary.name)
        if len(normalised) >= MIN_NORMALISED_LENGTH:
            candidates.append((summary, normalised))

    pairs: list[DuplicatePair] = []
    for i, (a, a_norm) in enumerate(candidates):
        # Only compare against later candidates so each unordered pair is
        # scored exactly once.
        for b, b_norm in candidates[i + 1:]:
            score = rapidfuzz.fuzz.WRatio(a_norm, b_norm) / 100.0
            if score < threshold:
                continue
            target_id, reason = _pick_target(a, b)
            pairs.append(DuplicatePair(
                a=a,
                b=b,
                score=score,
                suggested_target_id=target_id,
                reason=reason,
            ))

    # Sort by score desc, then by total content (more data → review first).
    pairs.sort(key=lambda p: (-p.score, -(p.a.assignment_count + p.b.assignment_count)))

    return pairs[:max_pairs]
|
||||||
@@ -595,9 +595,24 @@ async function _apply(clusterIds) {
|
|||||||
});
|
});
|
||||||
if (!r.ok) throw new Error('HTTP ' + r.status);
|
if (!r.ok) throw new Error('HTTP ' + r.status);
|
||||||
const d = await r.json();
|
const d = await r.json();
|
||||||
const sub = `${d.applied} applied · ${d.project_ids_created.length} new project(s) · ${d.location_ids_created.length} new location(s)` + (d.failed.length ? ` · ${d.failed.length} failed` : '');
|
const failedCount = (d.failed || []).length;
|
||||||
_showToast(`${d.applied} cluster${d.applied === 1 ? '' : 's'} applied`, sub, d.failed.length ? 'error' : 'success');
|
|
||||||
|
// Three states:
|
||||||
|
// total success — applied N, no failures → green toast, 4s
|
||||||
|
// partial — applied N, M failures → red toast + modal listing reasons
|
||||||
|
// total failure — applied 0, failures → red toast + modal
|
||||||
|
if (failedCount === 0) {
|
||||||
|
const sub = `${d.applied} applied · ${d.project_ids_created.length} new project(s) · ${d.location_ids_created.length} new location(s)`;
|
||||||
|
_showToast(`${d.applied} cluster${d.applied === 1 ? '' : 's'} applied`, sub, 'success');
|
||||||
_hideToast(4000);
|
_hideToast(4000);
|
||||||
|
} else {
|
||||||
|
const title = d.applied > 0
|
||||||
|
? `${d.applied} applied, ${failedCount} failed`
|
||||||
|
: `Apply failed — ${failedCount} cluster${failedCount === 1 ? '' : 's'} could not be applied`;
|
||||||
|
_showToast(title, 'See the details panel.', 'error');
|
||||||
|
_hideToast(6000);
|
||||||
|
_showFailureDetails(d.failed);
|
||||||
|
}
|
||||||
await runScan(true); // refresh
|
await runScan(true); // refresh
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
_showToast('Apply failed', e.message, 'error');
|
_showToast('Apply failed', e.message, 'error');
|
||||||
@@ -605,6 +620,46 @@ async function _apply(clusterIds) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Modal-ish panel that lists each failed cluster with its server-side
// reason. Common failure modes seen in dev: missing DB tables after a
// stale schema, blocking conflicts that slipped past the front-end guard,
// rapidfuzz/SQLAlchemy edge cases. Operator can dismiss and either
// retry the cluster, skip it, or fix the underlying issue.
//
// `failed` is the server's failed[] array; each entry is read for
// `cluster_id` and `reason`. The panel element is created on first use and
// reused (its content replaced) on later calls.
function _showFailureDetails(failed) {
    // Tolerate a missing/odd payload rather than throwing inside the
    // apply-result handler.
    const failures = Array.isArray(failed) ? failed : [];

    let panel = document.getElementById('apply-failure-panel');
    if (!panel) {
        panel = document.createElement('div');
        panel.id = 'apply-failure-panel';
        panel.className = 'fixed bottom-6 left-6 right-6 sm:right-auto sm:max-w-xl bg-white dark:bg-slate-800 rounded-xl shadow-2xl border border-red-200 dark:border-red-800 p-4 z-40';
        document.body.appendChild(panel);
    }

    // Everything interpolated below lands in innerHTML, so the cluster id
    // is escaped just like the reason. String() guards against a
    // non-string id, on which .slice would throw.
    const rows = failures.map(f => `
        <li class="flex items-start gap-2 text-sm border-l-2 border-red-300 dark:border-red-700 pl-3 py-1">
            <code class="font-mono text-xs text-gray-500 dark:text-gray-400">${_esc(String(f.cluster_id || '').slice(0, 8))}…</code>
            <span class="text-gray-800 dark:text-gray-200 flex-1">${_esc(f.reason || '(no reason)')}</span>
        </li>
    `).join('');

    panel.innerHTML = `
        <div class="flex items-start justify-between gap-3 mb-2">
            <h4 class="font-semibold text-gray-900 dark:text-white">
                <svg class="w-5 h-5 inline text-red-500 -mt-0.5 mr-1" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/></svg>
                ${failures.length} cluster${failures.length === 1 ? '' : 's'} failed to apply
            </h4>
            <button onclick="document.getElementById('apply-failure-panel').remove()"
                    class="text-gray-400 hover:text-gray-600 dark:hover:text-gray-200">
                <svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12"/></svg>
            </button>
        </div>
        <ul class="space-y-1 max-h-64 overflow-y-auto">${rows}</ul>
        <p class="mt-3 text-xs text-gray-500 dark:text-gray-400">
            Common causes: missing DB schema (run the migration sweep), blocking conflict
            with an existing UnitAssignment, or a UNIQUE constraint collision on the
            project name. Re-scan and the failed clusters reappear as pending — fix the
            underlying issue and retry.
        </p>
    `;
}
|
||||||
|
|
||||||
async function applyOne(clusterId) { return _apply([clusterId]); }
|
async function applyOne(clusterId) { return _apply([clusterId]); }
|
||||||
|
|
||||||
async function applyBulkHighConfidence() {
|
async function applyBulkHighConfidence() {
|
||||||
|
|||||||
@@ -0,0 +1,267 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Project Tidy - Seismo Fleet Manager{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<!-- Breadcrumb -->
|
||||||
|
<div class="mb-6">
|
||||||
|
<nav class="flex items-center space-x-2 text-sm">
|
||||||
|
<a href="/settings" class="text-seismo-orange hover:text-seismo-navy flex items-center">
|
||||||
|
<svg class="w-4 h-4 mr-1" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7"></path>
|
||||||
|
</svg>
|
||||||
|
Settings
|
||||||
|
</a>
|
||||||
|
<svg class="w-4 h-4 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7"></path>
|
||||||
|
</svg>
|
||||||
|
<span class="text-gray-900 dark:text-white font-medium">Project Tidy</span>
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Header -->
|
||||||
|
<div class="mb-6">
|
||||||
|
<h1 class="text-3xl font-bold text-gray-900 dark:text-white">Project Tidy</h1>
|
||||||
|
<p class="text-gray-600 dark:text-gray-400 mt-1">
|
||||||
|
Find duplicate-looking projects via fuzzy name matching, then merge them with one click.
|
||||||
|
Useful after the metadata-backfill parser creates near-duplicates from operator name variations.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Controls -->
|
||||||
|
<div class="bg-white dark:bg-slate-800 rounded-xl shadow-lg p-4 mb-5">
|
||||||
|
<div class="flex flex-wrap items-end gap-3">
|
||||||
|
<div class="flex flex-col gap-1">
|
||||||
|
<label class="text-xs text-gray-500 dark:text-gray-400">Similarity threshold</label>
|
||||||
|
<select id="threshold" onchange="runScan()"
|
||||||
|
class="px-3 py-1.5 text-sm border border-gray-300 dark:border-gray-600 rounded-lg bg-white dark:bg-slate-700 text-gray-900 dark:text-white">
|
||||||
|
<option value="0.95">≥ 95% — near-identical only (typos)</option>
|
||||||
|
<option value="0.90">≥ 90% — close variants</option>
|
||||||
|
<option value="0.85" selected>≥ 85% — fuzzy match floor (recommended)</option>
|
||||||
|
<option value="0.80">≥ 80% — aggressive (more false positives)</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<button onclick="runScan()"
|
||||||
|
class="ml-auto px-4 py-1.5 text-sm bg-seismo-orange text-white rounded-lg hover:bg-seismo-navy transition-colors">
|
||||||
|
↻ Scan for duplicates
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Results -->
|
||||||
|
<div id="results" class="space-y-3">
|
||||||
|
<div class="text-center py-12 text-gray-500 dark:text-gray-400">
|
||||||
|
Click "Scan for duplicates" to find pairs.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Apply progress toast -->
|
||||||
|
<div id="tidy-toast" class="hidden fixed bottom-6 right-6 bg-white dark:bg-slate-800 rounded-xl shadow-2xl border border-gray-200 dark:border-gray-700 p-4 z-50 max-w-md">
|
||||||
|
<div class="flex items-center gap-3">
|
||||||
|
<div id="toast-icon" class="shrink-0">
|
||||||
|
<div class="animate-spin rounded-full h-6 w-6 border-b-2 border-seismo-orange"></div>
|
||||||
|
</div>
|
||||||
|
<div class="flex-1">
|
||||||
|
<p id="toast-message" class="text-sm font-medium text-gray-900 dark:text-white">Working…</p>
|
||||||
|
<p id="toast-sub" class="text-xs text-gray-500 dark:text-gray-400 mt-0.5"></p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
let _pairs = [];
|
||||||
|
|
||||||
|
// Escape a value for safe interpolation into HTML text or a quoted
// attribute. null/undefined become ''; anything else is stringified first.
function _esc(s) {
    if (s == null) return '';
    // '&' must be replaced first so the entities produced by the later
    // replacements are not themselves re-escaped.
    return String(s)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
|
||||||
|
|
||||||
|
// Small coloured pill showing where a project came from: "parser" for the
// two metadata-backfill sources, "manual" for everything else.
function _sourceBadge(source) {
    const parserSources = ['metadata_backfill', 'metadata_backfill_swap'];
    if (!parserSources.includes(source)) {
        return '<span class="px-1.5 py-0.5 rounded text-xs bg-blue-100 text-blue-800 dark:bg-blue-900/30 dark:text-blue-300" title="Manually created via the UI">manual</span>';
    }
    return '<span class="px-1.5 py-0.5 rounded text-xs bg-amber-100 text-amber-800 dark:bg-amber-900/30 dark:text-amber-300" title="Auto-created by the metadata-backfill parser">parser</span>';
}
|
||||||
|
|
||||||
|
// Handle of the pending auto-hide timer, if any. Shared by _showToast and
// _hideToast so a stale timer from an earlier toast can never hide a newer one.
let _toastHideTimer = null;

// Show the toast with a title, an optional sub-line and an icon chosen by
// `kind`: 'success' → green check, 'error' → red warning, anything else →
// spinner. Cancels any auto-hide scheduled for a previous toast.
function _showToast(message, sub, kind) {
    if (_toastHideTimer !== null) {
        clearTimeout(_toastHideTimer);
        _toastHideTimer = null;
    }
    const toast = document.getElementById('tidy-toast');
    const icon = document.getElementById('toast-icon');
    document.getElementById('toast-message').textContent = message;
    document.getElementById('toast-sub').textContent = sub || '';
    if (kind === 'success') {
        icon.innerHTML = '<svg class="w-6 h-6 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7"></path></svg>';
    } else if (kind === 'error') {
        icon.innerHTML = '<svg class="w-6 h-6 text-red-500" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>';
    } else {
        icon.innerHTML = '<div class="animate-spin rounded-full h-6 w-6 border-b-2 border-seismo-orange"></div>';
    }
    toast.classList.remove('hidden');
}

// Hide the toast after `after` milliseconds (default 3000). Re-scheduling
// replaces any previously pending hide instead of stacking timers.
function _hideToast(after) {
    if (_toastHideTimer !== null) {
        clearTimeout(_toastHideTimer);
    }
    _toastHideTimer = setTimeout(() => {
        _toastHideTimer = null;
        document.getElementById('tidy-toast').classList.add('hidden');
    }, after || 3000);
}
|
||||||
|
|
||||||
|
// Monotonic counter identifying the most recent scan request. Responses
// belonging to an older scan are discarded.
let _scanSeq = 0;

// Fetch duplicate pairs at the currently selected threshold and render
// them. Shows a spinner while loading and an inline error on failure.
async function runScan() {
    const seq = ++_scanSeq;
    const results = document.getElementById('results');
    results.innerHTML = '<div class="text-center py-12 text-gray-500 dark:text-gray-400"><div class="animate-spin rounded-full h-8 w-8 border-b-2 border-seismo-orange mx-auto mb-3"></div>Scanning…</div>';
    const threshold = document.getElementById('threshold').value;
    try {
        const r = await fetch(`/api/projects/admin/duplicate_pairs?threshold=${encodeURIComponent(threshold)}`);
        if (!r.ok) throw new Error('HTTP ' + r.status);
        const d = await r.json();
        // The operator may have changed the threshold (or hit rescan) while
        // this request was in flight; only the newest scan may update the UI.
        if (seq !== _scanSeq) return;
        _pairs = d.pairs || [];
        render();
    } catch (e) {
        if (seq !== _scanSeq) return;
        results.innerHTML = `<div class="bg-white dark:bg-slate-800 rounded-xl shadow-lg p-6 text-center text-red-500">Scan failed: ${_esc(e.message)}</div>`;
    }
}
|
||||||
|
|
||||||
|
// Paint the results area from the module-level _pairs list: an "all clean"
// card when it is empty, otherwise a summary header followed by one card
// per pair.
function render() {
    const container = document.getElementById('results');
    const count = _pairs.length;

    if (!count) {
        container.innerHTML = `<div class="bg-white dark:bg-slate-800 rounded-xl shadow-lg p-8 text-center">
            <svg class="w-16 h-16 mx-auto mb-4 text-green-500" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z"></path>
            </svg>
            <h3 class="text-lg font-semibold text-gray-900 dark:text-white mb-1">✨ No duplicates above the threshold</h3>
            <p class="text-gray-500 dark:text-gray-400">Lower the threshold or call it good.</p>
        </div>`;
        return;
    }

    const plural = count === 1 ? '' : 's';
    const header = `<div class="bg-white dark:bg-slate-800 rounded-xl shadow-lg p-4 mb-3">
        <div class="text-sm text-gray-700 dark:text-gray-300">
            Found <strong>${count}</strong> duplicate pair${plural}.
            Review the suggested merge direction (arrow points at the target project to keep),
            adjust if needed, then click <strong>Merge</strong>.
        </div>
    </div>`;
    const cards = _pairs.map(_renderPair);
    container.innerHTML = header + cards.join('');
}
|
||||||
|
|
||||||
|
// Build the card for one duplicate pair: score badge, the server's reason,
// the two merge-direction buttons plus "Not a dup", and both project
// summaries side by side. The button that merges INTO the suggested
// target gets the highlighted style.
function _renderPair(pair, i) {
    const keepSide = pair.suggested_target_id === pair.a.id ? 'a' : 'b';
    const highlightedCls = 'bg-seismo-orange hover:bg-seismo-navy text-white font-medium';
    const plainCls = 'border border-gray-300 dark:border-gray-600 text-gray-700 dark:text-gray-300 hover:bg-gray-50 dark:hover:bg-gray-700';
    const intoBCls = keepSide === 'b' ? highlightedCls : plainCls;
    const intoACls = keepSide === 'a' ? highlightedCls : plainCls;
    const percent = (pair.score * 100).toFixed(0);

    return `<div class="bg-white dark:bg-slate-800 rounded-xl shadow-lg p-4" data-idx="${i}">
        <div class="flex items-center justify-between mb-3">
            <div class="flex items-center gap-2">
                <span class="px-2 py-0.5 rounded text-xs font-medium bg-amber-100 text-amber-800 dark:bg-amber-900/30 dark:text-amber-300">${percent}% match</span>
                <span class="text-xs text-gray-500 dark:text-gray-400">${_esc(pair.reason)}</span>
            </div>
            <div class="flex items-center gap-2">
                <button onclick="confirmMerge(${i}, 'a_into_b')"
                        class="px-3 py-1.5 text-xs rounded ${intoBCls}">
                    Merge A → B
                </button>
                <button onclick="confirmMerge(${i}, 'b_into_a')"
                        class="px-3 py-1.5 text-xs rounded ${intoACls}">
                    Merge B → A
                </button>
                <button onclick="dismissPair(${i})"
                        title="Hide this pair (not actually a duplicate)"
                        class="px-3 py-1.5 text-xs border border-gray-300 dark:border-gray-600 text-gray-500 dark:text-gray-400 rounded hover:bg-gray-50 dark:hover:bg-gray-700">
                    Not a dup
                </button>
            </div>
        </div>
        <div class="grid grid-cols-2 gap-3">
            ${_renderProject(pair.a, 'A', keepSide === 'a')}
            ${_renderProject(pair.b, 'B', keepSide === 'b')}
        </div>
    </div>`;
}
|
||||||
|
|
||||||
|
// Render one side of a pair as a clickable summary tile that opens the
// project page in a new tab. `label` is 'A' or 'B'; `isTarget` adds the
// orange highlight and the "suggested target" caption.
function _renderProject(p, label, isTarget) {
    const borderCls = isTarget ? 'border-seismo-orange ring-1 ring-seismo-orange/30' : 'border-gray-200 dark:border-gray-700';
    // The id is a URL path segment first and an HTML attribute value second,
    // so it is URL-encoded and then HTML-escaped.
    const href = `/projects/${encodeURIComponent(p.id)}`;
    // rel="noopener noreferrer": the new tab must not get a window.opener
    // handle back to this admin page.
    return `<a href="${_esc(href)}" target="_blank" rel="noopener noreferrer"
        class="block bg-gray-50 dark:bg-slate-900/50 rounded-lg p-3 border ${borderCls} hover:shadow-md transition-shadow">
        <div class="flex items-start justify-between gap-2 mb-1">
            <div class="text-xs text-gray-500 dark:text-gray-400">Project ${label}${isTarget ? ' · suggested target' : ''}</div>
            ${_sourceBadge(p.source)}
        </div>
        <div class="font-semibold text-gray-900 dark:text-white text-sm">${_esc(p.name)}</div>
        ${p.project_number ? `<div class="text-xs text-gray-500 dark:text-gray-400 mt-0.5">#${_esc(p.project_number)}</div>` : ''}
        ${p.client_name ? `<div class="text-xs text-gray-500 dark:text-gray-400 mt-0.5">${_esc(p.client_name)}</div>` : ''}
        <div class="flex items-center gap-3 text-xs text-gray-600 dark:text-gray-400 mt-2">
            <span><strong>${_esc(p.location_count)}</strong> location${p.location_count === 1 ? '' : 's'}</span>
            <span><strong>${_esc(p.assignment_count)}</strong> assignment${p.assignment_count === 1 ? '' : 's'}</span>
        </div>
    </a>`;
}
|
||||||
|
|
||||||
|
/**
 * Preview and, after operator confirmation, merge one project of a pair into
 * the other.
 *
 * Flow: fetch the merge preview, show a native confirm() with the preview
 * totals, POST to the merge_into endpoint, then re-scan so the list reflects
 * the merge. Failures are reported through alert() (preview) or the toast
 * (merge / re-scan); nothing is thrown to the caller.
 *
 * @param {number} idx - Index of the pair in `_pairs`; unknown indices are ignored.
 * @param {string} direction - 'a_into_b' merges pair.a into pair.b; any other
 *     value merges pair.b into pair.a.
 * @returns {Promise<void>}
 */
async function confirmMerge(idx, direction) {
    const pair = _pairs[idx];
    if (!pair) return;

    const [source, target] = direction === 'a_into_b' ? [pair.a, pair.b] : [pair.b, pair.a];
    const sourceName = source.name;
    const targetName = target.name;
    // IDs end up in a URL path segment and a query value; encode them so an
    // unexpected character cannot change which endpoint is hit.
    const sourceSeg = encodeURIComponent(source.id);
    const targetArg = encodeURIComponent(target.id);

    // Shared by both requests: resolve with the parsed JSON body, or throw an
    // Error carrying the server's `detail` (falling back to the HTTP status).
    const requestJson = async (url, options) => {
        const r = await fetch(url, options);
        if (!r.ok) {
            const err = await r.json().catch(() => ({detail: 'HTTP ' + r.status}));
            const detail = err && err.detail;
            // A structured (non-string) detail would otherwise show as "[object Object]".
            const message = typeof detail === 'string' ? detail : (detail ? JSON.stringify(detail) : '');
            throw new Error(message || ('HTTP ' + r.status));
        }
        return r.json();
    };

    // Pull preview to surface conflicts / consolidation count BEFORE merging.
    let preview;
    try {
        preview = await requestJson(`/api/projects/${sourceSeg}/merge_preview?target_id=${targetArg}`);
    } catch (e) {
        alert('Preview failed: ' + e.message);
        return;
    }

    const summary = [
        `${preview.total_assignments_moving} assignment(s)`,
        `${preview.total_sessions_moving} session(s)`,
        `${preview.total_data_files_moving} data file(s)`,
    ].join(', ');
    // Tolerate a preview without location_plans instead of throwing outside any handler.
    const plans = Array.isArray(preview.location_plans) ? preview.location_plans : [];
    const consolidates = plans.filter(p => p.action === 'consolidate').length;
    let consolidation = '';
    if (consolidates > 0) {
        consolidation = `\n\n${consolidates} same-named location(s) will be consolidated.`;
    }
    const ok = confirm(
        `Merge "${sourceName}" into "${targetName}"?\n\n` +
        `Will move: ${summary}.${consolidation}\n\n` +
        `Source will be soft-deleted. This is reversible only via direct DB edit.`
    );
    if (!ok) return;

    _showToast(`Merging "${sourceName}" → "${targetName}"…`);
    try {
        const d = await requestJson(
            `/api/projects/${sourceSeg}/merge_into?target_id=${targetArg}`,
            { method: 'POST' }
        );
        _showToast(`Merged into "${targetName}"`,
            `${d.assignments_moved} assignment(s), ${d.locations_moved + d.locations_consolidated} location(s)`,
            'success');
        _hideToast(3500);
    } catch (e) {
        _showToast('Merge failed', e.message, 'error');
        _hideToast(5000);
        return;
    }

    // Re-scan: list updates without the merged pair. Kept outside the merge
    // try/catch so a re-scan error is not reported as a failed merge, which
    // could prompt the operator to retry a merge that already went through.
    try {
        await runScan();
    } catch (e) {
        _showToast('Merged, but re-scan failed', e.message, 'error');
        _hideToast(5000);
    }
}
|
||||||
|
|
||||||
|
/**
 * Drop one pair from the in-memory list ("Not a dup") and re-render.
 *
 * @param {number} idx - Index of the pair in `_pairs`; invalid indices are ignored.
 */
function dismissPair(idx) {
    // Just hide locally for now; doesn't persist across re-scans.
    // A persistent "ignore pair" feature would need a new table; defer.

    // splice() treats a negative index as an offset from the end and
    // undefined as 0, so an invalid index would silently remove a different
    // pair. Ignore anything that is not a valid position in the list.
    if (!Number.isInteger(idx) || idx < 0 || idx >= _pairs.length) return;
    _pairs.splice(idx, 1);
    render();
}
|
||||||
|
|
||||||
|
// Run the first scan (default threshold) as soon as the DOM has been parsed,
// so the page shows results without the operator pressing the rescan button.
document.addEventListener('DOMContentLoaded', function onDomReady() {
    runScan();
});
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
||||||
@@ -574,6 +574,20 @@
|
|||||||
Open
|
Open
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Project Tidy (Phase 5b) -->
|
||||||
|
<div class="flex items-center justify-between p-4 bg-gray-50 dark:bg-slate-700 rounded-lg">
|
||||||
|
<div>
|
||||||
|
<div class="font-medium text-gray-900 dark:text-white">Project Tidy</div>
|
||||||
|
<div class="text-sm text-gray-500 dark:text-gray-400 mt-0.5">
|
||||||
|
Find duplicate-looking projects via fuzzy name match (typos, abbreviations, spacing variations) and bulk-merge them.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<a href="/settings/developer/project-tidy"
|
||||||
|
class="ml-6 px-4 py-2 bg-seismo-orange hover:bg-orange-600 text-white text-sm font-medium rounded-lg transition-colors whitespace-nowrap">
|
||||||
|
Open
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user