feat(projects): Tidy page for fuzzy-detecting + bulk-merging duplicate projects
Phase 5b first slice. Surfaces near-duplicate projects (typo variants, abbreviation differences, spacing variations like "SR81" vs "SR 81") as side-by-side pairs the operator can merge with one click. Backend (backend/services/project_tidy.py): - find_duplicate_pairs(db, threshold=0.85) walks all active projects and computes rapidfuzz.WRatio similarity for every pair. Pre-filters too-short normalised names (< 4 chars) to avoid noise. Skips soft-deleted projects. Returns pairs sorted by score desc, then by total content (more assignments → review first). - Each pair carries a suggested merge target with a human-readable reason. Priorities (in order): manual source over parser source, populated project_number, more locations, more assignments, shorter name. Operator can override the suggestion by clicking the OTHER direction button. - O(N^2) over project count. Fine up to ~500 projects. Token-prefix blocking is the obvious next optimisation if it becomes slow. Backend (backend/routers/projects.py): - GET /api/projects/admin/duplicate_pairs?threshold=&max_pairs= returns pairs as JSON for the Tidy page. Frontend (templates/admin/project_tidy.html): - New admin page at /settings/developer/project-tidy. Threshold selector (95% / 90% / 85% / 80%) at the top; rescan button next to it; auto-scans on load. - Each pair card shows side-by-side project summaries (name, project_number, client, source-badge, location/assignment counts) with the suggested target visually highlighted (orange border). Three buttons: "Merge A → B", "Merge B → A", "Not a dup" (hide locally). - Click-to-merge opens a native confirm with the preview totals (assignments/sessions/data files moving, consolidations) — same data the project_header.html merge modal shows. On confirm, hits the existing /merge_into endpoint and re-scans automatically. 
- Source badges distinguish parser-created (`metadata_backfill`) from manual projects — at a glance the operator can see "this duplicate is parser-generated; safe to merge into the manual one". Frontend (templates/admin/metadata_backfill.html): - Apply-result handling now surfaces failed[] cluster reasons in a dedicated failure panel (bottom-left, dismissable). Previously a 200 OK with all-failures showed a misleading "1 cluster applied" success toast because the count and the failure list weren't being reconciled. This bit us during the DB-revert recovery earlier — the project_modules table was missing, every apply silently rolled back, user saw success toasts. Fixed. Smoke-verified against current state (10K events, 9 projects, post-merge): tool correctly finds 0 pairs at threshold 0.85 (data is clean), 1 false-positive at 0.70 (two unrelated projects sharing the token "81" — example of why the 0.85 default is correct). Settings link added under Developer → Project Tidy. Phase 5c (swap-detection daily background job + notification inbox) remains deferred to the next session. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -595,9 +595,24 @@ async function _apply(clusterIds) {
|
||||
});
|
||||
if (!r.ok) throw new Error('HTTP ' + r.status);
|
||||
const d = await r.json();
|
||||
const sub = `${d.applied} applied · ${d.project_ids_created.length} new project(s) · ${d.location_ids_created.length} new location(s)` + (d.failed.length ? ` · ${d.failed.length} failed` : '');
|
||||
_showToast(`${d.applied} cluster${d.applied === 1 ? '' : 's'} applied`, sub, d.failed.length ? 'error' : 'success');
|
||||
_hideToast(4000);
|
||||
const failedCount = (d.failed || []).length;
|
||||
|
||||
// Three states:
|
||||
// total success — applied N, no failures → green toast, 4s
|
||||
// partial — applied N, M failures → red toast + modal listing reasons
|
||||
// total failure — applied 0, failures → red toast + modal
|
||||
if (failedCount === 0) {
|
||||
const sub = `${d.applied} applied · ${d.project_ids_created.length} new project(s) · ${d.location_ids_created.length} new location(s)`;
|
||||
_showToast(`${d.applied} cluster${d.applied === 1 ? '' : 's'} applied`, sub, 'success');
|
||||
_hideToast(4000);
|
||||
} else {
|
||||
const title = d.applied > 0
|
||||
? `${d.applied} applied, ${failedCount} failed`
|
||||
: `Apply failed — ${failedCount} cluster${failedCount === 1 ? '' : 's'} could not be applied`;
|
||||
_showToast(title, 'See the details panel.', 'error');
|
||||
_hideToast(6000);
|
||||
_showFailureDetails(d.failed);
|
||||
}
|
||||
await runScan(true); // refresh
|
||||
} catch (e) {
|
||||
_showToast('Apply failed', e.message, 'error');
|
||||
@@ -605,6 +620,46 @@ async function _apply(clusterIds) {
|
||||
}
|
||||
}
|
||||
|
||||
// Modal-ish panel that lists each failed cluster with its server-side
|
||||
// reason. Common failure modes seen in dev: missing DB tables after a
|
||||
// stale schema, blocking conflicts that slipped past the front-end guard,
|
||||
// rapidfuzz/SQLAlchemy edge cases. Operator can dismiss and either
|
||||
// retry the cluster, skip it, or fix the underlying issue.
|
||||
function _showFailureDetails(failed) {
|
||||
let panel = document.getElementById('apply-failure-panel');
|
||||
if (!panel) {
|
||||
panel = document.createElement('div');
|
||||
panel.id = 'apply-failure-panel';
|
||||
panel.className = 'fixed bottom-6 left-6 right-6 sm:right-auto sm:max-w-xl bg-white dark:bg-slate-800 rounded-xl shadow-2xl border border-red-200 dark:border-red-800 p-4 z-40';
|
||||
document.body.appendChild(panel);
|
||||
}
|
||||
const rows = failed.map(f => `
|
||||
<li class="flex items-start gap-2 text-sm border-l-2 border-red-300 dark:border-red-700 pl-3 py-1">
|
||||
<code class="font-mono text-xs text-gray-500 dark:text-gray-400">${(f.cluster_id || '').slice(0, 8)}…</code>
|
||||
<span class="text-gray-800 dark:text-gray-200 flex-1">${_esc(f.reason || '(no reason)')}</span>
|
||||
</li>
|
||||
`).join('');
|
||||
panel.innerHTML = `
|
||||
<div class="flex items-start justify-between gap-3 mb-2">
|
||||
<h4 class="font-semibold text-gray-900 dark:text-white">
|
||||
<svg class="w-5 h-5 inline text-red-500 -mt-0.5 mr-1" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/></svg>
|
||||
${failed.length} cluster${failed.length === 1 ? '' : 's'} failed to apply
|
||||
</h4>
|
||||
<button onclick="document.getElementById('apply-failure-panel').remove()"
|
||||
class="text-gray-400 hover:text-gray-600 dark:hover:text-gray-200">
|
||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12"/></svg>
|
||||
</button>
|
||||
</div>
|
||||
<ul class="space-y-1 max-h-64 overflow-y-auto">${rows}</ul>
|
||||
<p class="mt-3 text-xs text-gray-500 dark:text-gray-400">
|
||||
Common causes: missing DB schema (run the migration sweep), blocking conflict
|
||||
with an existing UnitAssignment, or a UNIQUE constraint collision on the
|
||||
project name. Re-scan and the failed clusters reappear as pending — fix the
|
||||
underlying issue and retry.
|
||||
</p>
|
||||
`;
|
||||
}
|
||||
|
||||
async function applyOne(clusterId) { return _apply([clusterId]); }
|
||||
|
||||
async function applyBulkHighConfidence() {
|
||||
|
||||
Reference in New Issue
Block a user