diff --git a/backend/routers/metadata_backfill.py b/backend/routers/metadata_backfill.py index 3e77a6d..3e002a1 100644 --- a/backend/routers/metadata_backfill.py +++ b/backend/routers/metadata_backfill.py @@ -22,6 +22,7 @@ from fastapi.responses import JSONResponse from sqlalchemy.orm import Session from backend.database import get_db +from backend.models import Project, MonitoringLocation from backend.services import metadata_backfill as svc router = APIRouter(prefix="/api/admin/metadata_backfill", tags=["metadata-backfill"]) @@ -165,27 +166,62 @@ async def apply( if s is None: not_found.append(cid) continue - # Apply overrides. + # Apply overrides. Per-cluster overrides take precedence over the + # parser's suggested match. Four override fields supported: + # project_id — attach to an existing Project (operator picked + # from the typeahead) + # project_name — create new project with this name (operator + # typed a custom name not matching anything) + # location_id — attach to an existing MonitoringLocation + # location_name — create new location with this name + # project_id + location_id pairings: location_id is only honored + # if its project_id matches the chosen project (otherwise treated + # as a create-new). ov = overrides.get(cid) or {} - if "project_name" in ov: - s.project_suggested_name = (ov["project_name"] or "").strip() or s.project_suggested_name - # Override implies operator wants to create new (or rename). - # If they wanted an exact match, they'd not have overridden. 
- if s.project_match in ("create_new",): - pass # keep create_new + + if ov.get("project_id"): + target_id = ov["project_id"] + existing = db.query(svc.Project).filter_by(id=target_id).first() + if existing is not None: + s.project_existing_id = existing.id + s.project_existing_name = existing.name + s.project_suggested_name = existing.name + s.project_match = "exact" else: - # Operator typed a custom name — force create-new behaviour - # so we don't accidentally attach to a different existing - # project by exact-match. + # Stale ID — treat as create_new with the cluster's typed name. s.project_existing_id = None s.project_match = "create_new" - if "location_name" in ov: - s.location_suggested_name = (ov["location_name"] or "").strip() or s.location_suggested_name - if s.location_match in ("create_new",): - pass + elif "project_name" in ov: + new_name = (ov["project_name"] or "").strip() + if new_name: + s.project_suggested_name = new_name + s.project_existing_id = None + s.project_existing_name = None + s.project_match = "create_new" + + if ov.get("location_id"): + target_id = ov["location_id"] + existing = db.query(svc.MonitoringLocation).filter_by(id=target_id).first() + # Only attach if the location belongs to the (now chosen) project. 
# Hard ceiling on rows returned by a single typeahead request, so a caller
# cannot ask the endpoint to serialise an arbitrarily large result set.
_SEARCH_LIMIT_MAX = 100

# Minimum similarity score for a candidate to be surfaced in the dropdown.
# This is a surfacing threshold only; it is not the match threshold.
_SEARCH_SURFACE_THRESHOLD = 0.50


def _rank_by_name(candidates: list, q_norm: str, limit: int) -> list[tuple]:
    """Score ``candidates`` by name against ``q_norm`` and keep the best few.

    Shared by the project and location typeahead endpoints so both rank
    results the same way.

    Args:
        candidates: ORM rows exposing a ``name`` attribute.
        q_norm: The query after ``svc._normalise``; may be empty.
        limit: Requested maximum number of results. Clamped to
            ``0.._SEARCH_LIMIT_MAX`` so a negative value cannot turn the
            final slice into "drop rows from the end".

    Returns:
        ``(row, score)`` pairs sorted by score descending, then by
        lower-cased name ascending, truncated to the clamped limit.
    """
    limit = max(0, min(limit, _SEARCH_LIMIT_MAX))

    scored: list[tuple] = []
    for row in candidates:
        if not q_norm:
            # Empty query: keep every row; the sort below orders them by name.
            scored.append((row, 0.0))
            continue
        # A NULL name is treated as empty so one bad row cannot break search.
        name_norm = svc._normalise(row.name or "")
        if q_norm in name_norm:
            # Substring hit counts as a perfect score regardless of fuzzy score.
            scored.append((row, 1.0))
            continue
        # NOTE(review): assumes svc.similarity returns a 0..1 float - confirm.
        score = svc.similarity(q_norm, name_norm)
        if score >= _SEARCH_SURFACE_THRESHOLD:
            scored.append((row, score))

    scored.sort(key=lambda pair: (-pair[1], (pair[0].name or "").lower()))
    return scored[:limit]


@router.get("/projects_search")
def projects_search(
    q: str = "",
    limit: int = 10,
    db: Session = Depends(get_db),
):
    """Typeahead search of existing projects for the per-cluster overrides.

    Projects whose status is not "deleted" are ranked against ``q``: a
    normalised substring hit scores 1.0, otherwise ``svc.similarity`` is
    used and candidates below the surfacing threshold are dropped. An empty
    query returns projects ordered by name.

    Args:
        q: Raw text typed by the operator.
        limit: Maximum number of matches (clamped to 0..100).
        db: Request-scoped SQLAlchemy session.

    Returns:
        A dict with two keys:

        * ``matches``: list of dicts with ``id``, ``name``,
          ``project_number``, ``client_name``, ``score`` (rounded to three
          decimals) and ``location_count``.
        * ``create_new``: ``{"label": ...}`` where the label is
          ``Create new: "<q>"`` for a non-empty query and ``None`` otherwise.
    """
    q_clean = (q or "").strip()
    q_norm = svc._normalise(q_clean)

    # NOTE(review): in SQL, rows with a NULL status do not satisfy
    # `status != 'deleted'` - confirm the column is non-nullable.
    projects = db.query(Project).filter(Project.status != "deleted").all()
    ranked = _rank_by_name(projects, q_norm, limit)

    # Location counts for the surviving projects, fetched in one grouped query.
    loc_counts: dict[str, int] = {}
    if ranked:
        from sqlalchemy import func

        project_ids = [p.id for p, _ in ranked]
        rows = (
            db.query(MonitoringLocation.project_id, func.count(MonitoringLocation.id))
            .filter(MonitoringLocation.project_id.in_(project_ids))
            .group_by(MonitoringLocation.project_id)
            .all()
        )
        loc_counts = {pid: cnt for pid, cnt in rows}

    return {
        "matches": [
            {
                "id": p.id,
                "name": p.name,
                "project_number": p.project_number,
                "client_name": p.client_name,
                "score": round(score, 3),
                "location_count": loc_counts.get(p.id, 0),
            }
            for p, score in ranked
        ],
        "create_new": {"label": f'Create new: "{q_clean}"' if q_clean else None},
    }


@router.get("/locations_search")
def locations_search(
    project_id: str,
    q: str = "",
    limit: int = 10,
    db: Session = Depends(get_db),
):
    """Typeahead search of existing locations within one project.

    Only locations whose ``location_type`` is "vibration" are considered.
    Ranking is the same as for ``projects_search``.

    Args:
        project_id: Project whose locations are searched.
        q: Raw text typed by the operator.
        limit: Maximum number of matches (clamped to 0..100).
        db: Request-scoped SQLAlchemy session.

    Returns:
        A dict with ``matches`` (dicts with ``id``, ``name``, ``address``,
        ``score``) and ``create_new`` (``{"label": ...}``, ``None`` label
        for an empty query).

    Raises:
        HTTPException: 400 when ``project_id`` is empty.
    """
    if not project_id:
        raise HTTPException(status_code=400, detail="project_id required")

    q_clean = (q or "").strip()
    q_norm = svc._normalise(q_clean)

    locations = (
        db.query(MonitoringLocation)
        .filter(MonitoringLocation.project_id == project_id)
        .filter(MonitoringLocation.location_type == "vibration")
        .all()
    )
    ranked = _rank_by_name(locations, q_norm, limit)

    return {
        "matches": [
            {
                "id": loc.id,
                "name": loc.name,
                "address": loc.address,
                "score": round(score, 3),
            }
            for loc, score in ranked
        ],
        "create_new": {"label": f'Create new: "{q_clean}"' if q_clean else None},
    }
/**
 * Input handler for a per-cluster typeahead text box.
 *
 * Typing freely means the operator is no longer pointing at a previously
 * picked entity, so the matching hidden "<fieldKind>_id" input is cleared.
 * When the project text changes, the hidden location_id is cleared as well:
 * a location id picked under the previous project must not be submitted
 * alongside a different (or brand-new) project. onTypeaheadPick applies the
 * same rule when a project is chosen from the dropdown.
 *
 * @param {Event} e - input event fired by the text box.
 * @param {string} fieldKind - 'project' or 'location'.
 */
function onTypeaheadInput(e, fieldKind) {
    const inp = e.target;
    const cid = inp.dataset.clusterId;

    const hidden = document.querySelector(`input[type="hidden"][data-cluster-id="${cid}"][data-field="${fieldKind}_id"]`);
    if (hidden) hidden.value = '';

    if (fieldKind === 'project') {
        const locId = document.querySelector(`input[type="hidden"][data-cluster-id="${cid}"][data-field="location_id"]`);
        if (locId) locId.value = '';
    }

    // Debounce the search so a burst of keystrokes triggers one request.
    if (_typeaheadDebounce) clearTimeout(_typeaheadDebounce);
    _typeaheadDebounce = setTimeout(() => _fetchTypeahead(inp, fieldKind), 150);
}

/**
 * Focus handler: immediately load suggestions for the current value.
 *
 * @param {Event} e - focus event fired by the text box.
 * @param {string} fieldKind - 'project' or 'location'.
 */
function onTypeaheadFocus(e, fieldKind) {
    _fetchTypeahead(e.target, fieldKind);
}

/**
 * Blur handler: hide the dropdown after a short delay.
 *
 * The delay lets a click on a dropdown entry register before the list
 * disappears. If the input has regained focus by the time the timer fires,
 * the dropdown is left alone so a quick blur/re-focus does not hide a list
 * that was just re-opened.
 *
 * @param {Event} e - blur event fired by the text box.
 */
function onTypeaheadBlur(e) {
    const input = e.target;
    const dropdown = input.parentElement.querySelector('.typeahead-dropdown');
    if (!dropdown) return;
    setTimeout(() => {
        if (document.activeElement !== input) {
            dropdown.classList.add('hidden');
        }
    }, 150);
}
+ ${q ? `+ Will create new: "${_esc(q)}"` : 'Pick a project first, or type a new location name.'} +
`; + dropdown.classList.remove('hidden'); + return; + } + url = `/api/admin/metadata_backfill/locations_search?project_id=${encodeURIComponent(projectId)}&q=${encodeURIComponent(q)}`; + } + + let data; + try { + const r = await fetch(url); + if (!r.ok) throw new Error('HTTP ' + r.status); + data = await r.json(); + } catch (err) { + dropdown.innerHTML = `
Search failed: ${_esc(err.message)}
`; + dropdown.classList.remove('hidden'); + return; + } + + const items = []; + for (const m of (data.matches || [])) { + items.push({ kind: 'match', payload: m }); + } + if (data.create_new && data.create_new.label) { + items.push({ kind: 'create_new', label: data.create_new.label, name: q }); + } + + if (items.length === 0) { + dropdown.innerHTML = `
No matches. Type to create.
`; + dropdown.classList.remove('hidden'); + return; + } + + dropdown.innerHTML = items.map((it, idx) => { + if (it.kind === 'match') { + const m = it.payload; + const scoreBadge = m.score >= 0.99 + ? 'exact' + : `${(m.score*100).toFixed(0)}%`; + const meta = []; + if (fieldKind === 'project') { + if (m.project_number) meta.push(_esc(m.project_number)); + if (m.client_name) meta.push(_esc(m.client_name)); + if (m.location_count > 0) meta.push(`${m.location_count} location${m.location_count === 1 ? '' : 's'}`); + } else { + if (m.address) meta.push(_esc(m.address)); + } + const metaLine = meta.length ? `
${meta.join(' · ')}
/**
 * Apply a dropdown selection to a cluster's typeahead inputs.
 *
 * Writes `name` into the visible text input and `entityId` into the hidden
 * "<fieldKind>_id" input, then hides that input's dropdown. When a project
 * is picked, the hidden location_id is cleared so a location id chosen
 * under another project is not submitted with the new one.
 *
 * @param {Event} event - the originating click event (unused).
 * @param {string} clusterId - value of data-cluster-id on the inputs.
 * @param {string} fieldKind - 'project' or 'location'.
 * @param {string} entityId - '' for "create new", otherwise the existing id.
 * @param {string} name - text to place in the visible input.
 */
function onTypeaheadPick(event, clusterId, fieldKind, entityId, name) {
    const inputs = document.querySelectorAll(`input[data-cluster-id="${clusterId}"]`);
    let textInput = null;
    let idInput = null;
    inputs.forEach(i => {
        if (i.dataset.field === fieldKind) textInput = i;
        if (i.dataset.field === fieldKind + '_id') idInput = i;
    });

    if (idInput) idInput.value = entityId;

    // The text input may be absent for this cluster; guard before touching
    // its parent so the rest of the handler still runs.
    if (textInput) {
        textInput.value = name;
        const dropdown = textInput.parentElement.querySelector('.typeahead-dropdown');
        if (dropdown) dropdown.classList.add('hidden');
    }

    if (fieldKind === 'project') {
        const locId = document.querySelector(`input[type="hidden"][data-cluster-id="${clusterId}"][data-field="location_id"]`);
        if (locId) locId.value = '';
    }
}
-
Project: ${_matchPill(s.project_match, s.project_match_score, s.project_suggested_name, s.project_existing_name)}
- ${(s.project_root && s.project_raw && s.project_root !== s.project_raw) - ? `
↳ stripped trailing "Loc N" suffix; operator typed: "${_esc(s.project_raw)}"
` - : ''} -
Location: ${_matchPill(s.location_match, s.location_match_score, s.location_suggested_name, s.location_existing_name)}
+
+ +
+ Project: +
+ + + +
${_matchHint(s.project_match, s.project_match_score, s.project_existing_name, s.project_suggested_name)}
+ ${(s.project_root && s.project_raw && s.project_root !== s.project_raw) + ? `
↳ stripped trailing "Loc N" suffix; operator typed: "${_esc(s.project_raw)}"
` + : ''} +
+
+ + +
+ Location: +
+ + + +
${_matchHint(s.location_match, s.location_match_score, s.location_existing_name, s.location_suggested_name)}
+
+
+
Assignment: ${_fmtDateTime(s.proposed_assigned_at)} → ${s.proposed_assigned_until ? _fmtDateTime(s.proposed_assigned_until) : 'present (active)'}
${s.client_raw ? `
Client: ${_esc(s.client_raw)}
` : ''}
@@ -319,15 +514,49 @@ function _renderCluster(s) { } function _gatherOverrides(clusterIds) { + // Per-cluster overrides sent to /apply. The backend understands four + // keys per cluster: project_id, project_name, location_id, location_name. + // We emit project_id+location_id when the operator picked from the + // typeahead dropdown; we emit project_name+location_name when they + // typed a free-form value (no id selected) that differs from the + // parser's original suggestion. const overrides = {}; for (const cid of clusterIds) { const inputs = document.querySelectorAll(`input[data-cluster-id="${cid}"]`); if (inputs.length === 0) continue; const o = {}; + let projectText = null, projectId = null; + let locationText = null, locationId = null; + // Old-style flat fields (kept for blank-meta cluster inputs which + // use data-field="project_name" / "location_name"). + let projectNameRaw = null, locationNameRaw = null; inputs.forEach(i => { - const v = i.value.trim(); - if (v) o[i.dataset.field] = v; + const v = (i.value || '').trim(); + const f = i.dataset.field; + if (f === 'project') projectText = v; + else if (f === 'project_id') projectId = v; + else if (f === 'location') locationText = v; + else if (f === 'location_id') locationId = v; + else if (f === 'project_name') projectNameRaw = v; + else if (f === 'location_name') locationNameRaw = v; }); + + if (projectId) { + o.project_id = projectId; + } else if (projectText) { + o.project_name = projectText; + } else if (projectNameRaw) { + o.project_name = projectNameRaw; + } + + if (locationId) { + o.location_id = locationId; + } else if (locationText) { + o.location_name = locationText; + } else if (locationNameRaw) { + o.location_name = locationNameRaw; + } + if (Object.keys(o).length > 0) overrides[cid] = o; } return overrides;