fix(sfm): broaden Loc-N suffix regex to catch '.Loc' and 'Loc No.' variants

Operators use more separator variations than the original regex caught:

- "Trumbull-Brayman-JV- Mont.Dam.Loc 2-R-25" — period as separator
- "CMU - RKM Hall - Loc No. 3 - 4615 Forbes" — "No." between Loc and digit

Added a period to the separator character class and an optional "No." token before the digit. This catches both of the patterns above, plus near-variants, without false positives on normal project strings.

Real-data impact: 5 more clusters now auto-strip cleanly, including the 1,903-event Trumbull-Brayman-JV- Mont.Dam cluster. Confidence distribution: 43 → 44 high.
2026-05-12 19:19:46 +00:00
parent 6ebbe28308
commit d46f9fccf8
1 changed files with 14 additions and 10 deletions
@@ -103,16 +103,20 @@ def _normalise(s: Optional[str]) -> str:
 # their full project_raw and the operator can edit them in the wizard.
 _PROJECT_LOC_SUFFIX = re.compile(
    r"""
-    \s*               # any leading whitespace
-    [-–—]             # hyphen or em-dash (separator before the Loc marker)
-    \s*               # optional spaces
-    (?:loc|location)  # 'Loc' or 'Location'
-    \.?               # optional period
-    \s*               # optional space
-    \#?               # optional '#'
-    \s*               # optional space
-    \d+               # required digit
-    \b                # word boundary
+    \s*                       # any leading whitespace
+    [-–—.]                    # separator: hyphen, em-dash, or period
+                              # (operators use any of these — see
+                              #  "Mont.Dam.Loc 2-R-25")
+    \s*
+    (?:loc|location)          # 'Loc' or 'Location'
+    \.?                       # optional trailing period after Loc
+    \s*
+    (?:no\.?\s*)?             # optional "No." or "No " before the digit
+                              # (e.g. "Loc No. 3", "Loc No 5")
+    \#?                       # optional '#'
+    \s*
+    \d+                       # required digit
+    \b
    """,
    re.IGNORECASE | re.VERBOSE,
 )