feat: associative cognition — thoughts arise from spreading activation, not a re-read bio

Replaces the thought loop's grist (recent-convo + her own saved narrative, the feedback-loop attractor) with a model of how a thought actually arises:

  seed (salience-weighted: a recent moment / resurfaced memory / feed item)
   -> spreading activation: embed the seed, let it light up associatively-near material across ALL her stores (conversations, gists, her own journal/thoughts), blended by relevance + recency + noise; optional 2nd hop for leaps
   -> her self-narrative stays the LENS (supplied as interiority), not the input
   -> the thought is generated from what lit up, routed through a faculty (notice / connect / abstract / project / feel)
   -> journaled + embedded, so it can light up in future cycles

This breaks the feedback loop structurally: the narrative is no longer reread and paraphrased each cycle; grist is genuinely associative and varied; and her past thoughts re-activate (continuity without calcification).

- lyra/cognition.py (new): spontaneous_seed, activate (spreading activation), constellation_block, faculties.
- memory.py: journal entries now embedded; recall_journal(); backfill_journal_embeddings() (ran once: 341 past entries embedded so her history is associatively retrievable).
- thoughts.think(): new-thread mode now uses the associative engine; dropped _grist().
- tests: test_cognition.py (recall_journal ranking, activation, seeding) + fixture reloads cognition.

Suite 72 green, ruff clean.

Honest scope: this fixes the mechanism (how thoughts arise). The residual "be useful for Brian" voice drift is the separate model/fine-tune problem.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-22 05:45:39 +00:00
parent 43697f8340
commit c2cee3be4d
7 changed files with 571 additions and 25 deletions
@@ -90,7 +90,8 @@ CREATE TABLE IF NOT EXISTS journal (
    created_at TEXT NOT NULL,
    kind TEXT NOT NULL,
    content TEXT NOT NULL,
-    source TEXT
+    source TEXT,
+    embedding BLOB
 );
 CREATE INDEX IF NOT EXISTS idx_journal_created ON journal(created_at);

@@ -138,7 +139,8 @@ def _connection() -> sqlite3.Connection:
        _conn.execute("PRAGMA synchronous=NORMAL")
        _conn.executescript(SCHEMA)
        # Migrations for DBs created before a column existed (no-op if present).
-        for ddl in ("ALTER TABLE sessions ADD COLUMN mode TEXT",):
+        for ddl in ("ALTER TABLE sessions ADD COLUMN mode TEXT",
+                    "ALTER TABLE journal ADD COLUMN embedding BLOB"):
            try:
                _conn.execute(ddl)
            except sqlite3.OperationalError:
@@ -573,17 +575,70 @@ def get_self_state(state_id: str = "lyra") -> dict | None:


 def add_journal_entry(kind: str, content: str, source: str | None = None) -> int:
-    """Append a permanent journal entry (never truncated). Returns row id."""
+    """Append a permanent journal entry (never truncated), embedded so it can be
+    recalled associatively later (her own thoughts can resurface). Returns row id."""
+    # created_at is stored as a timezone-aware UTC ISO-8601 string.
    now = datetime.now(timezone.utc).isoformat()
+    # The embedding is computed before the INSERT. If embedding or serialising it
+    # fails for any reason, the row is still written with embedding = NULL; such
+    # rows are excluded by recall_journal() and selected by
+    # backfill_journal_embeddings().
+    try:
+        [embedding] = llm.embed([content])
+        blob = _to_blob(embedding)
+    except Exception:  # never let an embed hiccup block her writing something down
+        blob = None
    conn = _connection()
    with conn:
        cur = conn.execute(
-            "INSERT INTO journal (created_at, kind, content, source) VALUES (?, ?, ?, ?)",
-            (now, kind, content, source),
+            "INSERT INTO journal (created_at, kind, content, source, embedding) VALUES (?, ?, ?, ?, ?)",
+            (now, kind, content, source, blob),
        )
    return int(cur.lastrowid)


+def recall_journal(query: str, k: int = 5, kinds: tuple[str, ...] | None = None) -> list[dict]:
+    """Top-k journal entries semantically similar to `query` (embedded rows only).
+    Her own reflections/thoughts/notes, surfaced by meaning — the associative recall
+    the thought loop uses. Each dict gets a `score`.
+
+    Args:
+        query: Text to embed and compare against stored journal embeddings.
+        k: Maximum number of entries to return.
+        kinds: Optional set of `kind` values to restrict the search to; a falsy
+            value (None or empty tuple) searches every kind.
+
+    Returns:
+        Up to `k` dicts with keys id, created_at, kind, content, source and score
+        (cosine similarity as a float), ordered from most to least similar. The
+        raw `embedding` column is dropped. Returns [] when no row matches.
+
+    Note: the query is embedded before the table is read, so an error from
+    `llm.embed` propagates to the caller even when the journal is empty.
+    """
+    [q_vec] = llm.embed([query])
+    q = np.asarray(q_vec, dtype=np.float32)
+    conn = _connection()
+    sql = "SELECT id, created_at, kind, content, source, embedding FROM journal WHERE embedding IS NOT NULL"
+    params: list = []
+    if kinds:
+        # One "?" placeholder per kind; the values themselves are bound as parameters.
+        sql += " AND kind IN (%s)" % ",".join("?" * len(kinds))
+        params += list(kinds)
+    # Every matching row is loaded and scored in memory (no index is used here).
+    rows = conn.execute(sql, params).fetchall()
+    if not rows:
+        return []
+    # NOTE(review): np.stack requires every decoded vector to have the same
+    # length — presumably all rows were embedded with the same model; confirm.
+    matrix = np.stack([_from_blob(r["embedding"]) for r in rows])
+    norms = np.linalg.norm(matrix, axis=1)
+    # Cosine similarity of each row against the query; the 1e-9 term keeps the
+    # denominator non-zero when a vector has zero norm.
+    scores = (matrix @ q) / (norms * np.linalg.norm(q) + 1e-9)
+    # argsort is ascending, so reverse it and keep the first k (highest scores).
+    top_idx = np.argsort(scores)[::-1][:k]
+    out = []
+    for i in top_idx:
+        d = dict(rows[i])
+        d.pop("embedding", None)
+        d["score"] = float(scores[i])
+        out.append(d)
+    return out
+
+
+def backfill_journal_embeddings(limit: int | None = None) -> int:
+    """Embed any journal entries created before embeddings existed. Returns count.
+
+    Selects rows whose `embedding` is NULL, embeds each row's content one at a
+    time, and writes the result back.
+
+    Args:
+        limit: Maximum number of rows to select. Any falsy value (None or 0)
+            means no limit.
+
+    Returns:
+        The number of rows that were successfully embedded and updated; rows
+        whose embed call raised are not counted.
+    """
+    conn = _connection()
+    sql = "SELECT id, content FROM journal WHERE embedding IS NULL"
+    if limit:
+        # int() guarantees only a number is interpolated into the SQL text.
+        sql += f" LIMIT {int(limit)}"
+    rows = conn.execute(sql).fetchall()
+    n = 0
+    for r in rows:
+        try:
+            [emb] = llm.embed([r["content"]])
+        except Exception:
+            # Skip this row; its embedding stays NULL, so a later run selects it again.
+            continue
+        # One `with conn:` block per row, so each update is its own transaction
+        # and rows already written are kept if a later row fails.
+        with conn:
+            conn.execute("UPDATE journal SET embedding = ? WHERE id = ?", (_to_blob(emb), r["id"]))
+        n += 1
+    return n
+
+
 def add_rating(kind: str, rating: int, content: str, context: str | None = None,
               ref: str | None = None, note: str | None = None) -> int:
    """Record (or replace) Brian's feedback on one Lyra output. One row per item: