feat: associative cognition — thoughts arise from spreading activation, not a re-read bio

Replaces the thought loop's grist (recent conversation + her own saved narrative,
which acted as a feedback-loop attractor) with a model of how a thought actually arises:

  seed (salience-weighted: a recent moment / resurfaced memory / feed item)
   -> spreading activation: embed the seed, let it light up associatively-near
      material across ALL her stores (conversations, gists, her own journal/
      thoughts), blended by relevance + recency + noise; optional 2nd hop for leaps
   -> her self-narrative stays the LENS (supplied as interiority), not the input
   -> the thought is generated from what lit up, routed through a faculty
      (notice / connect / abstract / project / feel)
   -> journaled + embedded, so it can light up in future cycles

This breaks the feedback loop structurally: the narrative is no longer reread and
paraphrased each cycle; grist is genuinely associative and varied; and her past
thoughts re-activate (continuity without calcification).

- lyra/cognition.py (new): spontaneous_seed, activate (spreading activation),
  constellation_block, faculties.
- memory.py: journal entries now embedded; recall_journal(); backfill_journal_embeddings()
  (ran once: 341 past entries embedded so her history is associatively retrievable).
- thoughts.think(): new-thread mode now uses the associative engine; dropped _grist().
- tests: test_cognition.py (recall_journal ranking, activation, seeding) + fixture
  reloads cognition. Suite 72 green, ruff clean.

Honest scope: this fixes the mechanism (how thoughts arise). The residual
"be useful for Brian" voice drift is the separate model/fine-tune problem.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-22 05:45:39 +00:00
parent 43697f8340
commit c2cee3be4d
7 changed files with 571 additions and 25 deletions
+60 -5
View File
@@ -90,7 +90,8 @@ CREATE TABLE IF NOT EXISTS journal (
created_at TEXT NOT NULL,
kind TEXT NOT NULL,
content TEXT NOT NULL,
source TEXT
source TEXT,
embedding BLOB
);
CREATE INDEX IF NOT EXISTS idx_journal_created ON journal(created_at);
@@ -138,7 +139,8 @@ def _connection() -> sqlite3.Connection:
_conn.execute("PRAGMA synchronous=NORMAL")
_conn.executescript(SCHEMA)
# Migrations for DBs created before a column existed (no-op if present).
for ddl in ("ALTER TABLE sessions ADD COLUMN mode TEXT",):
for ddl in ("ALTER TABLE sessions ADD COLUMN mode TEXT",
"ALTER TABLE journal ADD COLUMN embedding BLOB"):
try:
_conn.execute(ddl)
except sqlite3.OperationalError:
@@ -573,17 +575,70 @@ def get_self_state(state_id: str = "lyra") -> dict | None:
def add_journal_entry(kind: str, content: str, source: str | None = None) -> int:
    """Append a permanent journal entry (never truncated) and return its row id.

    The content is embedded at write time and the vector is stored with the row
    so the entry can be recalled associatively later (her own thoughts can
    resurface). Embedding is best-effort: if it fails, the entry is still
    written with a NULL embedding, which backfill_journal_embeddings() selects
    on (``embedding IS NULL``) and can fill in afterwards.

    Args:
        kind: Value stored in the ``kind`` column.
        content: Entry text; this is also the text that gets embedded.
        source: Optional value for the ``source`` column.

    Returns:
        The id of the inserted row.
    """
    import logging  # local import: the module's top-level imports are not visible here

    now = datetime.now(timezone.utc).isoformat()
    try:
        [embedding] = llm.embed([content])
        blob = _to_blob(embedding)
    except Exception:  # never let an embed hiccup block her writing something down
        logging.getLogger(__name__).warning(
            "journal entry stored without an embedding (embed failed)", exc_info=True
        )
        blob = None
    conn = _connection()
    # `with conn` commits on success and rolls back if the INSERT raises.
    with conn:
        cur = conn.execute(
            "INSERT INTO journal (created_at, kind, content, source, embedding) "
            "VALUES (?, ?, ?, ?, ?)",
            (now, kind, content, source, blob),
        )
    return int(cur.lastrowid)
def recall_journal(query: str, k: int = 5, kinds: tuple[str, ...] | None = None) -> list[dict]:
    """Return the top-k journal entries most similar in meaning to `query`.

    Her own reflections/thoughts/notes, surfaced by meaning -- the associative
    recall the thought loop uses. Only rows that have a stored embedding are
    considered; similarity is the cosine between the query embedding and each
    row's embedding.

    Args:
        query: Text to embed and compare against the journal.
        k: Maximum number of entries to return. Values <= 0 yield an empty list.
        kinds: If given and non-empty, restrict the search to these ``kind``
            values; ``None`` or an empty tuple searches every kind.

    Returns:
        Up to `k` dicts ordered best match first, each with the row's ``id``,
        ``created_at``, ``kind``, ``content`` and ``source`` plus a float
        ``score``. The raw embedding is stripped from the result.
    """
    # A negative k would make the `[:k]` slice below drop the *lowest*-ranked rows
    # instead of limiting the result, and k == 0 would pay for an embed call and a
    # table scan just to return nothing.
    if k <= 0:
        return []

    [q_vec] = llm.embed([query])
    q = np.asarray(q_vec, dtype=np.float32)

    sql = (
        "SELECT id, created_at, kind, content, source, embedding "
        "FROM journal WHERE embedding IS NOT NULL"
    )
    params: list = []
    if kinds:
        # One bound placeholder per kind keeps the values out of the SQL text.
        placeholders = ",".join("?" for _ in kinds)
        sql += f" AND kind IN ({placeholders})"
        params.extend(kinds)

    rows = _connection().execute(sql, params).fetchall()
    if not rows:
        return []

    matrix = np.stack([_from_blob(r["embedding"]) for r in rows])
    norms = np.linalg.norm(matrix, axis=1)
    # Cosine similarity; the epsilon keeps an all-zero vector from dividing by zero.
    scores = (matrix @ q) / (norms * np.linalg.norm(q) + 1e-9)
    top_idx = np.argsort(scores)[::-1][:k]

    out = []
    for i in top_idx:
        entry = dict(rows[i])
        entry.pop("embedding", None)
        entry["score"] = float(scores[i])
        out.append(entry)
    return out
def backfill_journal_embeddings(limit: int | None = None) -> int:
    """Embed journal entries that have no stored embedding yet.

    Selects rows whose ``embedding`` is NULL (e.g. entries created before
    embeddings existed), embeds each one's content, and writes the vector back.
    Rows are embedded and committed one at a time, so a failure on one entry
    does not lose the work already done on the others; entries that fail to
    embed are logged and left NULL for a later run.

    Args:
        limit: Maximum number of rows to fetch for this run. ``None`` means no
            limit; ``0`` processes nothing.

    Returns:
        The number of entries that were successfully embedded and updated.
    """
    import logging  # local import: the module's top-level imports are not visible here

    log = logging.getLogger(__name__)
    conn = _connection()

    sql = "SELECT id, content FROM journal WHERE embedding IS NULL"
    params: tuple = ()
    # `is not None` rather than truthiness, so limit=0 means "no rows" instead of
    # silently falling through to an unbounded backfill.
    if limit is not None:
        sql += " LIMIT ?"
        params = (int(limit),)
    rows = conn.execute(sql, params).fetchall()

    n = 0
    for r in rows:
        try:
            [emb] = llm.embed([r["content"]])
        except Exception:
            # Best-effort: skip this entry, but leave a trace of which one failed.
            log.warning(
                "could not embed journal entry %s; leaving it for a later backfill",
                r["id"],
                exc_info=True,
            )
            continue
        with conn:  # commit each row on its own
            conn.execute(
                "UPDATE journal SET embedding = ? WHERE id = ?",
                (_to_blob(emb), r["id"]),
            )
        n += 1
    return n
def add_rating(kind: str, rating: int, content: str, context: str | None = None,
ref: str | None = None, note: str | None = None) -> int:
"""Record (or replace) Brian's feedback on one Lyra output. One row per item: