From ecf0b852f9a25dbe7413513ce420e1e1b56931bb Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Tue, 16 Jun 2026 04:11:19 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20profile=20layer=20=E2=80=94=20semantic?=
 =?UTF-8?q?=20memory=20(consolidation=20step=202)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Derive a standing profile of the user from session gists and inject it into
every prompt, so identity/abstract questions ("what kind of player am I",
"what are my leaks") are answered from distilled knowledge instead of noisy
single-vector recall (which finds passages, not patterns).

- memory: profile table + get/set_profile, list_summaries
- lyra/profile.py: rebuild_profile map-reduces all gists (batch -> extract
  durable facts -> fold-merge) into one profile doc; `lyra-profile` CLI
- chat.build_messages injects "What you know about Brian" after the persona

Run after lyra-summarize (needs gists). Verified (stubbed): map-reduce, storage,
and prompt injection.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lyra/chat.py    |  8 +++++
 lyra/memory.py  | 47 +++++++++++++++++++++++++++
 lyra/profile.py | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml  |  1 +
 4 files changed, 140 insertions(+)
 create mode 100644 lyra/profile.py

diff --git a/lyra/chat.py b/lyra/chat.py
index 551d58f..d4da349 100644
--- a/lyra/chat.py
+++ b/lyra/chat.py
@@ -39,6 +39,14 @@ def build_messages(session_id: str, user_msg: str) -> list[Message]:
     """Assemble the full, tiered message list for one turn."""
     messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}]
 
+    # Semantic memory: the distilled profile (who Brian is) — answers identity
+    # questions that raw recall can't. Always in context when it exists.
+    profile = memory.get_profile()
+    if profile:
+        messages.append(
+            {"role": "system", "content": "What you know about Brian:\n" + profile}
+        )
+
     recent = memory.recent(session_id, n=RECENT_N)
     recent_ids = {ex.id for ex in recent}
 
diff --git a/lyra/memory.py b/lyra/memory.py
index 6001da4..827295a 100644
--- a/lyra/memory.py
+++ b/lyra/memory.py
@@ -43,6 +43,15 @@ CREATE TABLE IF NOT EXISTS summaries (
     last_exchange_id INTEGER NOT NULL,
     created_at TEXT NOT NULL
 );
+
+-- Derived semantic memory: standing facts about the user, distilled from the
+-- session gists by the consolidation pass. Single row (id='self').
+CREATE TABLE IF NOT EXISTS profile (
+    id TEXT PRIMARY KEY,
+    content TEXT NOT NULL,
+    sessions_covered INTEGER NOT NULL,
+    updated_at TEXT NOT NULL
+);
 """
 
 _conn: sqlite3.Connection | None = None
@@ -290,6 +299,44 @@ def unsummarized_count(session_id: str) -> int:
     return int(r["n"])
 
 
+def list_summaries() -> list[Summary]:
+    """Return all session gists, oldest first.
+
+    Feeds the profile/era consolidation passes, which need every gist at once.
+
+    Returns:
+        One `Summary` per row of the `summaries` table, ordered by `created_at`.
+    """
+    columns = ("session_id", "content", "last_exchange_id", "created_at")
+    cursor = _connection().execute(
+        "SELECT session_id, content, last_exchange_id, created_at FROM summaries "
+        "ORDER BY created_at ASC"
+    )
+
+    # Rebuild each Summary from the selected columns, keyed by column name.
+    return [Summary(**{col: row[col] for col in columns}) for row in cursor.fetchall()]
+
+
+def set_profile(content: str, sessions_covered: int, profile_id: str = "self") -> None:
+    """Insert or replace the profile row `profile_id`, stamped with the current UTC time."""
+    params = (profile_id, content, sessions_covered, datetime.now(timezone.utc).isoformat())
+    upsert = (
+        "INSERT INTO profile (id, content, sessions_covered, updated_at) "
+        "VALUES (?, ?, ?, ?) "
+        "ON CONFLICT(id) DO UPDATE SET content=excluded.content, "
+        "sessions_covered=excluded.sessions_covered, updated_at=excluded.updated_at"
+    )
+    db = _connection()
+    with db:  # connection-as-context-manager wraps the write in a transaction
+        db.execute(upsert, params)
+
+
+def get_profile(profile_id: str = "self") -> str | None:
+    """Return the stored profile text for `profile_id`, or None when no such row exists."""
+    r = _connection().execute("SELECT content FROM profile WHERE id = ?", (profile_id,)).fetchone()
+    return None if not r else r["content"]
+
+
 def recall_summaries(query: str, k: int = 3, exclude_session: str | None = None) -> list[Summary]:
     """Top-k session summaries most similar to `query` (the long-term gist tier)."""
     [q_vec] = llm.embed([query])
diff --git a/lyra/profile.py b/lyra/profile.py
new file mode 100644
index 0000000..3929f8e
--- /dev/null
+++ b/lyra/profile.py
@@ -0,0 +1,84 @@
+"""Profile derivation: distill standing facts about the user (semantic memory).
+
+This is consolidation step 2. It reads every session gist and map-reduces them
+into one profile document — who Brian is as a player and person — which is then
+injected into every prompt. This is what answers identity/abstract questions
+("what kind of player am I", "what are my leaks") that raw recall handles badly,
+because those are patterns across many sessions, not facts in any single message.
+"""
+from __future__ import annotations
+
+
+from lyra import config, llm, logbus, memory
+from lyra.llm import Backend, Message
+
+BATCH_CHARS = 18000  # character budget per joined block handed to one LLM call
+
+_MAP_PROMPT = """From these session summaries, extract durable facts about Brian \
+— things that are stably true, not one-off events. Cover, where present: poker \
+games/formats/stakes he plays, his playing style and strengths, recurring leaks \
+and tendencies, mental-game patterns (tilt triggers, scared money, fatigue), \
+relevant personal context, and how he likes to be coached. Terse bullet points. \
+Omit anything not supported by the summaries."""
+
+_REDUCE_PROMPT = """Merge these fact lists into one deduplicated profile of Brian. \
+Organize under these headings: Poker Style, Leaks & Tendencies, Mental Game, \
+Personal Context, Working With Brian. Keep it tight — bullets, no fluff, no \
+repetition. Resolve contradictions toward the more recent/frequent signal."""
+
+
+def _batch_texts(texts: list[str], budget: int) -> list[str]:
+    """Greedily pack `texts`, in order, into blank-line-joined blocks of ~`budget` chars.
+
+    Separators are not counted; a text longer than `budget` gets a block to itself.
+    """
+    groups, used = [[]], 0
+    for text in texts:
+        if groups[-1] and used + len(text) > budget:
+            groups.append([])  # current block is full: open a new one
+        used = len(text) + (used if groups[-1] else 0)  # a fresh block counts from zero
+        groups[-1].append(text)
+    return ["\n\n".join(group) for group in groups if group]
+
+
+def _call(prompt: str, body: str, backend: Backend) -> str:
+    """Run one `llm.complete` call: `prompt` as the system message, `body` as the user message."""
+    system: Message = {"role": "system", "content": prompt}
+    user: Message = {"role": "user", "content": body}
+    conversation: list[Message] = [system, user]
+    return llm.complete(conversation, backend=backend)
+
+
+def rebuild_profile(backend: Backend | None = None) -> str | None:
+    """Re-derive the profile from all session gists, store it, and return it (None if no gists)."""
+    backend = backend or config.load().summary_backend
+    summaries = memory.list_summaries()
+    if not summaries:
+        return None
+    # MAP: extract facts from batches of gists.
+    blocks = _batch_texts([s.content for s in summaries], BATCH_CHARS)
+    partials = [_call(_MAP_PROMPT, b, backend) for b in blocks]
+    logbus.log("info", "profile map done", batches=len(partials), sessions=len(summaries))
+    # REDUCE: fold partials until one remains (forced pairs may exceed BATCH_CHARS).
+    while len(partials) > 1:
+        groups = _batch_texts(partials, BATCH_CHARS)
+        if len(groups) == len(partials):  # nothing fit together: pair up so the loop shrinks
+            groups = ["\n\n".join(partials[i : i + 2]) for i in range(0, len(partials), 2)]
+        partials = [_call(_REDUCE_PROMPT, g, backend) for g in groups]
+    profile = partials[0]
+    memory.set_profile(profile, len(summaries))
+    logbus.log("info", "profile rebuilt", sessions=len(summaries), chars=len(profile))
+    return profile
+
+
+def main() -> int:
+    """`lyra-profile` CLI: rebuild and print the profile; exit code 1 when no summaries exist."""
+    text = rebuild_profile()
+    missing = text is None
+    # Print either the fresh profile or a hint about the prerequisite step.
+    print("No summaries yet — run lyra-summarize first." if missing else text)
+    return 1 if missing else 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/pyproject.toml b/pyproject.toml
index eeff535..221e029 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ lyra = "lyra.__main__:main"
 lyra-web = "lyra.web.server:serve"
 lyra-import = "lyra.ingest:main"
 lyra-summarize = "lyra.summary:main"
+lyra-profile = "lyra.profile:main"
 
 [dependency-groups]
 dev = [