feat: tiered, compacting memory (phase 1.5)

Older sessions fade to a general idea; details stay retrievable.

- memory: summaries table (one compacted gist per session, embedded), plus store_summary/get_summary/recall_summaries and unsummarized_count (tracks exchanges newer than the current summary)
- lyra/summary.py: summarize_session compacts a session's raw turns into a third-person gist (default SUMMARY_BACKEND=local, so compaction is free); maybe_summarize re-summarizes once SUMMARIZE_AFTER new turns accumulate
- chat.build_messages now layers context in tiers: persona -> gists of other sessions -> a few sharp raw cross-session details -> current session raw turns -> new message; respond() compacts the session after each turn
- web: POST /sessions/{id}/summarize to compact on demand
- summarization activity surfaces in the live log

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 18:52:58 +00:00
parent 84c4f75e03
commit d7c258eba0
6 changed files with 211 additions and 23 deletions
@@ -1,43 +1,62 @@
-"""The chat turn loop: persona + recalled memory + recent context -> reply.
+"""The chat turn loop: persona + tiered memory + recent context -> reply.

-Each turn assembles the persona system prompt, semantically-relevant memories
-recalled from across all past sessions, and the recent turns of the current
-session, then asks the model for a reply and persists both sides.
+Context is assembled in tiers (oldest/most-compacted first):
+  0. persona (always first)
+  1. long-term gist  — relevant *summaries* of other sessions
+  2. sharp details   — a few raw cross-session exchanges (so specifics survive)
+  3. recent raw turns of the current session (full fidelity)
+  4. the new user message
+After replying, the session is compacted if enough new turns have accumulated.
 """
 from __future__ import annotations

-from lyra import config, llm, logbus, memory, persona
+from lyra import config, llm, logbus, memory, persona, summary
 from lyra.llm import Backend, Message

-RECALL_K = 5
-RECENT_N = 10
+RECALL_K = 3  # raw cross-session "sharp detail" hits
+RECENT_N = 10  # raw turns of the current session
+SUMMARY_K = 3  # other-session gists


-def _memory_note(exchanges: list[memory.Exchange]) -> Message:
-    """Format recalled memories as a system note Lyra can draw on."""
-    lines = []
-    for ex in exchanges:
-        when = ex.created_at[:10]  # YYYY-MM-DD
-        lines.append(f"- ({when}, {ex.role}) {ex.content}")
-    body = "Relevant things you remember from past conversations:\n" + "\n".join(lines)
+def _summary_note(summaries: list[memory.Summary]) -> Message:
+    lines = [f"- ({s.created_at[:10]}) {s.content}" for s in summaries]
+    body = "Gist of earlier sessions (compacted — ask if you need specifics):\n" + "\n".join(lines)
+    return {"role": "system", "content": body}
+
+
+def _detail_note(exchanges: list[memory.Exchange]) -> Message:
+    lines = [f"- ({ex.created_at[:10]}, {ex.role}) {ex.content}" for ex in exchanges]
+    body = "Specific things you recall from past conversations:\n" + "\n".join(lines)
    return {"role": "system", "content": body}


 def build_messages(session_id: str, user_msg: str) -> list[Message]:
-    """Assemble the full message list for one turn."""
+    """Assemble the full, tiered message list for one turn."""
    messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}]

    recent = memory.recent(session_id, n=RECENT_N)
    recent_ids = {ex.id for ex in recent}

-    # Cross-session recall, minus anything already shown in the recent window.
-    recalled = [
-        ex for ex in memory.recall(user_msg, k=RECALL_K) if ex.id not in recent_ids
-    ]
-    logbus.log("debug", "context built", recent=len(recent), recalled=len(recalled))
-    if recalled:
-        messages.append(_memory_note(recalled))
+    # Tier 1: compacted gists of *other* sessions (long-term, general idea).
+    summaries = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id)
+    if summaries:
+        messages.append(_summary_note(summaries))

+    # Tier 2: a few sharp raw details from other sessions (so specifics survive
+    # compaction). Skip the current session (its raw turns are in `recent`).
+    recalled = [
+        ex for ex in memory.recall(user_msg, k=RECALL_K)
+        if ex.id not in recent_ids and ex.session_id != session_id
+    ]
+    if recalled:
+        messages.append(_detail_note(recalled))
+
+    logbus.log(
+        "debug", "context built",
+        recent=len(recent), summaries=len(summaries), details=len(recalled),
+    )
+
+    # Tier 3: current session, full fidelity.
    for ex in recent:
        messages.append({"role": ex.role, "content": ex.content})

@@ -60,4 +79,7 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud") -> str:

    memory.remember(session_id, "user", user_msg)
    memory.remember(session_id, "assistant", reply)
+
+    # Compact this session once enough new turns have piled up.
+    summary.maybe_summarize(session_id)
    return reply