feat: tiered, compacting memory (phase 1.5)

Older sessions fade to a general idea; details stay retrievable.

- memory: summaries table (one compacted gist per session, embedded), plus
  store_summary/get_summary/recall_summaries and unsummarized_count (tracks
  exchanges newer than the current summary)
- lyra/summary.py: summarize_session compacts a session's raw turns into a
  third-person gist (default SUMMARY_BACKEND=local, so compaction is free);
  maybe_summarize re-summarizes once SUMMARIZE_AFTER new turns accumulate
- chat.build_messages now layers context in tiers: persona -> gists of other
  sessions -> a few sharp raw cross-session details -> current session raw
  turns -> new message; respond() compacts the session after each turn
- web: POST /sessions/{id}/summarize to compact on demand
- summarization activity surfaces in the live log

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 18:52:58 +00:00
parent 84c4f75e03
commit d7c258eba0
6 changed files with 211 additions and 23 deletions
+44 -22
View File
@@ -1,43 +1,62 @@
"""The chat turn loop: persona + recalled memory + recent context -> reply.
"""The chat turn loop: persona + tiered memory + recent context -> reply.
Each turn assembles the persona system prompt, semantically-relevant memories
recalled from across all past sessions, and the recent turns of the current
session, then asks the model for a reply and persists both sides.
Context is assembled in tiers (oldest/most-compacted first):
1. persona
2. long-term gist — relevant *summaries* of other sessions
3. sharp details — a few raw cross-session exchanges (so specifics survive)
4. recent raw turns of the current session (full fidelity)
5. the new user message
After replying, the session is compacted if enough new turns have accumulated.
"""
from __future__ import annotations
from lyra import config, llm, logbus, memory, persona
from lyra import config, llm, logbus, memory, persona, summary
from lyra.llm import Backend, Message
RECALL_K = 5
RECENT_N = 10
RECALL_K = 3 # raw cross-session "sharp detail" hits
RECENT_N = 10 # raw turns of the current session
SUMMARY_K = 3 # other-session gists
def _memory_note(exchanges: list[memory.Exchange]) -> Message:
"""Format recalled memories as a system note Lyra can draw on."""
lines = []
for ex in exchanges:
when = ex.created_at[:10] # YYYY-MM-DD
lines.append(f"- ({when}, {ex.role}) {ex.content}")
body = "Relevant things you remember from past conversations:\n" + "\n".join(lines)
def _summary_note(summaries: list[memory.Summary]) -> Message:
    """Render session summaries as one system message of bullet points.

    Each summary contributes a single bullet made of the first ten
    characters of its ``created_at`` value followed by its ``content``.
    The bullets are appended, newline-separated, to a fixed header line.
    """
    header = "Gist of earlier sessions (compacted — ask if you need specifics):\n"
    bullets = []
    for gist in summaries:
        stamp = gist.created_at[:10]  # leading date portion of the timestamp
        bullets.append(f"- ({stamp}) {gist.content}")
    note: Message = {"role": "system", "content": header + "\n".join(bullets)}
    return note
def _detail_note(exchanges: list[memory.Exchange]) -> Message:
    """Render recalled raw exchanges as one system message of bullet points.

    Each exchange contributes a single bullet carrying the first ten
    characters of its ``created_at`` value, its ``role``, and its
    ``content``. The bullets follow a fixed header line, one per line.
    """

    def _bullet(item) -> str:
        # Only the leading date portion of the timestamp is shown.
        return f"- ({item.created_at[:10]}, {item.role}) {item.content}"

    listing = "\n".join(map(_bullet, exchanges))
    text = "Specific things you recall from past conversations:\n" + listing
    return {"role": "system", "content": text}
def build_messages(session_id: str, user_msg: str) -> list[Message]:
"""Assemble the full message list for one turn."""
"""Assemble the full, tiered message list for one turn."""
messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}]
recent = memory.recent(session_id, n=RECENT_N)
recent_ids = {ex.id for ex in recent}
# Cross-session recall, minus anything already shown in the recent window.
recalled = [
ex for ex in memory.recall(user_msg, k=RECALL_K) if ex.id not in recent_ids
]
logbus.log("debug", "context built", recent=len(recent), recalled=len(recalled))
if recalled:
messages.append(_memory_note(recalled))
# Long-term gist (tier 2 in the module docstring): compacted gists of *other* sessions (general idea).
summaries = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id)
if summaries:
messages.append(_summary_note(summaries))
# Sharp details (tier 3 in the module docstring): a few raw details from other
# sessions (so specifics survive compaction). Skip the current session (its raw
# turns are in `recent`).
recalled = [
ex for ex in memory.recall(user_msg, k=RECALL_K)
if ex.id not in recent_ids and ex.session_id != session_id
]
if recalled:
messages.append(_detail_note(recalled))
logbus.log(
"debug", "context built",
recent=len(recent), summaries=len(summaries), details=len(recalled),
)
# Recent raw turns (tier 4 in the module docstring): current session, full fidelity.
for ex in recent:
messages.append({"role": ex.role, "content": ex.content})
@@ -60,4 +79,7 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud") -> str:
memory.remember(session_id, "user", user_msg)
memory.remember(session_id, "assistant", reply)
# Compact this session once enough new turns have piled up.
summary.maybe_summarize(session_id)
return reply