feat: tiered, compacting memory (phase 1.5)
Older sessions fade to a general idea; details stay retrievable.
- memory: summaries table (one compacted gist per session, embedded), plus
store_summary/get_summary/recall_summaries and unsummarized_count (tracks
exchanges newer than the current summary)
- lyra/summary.py: summarize_session compacts a session's raw turns into a
third-person gist (default SUMMARY_BACKEND=local, so compaction is free);
maybe_summarize re-summarizes once SUMMARIZE_AFTER new turns accumulate
- chat.build_messages now layers context in tiers: persona -> gists of other
sessions -> a few sharp raw cross-session details -> current session raw
turns -> new message; respond() compacts the session after each turn
- web: POST /sessions/{id}/summarize to compact on demand
- summarization activity surfaces in the live log
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+100
@@ -33,6 +33,16 @@ CREATE TABLE IF NOT EXISTS sessions (
|
||||
name TEXT,
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
|
||||
-- One compacted "gist" per session. last_exchange_id marks how far the summary
|
||||
-- covers, so we know when enough new turns have accumulated to re-summarize.
|
||||
CREATE TABLE IF NOT EXISTS summaries (
|
||||
session_id TEXT PRIMARY KEY,
|
||||
content TEXT NOT NULL,
|
||||
embedding BLOB NOT NULL,
|
||||
last_exchange_id INTEGER NOT NULL,
|
||||
created_at TEXT NOT NULL
|
||||
);
|
||||
"""
|
||||
|
||||
_conn: sqlite3.Connection | None = None
|
||||
@@ -67,6 +77,15 @@ class Exchange:
|
||||
score: float | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Summary:
|
||||
session_id: str
|
||||
content: str
|
||||
last_exchange_id: int
|
||||
created_at: str
|
||||
score: float | None = None
|
||||
|
||||
|
||||
def _to_blob(vec: list[float]) -> bytes:
|
||||
return np.asarray(vec, dtype=np.float32).tobytes()
|
||||
|
||||
@@ -171,6 +190,7 @@ def delete_session(session_id: str) -> None:
|
||||
with conn:
|
||||
conn.execute("DELETE FROM exchanges WHERE session_id = ?", (session_id,))
|
||||
conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
|
||||
conn.execute("DELETE FROM summaries WHERE session_id = ?", (session_id,))
|
||||
|
||||
|
||||
def recall(query: str, k: int = 5, session_id: str | None = None) -> list[Exchange]:
|
||||
@@ -204,3 +224,83 @@ def recall(query: str, k: int = 5, session_id: str | None = None) -> list[Exchan
|
||||
)
|
||||
for i in top_idx
|
||||
]
|
||||
|
||||
|
||||
# --- Summary tier (compacted per-session gists) ---
|
||||
|
||||
|
||||
def store_summary(session_id: str, content: str, last_exchange_id: int) -> None:
    """Embed and persist the gist of a session, replacing any prior summary.

    Args:
        session_id: Session the summary belongs to; there is at most one
            summary row per session (it is the table's primary key).
        content: The summary text; it is embedded and stored verbatim.
        last_exchange_id: Id of the newest exchange covered by `content`.
    """
    # Embed before opening the write transaction so the (potentially slow)
    # embedding call never runs while the transaction is held open.
    [embedding] = llm.embed([content])
    now = datetime.now(timezone.utc).isoformat()

    conn = _connection()
    with conn:  # commits on success, rolls back on error
        # Upsert: a second summary for the same session overwrites the first.
        conn.execute(
            "INSERT INTO summaries (session_id, content, embedding, last_exchange_id, created_at) "
            "VALUES (?, ?, ?, ?, ?) "
            "ON CONFLICT(session_id) DO UPDATE SET "
            "content=excluded.content, embedding=excluded.embedding, "
            "last_exchange_id=excluded.last_exchange_id, created_at=excluded.created_at",
            (session_id, content, _to_blob(embedding), last_exchange_id, now),
        )
|
||||
|
||||
|
||||
def get_summary(session_id: str) -> Summary | None:
    """Return the stored summary for `session_id`, or None if there is none.

    The embedding column is not selected, and the returned Summary keeps the
    default `score` of None.
    """
    conn = _connection()
    row = conn.execute(
        "SELECT session_id, content, last_exchange_id, created_at FROM summaries "
        "WHERE session_id = ?",
        (session_id,),
    ).fetchone()
    if row is None:
        return None
    # Rows are indexed by column name here, as elsewhere in this module.
    return Summary(
        session_id=row["session_id"],
        content=row["content"],
        last_exchange_id=row["last_exchange_id"],
        created_at=row["created_at"],
    )
|
||||
|
||||
|
||||
def unsummarized_count(session_id: str) -> int:
    """How many exchanges in this session are newer than its current summary.

    A session with no summary row uses a cutoff of 0, so exchanges with an
    id greater than 0 are counted.
    """
    conn = _connection()
    # Resolve the cutoff and the count in one statement so both come from the
    # same read; the scalar subquery yields NULL when the session has no
    # summary, which COALESCE turns into the 0 cutoff.
    row = conn.execute(
        "SELECT COUNT(*) AS n FROM exchanges "
        "WHERE session_id = ? AND id > COALESCE("
        "(SELECT last_exchange_id FROM summaries WHERE session_id = ?), 0)",
        (session_id, session_id),
    ).fetchone()
    return int(row["n"])
|
||||
|
||||
|
||||
def recall_summaries(query: str, k: int = 3, exclude_session: str | None = None) -> list[Summary]:
    """Top-k session summaries most similar to `query` (the long-term gist tier).

    Args:
        query: Text to embed and compare against every stored summary.
        k: Maximum number of summaries to return. Values <= 0 yield an
            empty list.
        exclude_session: If given, that session's summary is left out of
            the candidates.

    Returns:
        Summaries ordered by descending cosine similarity, each with `score`
        set to its similarity to `query`.
    """
    # A negative k would make the `[:k]` slice below drop items from the end
    # instead of returning nothing, so handle non-positive k up front (this
    # also skips a pointless embedding call).
    if k <= 0:
        return []

    [q_vec] = llm.embed([query])
    q = np.asarray(q_vec, dtype=np.float32)

    conn = _connection()
    sql = "SELECT session_id, content, embedding, last_exchange_id, created_at FROM summaries"
    params: tuple = ()
    if exclude_session is not None:
        sql += " WHERE session_id != ?"
        params = (exclude_session,)
    rows = conn.execute(sql, params).fetchall()
    if not rows:
        return []

    # One row per stored summary.
    # NOTE(review): assumes every stored embedding has the same length as the
    # query embedding; np.stack / matmul will raise otherwise - confirm.
    matrix = np.stack([_from_blob(r["embedding"]) for r in rows])
    norms = np.linalg.norm(matrix, axis=1)
    # Cosine similarity; the epsilon guards against division by zero for
    # all-zero vectors.
    scores = (matrix @ q) / (norms * np.linalg.norm(q) + 1e-9)

    # Indices of the best-scoring rows, highest first.
    top_idx = np.argsort(scores)[::-1][:k]
    return [
        Summary(
            session_id=rows[i]["session_id"],
            content=rows[i]["content"],
            last_exchange_id=rows[i]["last_exchange_id"],
            created_at=rows[i]["created_at"],
            score=float(scores[i]),
        )
        for i in top_idx
    ]
|
||||
|
||||
Reference in New Issue
Block a user