feat: era-rollup + narrative engine (consolidation steps 3-4)

Complete the consolidation pipeline: summaries -> profile + eras -> narrative.

- memory: eras table (per-month digests) + Era, summaries_by_month, store_era,
  list_eras, recall_eras; narrative table + set/get_narrative
- lyra/era.py (lyra-era): groups session gists by the month the session occurred
  (real timestamps) and map-reduces each month into a "what was happening" digest
- lyra/narrative.py (lyra-narrative): distills profile + recent eras into the
  current arc/trends/callbacks ("remember when…", "you're trending toward…")
- chat.build_messages injects the narrative alongside the profile

Verified on the real corpus: 17 monthly eras (Dec 2024-Jun 2026) + a narrative
that surfaces specific callbacks (the $573 Hollywood session, 4 years sober).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-16 19:28:01 +00:00
parent d7e2fce694
commit bfb81428ab
5 changed files with 277 additions and 0 deletions
+119
View File
@@ -52,6 +52,24 @@ CREATE TABLE IF NOT EXISTS profile (
sessions_covered INTEGER NOT NULL,
updated_at TEXT NOT NULL
);
-- Temporal memory: one "what was happening" digest per calendar month, rolled
-- up from that month's session gists. month is "YYYY-MM".
CREATE TABLE IF NOT EXISTS eras (
month TEXT PRIMARY KEY,
content TEXT NOT NULL,
embedding BLOB NOT NULL,
session_count INTEGER NOT NULL,
created_at TEXT NOT NULL
);
-- The current narrative: time-aware arc/trends/callbacks (vs the timeless
-- profile). Distilled from profile + recent eras. Single row (id='current').
CREATE TABLE IF NOT EXISTS narrative (
id TEXT PRIMARY KEY,
content TEXT NOT NULL,
updated_at TEXT NOT NULL
);
"""
# Module-level slot for the SQLite connection; starts out unset (None).
# NOTE(review): presumably filled in lazily by _connection() — confirm there.
_conn: sqlite3.Connection | None = None
@@ -95,6 +113,15 @@ class Summary:
score: float | None = None
@dataclass
class Era:
    """One month digest from the `eras` table.

    Built by list_eras() (score left as None) and by recall_eras() (score set
    to the similarity against the query).
    """

    month: str  # "YYYY-MM"
    # Digest text for the month (the `content` column).
    content: str
    # Value of the `session_count` column as passed to store_era();
    # presumably the number of sessions rolled into this digest — confirm with caller.
    session_count: int
    # ISO-8601 UTC timestamp written by store_era() each time the digest is stored.
    created_at: str
    # Cosine similarity to the query when produced by recall_eras(); otherwise None.
    score: float | None = None
def _to_blob(vec: list[float]) -> bytes:
    """Pack a float vector into raw float32 bytes (the form stored in BLOB columns)."""
    packed = np.asarray(vec, dtype=np.float32)
    return packed.tobytes()
@@ -337,6 +364,98 @@ def get_profile(profile_id: str = "self") -> str | None:
return r["content"] if r else None
# --- Era tier (per-month temporal rollups) ---
def summaries_by_month() -> dict[str, list[str]]:
    """Map "YYYY-MM" -> list of session gists for sessions that occurred that month.

    A session's month comes from its earliest exchange timestamp (real ChatGPT
    dates for imported sessions), not from when it was summarized.

    Returns:
        A dict keyed by month string. Months are inserted, and gists within
        each month are listed, in chronological order of session start, so
        repeated calls over the same data yield the same ordering.
    """
    conn = _connection()
    # Without an ORDER BY the row order of a grouped query is unspecified, which
    # would make the per-month gist order (and so any digest built from it) vary
    # between runs. Sort by each session's first exchange; session_id breaks ties.
    # NOTE(review): chronological ordering relies on created_at being stored in
    # one sortable text format (e.g. ISO-8601 in a single timezone) — confirm.
    rows = conn.execute(
        """
        SELECT substr(MIN(e.created_at), 1, 7) AS month, s.content AS content
        FROM summaries s JOIN exchanges e ON e.session_id = s.session_id
        GROUP BY s.session_id
        ORDER BY MIN(e.created_at), s.session_id
        """
    ).fetchall()
    out: dict[str, list[str]] = {}
    for r in rows:
        out.setdefault(r["month"], []).append(r["content"])
    return out
def store_era(month: str, content: str, session_count: int) -> None:
    """Embed a month's digest and upsert it into the `eras` table.

    Args:
        month: Primary key of the row ("YYYY-MM").
        content: Digest text; also the text that gets embedded.
        session_count: Stored as-is in the `session_count` column.

    An existing row for the same month has its content, embedding,
    session_count and created_at overwritten.
    """
    # embed() is called with one text; unpacking enforces exactly one vector back.
    vectors = llm.embed([content])
    [vector] = vectors
    stamp = datetime.now(timezone.utc).isoformat()
    upsert_sql = (
        "INSERT INTO eras (month, content, embedding, session_count, created_at) "
        "VALUES (?, ?, ?, ?, ?) "
        "ON CONFLICT(month) DO UPDATE SET content=excluded.content, "
        "embedding=excluded.embedding, session_count=excluded.session_count, "
        "created_at=excluded.created_at"
    )
    db = _connection()
    # The connection is used as a context manager to wrap the write.
    with db:
        params = (month, content, _to_blob(vector), session_count, stamp)
        db.execute(upsert_sql, params)
def list_eras() -> list[Era]:
    """Return every stored month digest, ordered by month ascending.

    The embedding column is not read, and `score` is left at its default (None).
    """
    cursor = _connection().execute(
        "SELECT month, content, session_count, created_at FROM eras ORDER BY month ASC"
    )
    eras: list[Era] = []
    for row in cursor.fetchall():
        era = Era(
            month=row["month"],
            content=row["content"],
            session_count=row["session_count"],
            created_at=row["created_at"],
        )
        eras.append(era)
    return eras
def set_narrative(content: str, narrative_id: str = "current") -> None:
    """Insert the narrative row, or overwrite it when `narrative_id` already exists.

    Args:
        content: Narrative text to store.
        narrative_id: Row id; defaults to "current".

    `updated_at` is set to the current UTC time on every call.
    """
    upsert_sql = (
        "INSERT INTO narrative (id, content, updated_at) VALUES (?, ?, ?) "
        "ON CONFLICT(id) DO UPDATE SET content=excluded.content, updated_at=excluded.updated_at"
    )
    stamp = datetime.now(timezone.utc).isoformat()
    db = _connection()
    # The connection is used as a context manager to wrap the write.
    with db:
        db.execute(upsert_sql, (narrative_id, content, stamp))
def get_narrative(narrative_id: str = "current") -> str | None:
    """Return the stored narrative text for `narrative_id`, or None if there is no such row."""
    row = _connection().execute(
        "SELECT content FROM narrative WHERE id = ?", (narrative_id,)
    ).fetchone()
    if not row:
        return None
    return row["content"]
def recall_eras(query: str, k: int = 2) -> list[Era]:
    """Top-k month digests most similar to `query` (time-based context).

    Args:
        query: Text to embed and compare against each stored era embedding.
        k: Maximum number of eras to return. Values <= 0 yield an empty list.

    Returns:
        Up to `k` Era objects, highest cosine similarity first, each with
        `score` set to its similarity. Empty when no eras are stored.
    """
    # A negative k would make the [:k] slice below drop the *last* |k| results
    # instead of returning nothing, so reject non-positive k up front.
    if k <= 0:
        return []
    conn = _connection()
    rows = conn.execute(
        "SELECT month, content, embedding, session_count, created_at FROM eras"
    ).fetchall()
    # Check for stored eras before embedding: no point paying for an
    # embedding call when there is nothing to compare against.
    if not rows:
        return []
    [q_vec] = llm.embed([query])
    q = np.asarray(q_vec, dtype=np.float32)
    matrix = np.stack([_from_blob(r["embedding"]) for r in rows])
    norms = np.linalg.norm(matrix, axis=1)
    # Cosine similarity; the small epsilon avoids division by zero for a
    # zero-norm vector.
    scores = (matrix @ q) / (norms * np.linalg.norm(q) + 1e-9)
    top_idx = np.argsort(scores)[::-1][:k]
    return [
        Era(
            month=rows[i]["month"],
            content=rows[i]["content"],
            session_count=rows[i]["session_count"],
            created_at=rows[i]["created_at"],
            score=float(scores[i]),
        )
        for i in top_idx
    ]
def recall_summaries(query: str, k: int = 3, exclude_session: str | None = None) -> list[Summary]:
"""Top-k session summaries most similar to `query` (the long-term gist tier)."""
[q_vec] = llm.embed([query])