feat: era-rollup + narrative engine (consolidation steps 3-4)

Complete the consolidation pipeline: summaries -> profile + eras -> narrative.

- memory: eras table (per-month digests) + Era, summaries_by_month, store_era, list_eras, recall_eras; narrative table + set/get_narrative
- lyra/era.py (lyra-era): groups session gists by the month the session occurred (real timestamps) and map-reduces each month into a "what was happening" digest
- lyra/narrative.py (lyra-narrative): distills profile + recent eras into the current arc/trends/callbacks ("remember when…", "you're trending toward…")
- chat.build_messages injects the narrative alongside the profile

Verified on the real corpus: 17 monthly eras (Dec 2024-Jun 2026) + a narrative that surfaces specific callbacks (the $573 Hollywood session, 4 years sober).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-16 19:28:01 +00:00
parent d7e2fce694
commit bfb81428ab
5 changed files with 277 additions and 0 deletions
@@ -52,6 +52,24 @@ CREATE TABLE IF NOT EXISTS profile (
    sessions_covered INTEGER NOT NULL,
    updated_at TEXT NOT NULL
 );
+
+-- Temporal memory: one "what was happening" digest per calendar month, rolled
+-- up from that month's session gists. month is "YYYY-MM".
+CREATE TABLE IF NOT EXISTS eras (
+    month TEXT PRIMARY KEY,
+    content TEXT NOT NULL,
+    embedding BLOB NOT NULL,
+    session_count INTEGER NOT NULL,
+    created_at TEXT NOT NULL
+);
+
+-- The current narrative: time-aware arc/trends/callbacks (vs the timeless
+-- profile). Distilled from profile + recent eras. Single row (id='current').
+CREATE TABLE IF NOT EXISTS narrative (
+    id TEXT PRIMARY KEY,
+    content TEXT NOT NULL,
+    updated_at TEXT NOT NULL
+);
 """

 # Module-level SQLite connection handle; starts as None.
 # NOTE(review): presumably opened and cached by _connection() on first use — confirm.
 _conn: sqlite3.Connection | None = None
@@ -95,6 +113,15 @@ class Summary:
    score: float | None = None


+@dataclass
+class Era:
+    """One calendar month's digest, mirroring a row of the `eras` table.
+
+    The stored embedding is not carried on this object. `score` is not a
+    stored column: recall_eras() sets it to the cosine similarity against
+    the query, while list_eras() leaves it as None.
+    """
+
+    month: str  # "YYYY-MM"
+    content: str  # the month's digest text
+    session_count: int
+    created_at: str  # ISO-8601 UTC timestamp written by store_era()
+    score: float | None = None  # similarity to a recall query; None when unranked
+
+
 def _to_blob(vec: list[float]) -> bytes:
     """Pack a float vector into raw float32 bytes for storage in a BLOB column."""
     as_float32 = np.asarray(vec, dtype=np.float32)
     return as_float32.tobytes()

@@ -337,6 +364,98 @@ def get_profile(profile_id: str = "self") -> str | None:
    return r["content"] if r else None


+# --- Era tier (per-month temporal rollups) ---
+
+
+def summaries_by_month() -> dict[str, list[str]]:
+    """Map "YYYY-MM" -> list of session gists for sessions that occurred that month.
+
+    A session's month comes from its earliest exchange timestamp (real ChatGPT
+    dates for imported sessions), not when it was summarized.
+    """
+    conn = _connection()
+    rows = conn.execute(
+        """
+        SELECT substr(MIN(e.created_at), 1, 7) AS month, s.content AS content
+        FROM summaries s JOIN exchanges e ON e.session_id = s.session_id
+        GROUP BY s.session_id
+        """
+    ).fetchall()
+    out: dict[str, list[str]] = {}
+    for r in rows:
+        out.setdefault(r["month"], []).append(r["content"])
+    return out
+
+
+def store_era(month: str, content: str, session_count: int) -> None:
+    """Embed and persist a month's digest, replacing any prior one."""
+    [embedding] = llm.embed([content])
+    now = datetime.now(timezone.utc).isoformat()
+    conn = _connection()
+    with conn:
+        conn.execute(
+            "INSERT INTO eras (month, content, embedding, session_count, created_at) "
+            "VALUES (?, ?, ?, ?, ?) "
+            "ON CONFLICT(month) DO UPDATE SET content=excluded.content, "
+            "embedding=excluded.embedding, session_count=excluded.session_count, "
+            "created_at=excluded.created_at",
+            (month, content, _to_blob(embedding), session_count, now),
+        )
+
+
+def list_eras() -> list[Era]:
+    """All month digests, chronological."""
+    conn = _connection()
+    rows = conn.execute(
+        "SELECT month, content, session_count, created_at FROM eras ORDER BY month ASC"
+    ).fetchall()
+    return [
+        Era(month=r["month"], content=r["content"],
+            session_count=r["session_count"], created_at=r["created_at"])
+        for r in rows
+    ]
+
+
+def set_narrative(content: str, narrative_id: str = "current") -> None:
+    """Store/replace the current narrative."""
+    now = datetime.now(timezone.utc).isoformat()
+    conn = _connection()
+    with conn:
+        conn.execute(
+            "INSERT INTO narrative (id, content, updated_at) VALUES (?, ?, ?) "
+            "ON CONFLICT(id) DO UPDATE SET content=excluded.content, updated_at=excluded.updated_at",
+            (narrative_id, content, now),
+        )
+
+
+def get_narrative(narrative_id: str = "current") -> str | None:
+    conn = _connection()
+    r = conn.execute("SELECT content FROM narrative WHERE id = ?", (narrative_id,)).fetchone()
+    return r["content"] if r else None
+
+
+def recall_eras(query: str, k: int = 2) -> list[Era]:
+    """Top-k month digests most similar to `query` (time-based context)."""
+    [q_vec] = llm.embed([query])
+    q = np.asarray(q_vec, dtype=np.float32)
+    conn = _connection()
+    rows = conn.execute(
+        "SELECT month, content, embedding, session_count, created_at FROM eras"
+    ).fetchall()
+    if not rows:
+        return []
+    matrix = np.stack([_from_blob(r["embedding"]) for r in rows])
+    norms = np.linalg.norm(matrix, axis=1)
+    scores = (matrix @ q) / (norms * np.linalg.norm(q) + 1e-9)
+    top_idx = np.argsort(scores)[::-1][:k]
+    return [
+        Era(month=rows[i]["month"], content=rows[i]["content"],
+            session_count=rows[i]["session_count"], created_at=rows[i]["created_at"],
+            score=float(scores[i]))
+        for i in top_idx
+    ]
+
+
 def recall_summaries(query: str, k: int = 3, exclude_session: str | None = None) -> list[Summary]:
    """Top-k session summaries most similar to `query` (the long-term gist tier)."""
    [q_vec] = llm.embed([query])