From 236a16b33151406a837a41a6a69694e35529f9a3 Mon Sep 17 00:00:00 2001 From: serversdown Date: Mon, 15 Jun 2026 23:52:35 +0000 Subject: [PATCH 01/51] feat: inspect the full prompt in the live log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "context built" event now carries the fully-rendered prompt (persona, gists, recalled details, recent turns, the new message) plus a total char count. The log panel renders it as a collapsed "view full prompt" block — clean by default, one click to see exactly what hit the model. Co-Authored-By: Claude Opus 4.8 (1M context) --- lyra/chat.py | 16 +++++++++++----- lyra/web/static/index.html | 6 +++++- lyra/web/static/style.css | 22 ++++++++++++++++++++++ 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/lyra/chat.py b/lyra/chat.py index 971bc7d..551d58f 100644 --- a/lyra/chat.py +++ b/lyra/chat.py @@ -30,6 +30,11 @@ def _detail_note(exchanges: list[memory.Exchange]) -> Message: return {"role": "system", "content": body} +def _render(messages: list[Message]) -> str: + """Human-readable dump of the exact prompt, for the live-log inspector.""" + return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages) + + def build_messages(session_id: str, user_msg: str) -> list[Message]: """Assemble the full, tiered message list for one turn.""" messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}] @@ -51,16 +56,17 @@ def build_messages(session_id: str, user_msg: str) -> list[Message]: if recalled: messages.append(_detail_note(recalled)) - logbus.log( - "debug", "context built", - recent=len(recent), summaries=len(summaries), details=len(recalled), - ) - # Tier 3: current session, full fidelity. 
for ex in recent: messages.append({"role": ex.role, "content": ex.content}) messages.append({"role": "user", "content": user_msg}) + + logbus.log( + "debug", "context built", + recent=len(recent), summaries=len(summaries), details=len(recalled), + chars=sum(len(m["content"]) for m in messages), detail=_render(messages), + ) return messages diff --git a/lyra/web/static/index.html b/lyra/web/static/index.html index 3cbb822..adb30a6 100644 --- a/lyra/web/static/index.html +++ b/lyra/web/static/index.html @@ -734,7 +734,10 @@ const level = event.level || 'info'; const time = new Date((event.ts || 0) * 1000).toLocaleTimeString(); - const fields = event.fields || {}; + const fields = Object.assign({}, event.fields || {}); + // `detail` is rendered as an expandable block, not an inline field. + const detail = fields.detail; + delete fields.detail; const fieldStr = Object.keys(fields).length ? Object.entries(fields).map(([k, v]) => `${k}=${v}`).join(' ') : ''; @@ -746,6 +749,7 @@ ${escapeHtml(level)} ${escapeHtml(event.msg || '')} ${fieldStr ? `${escapeHtml(fieldStr)}` : ''} + ${detail ? `
<details class="log-detail"><summary>view full prompt</summary><pre>${escapeHtml(detail)}</pre></details>
` : ''} `; thinkingContent.appendChild(eventDiv); diff --git a/lyra/web/static/style.css b/lyra/web/static/style.css index bdfbb46..a93bf8a 100644 --- a/lyra/web/static/style.css +++ b/lyra/web/static/style.css @@ -941,3 +941,25 @@ select:hover { .log-error .log-level, .log-error .log-msg { color: #fca5a5; } .log-system { border-left-color: #00ff66; } .log-system .log-level { color: #00ff66; } + +.log-detail { width: 100%; margin-top: 4px; } +.log-detail summary { + cursor: pointer; + color: var(--accent); + font-size: 0.72rem; + user-select: none; +} +.log-detail pre { + margin: 6px 0 0; + padding: 8px; + max-height: 340px; + overflow: auto; + background: rgba(0,0,0,0.25); + border-left: 2px solid var(--accent); + border-radius: 4px; + font-size: 0.72rem; + line-height: 1.4; + white-space: pre-wrap; + word-break: break-word; + color: var(--text); +} -- 2.52.0 From f3037b78794d2a119149920535523fc08af18f9b Mon Sep 17 00:00:00 2001 From: serversdown Date: Tue, 16 Jun 2026 00:51:45 +0000 Subject: [PATCH 02/51] feat: ChatGPT chat-log importer Import the parser's {title, messages} JSON into Lyra's memory so past conversations seed recall (and, later, the era-rollup tier). - lyra/ingest.py: one conversation -> one session, text messages -> exchanges; skips non-text (image asset) messages and non user/assistant roles; embeddings batched; idempotent by filename-derived session id; `lyra-import ` CLI - memory.add_exchanges_bulk: batched insert of pre-embedded rows Format has no timestamps yet, so imports are stamped at import time; a future dated export will let era memory group by real calendar time. Verified on the 68-file lyra dev set: 7519 exchanges, idempotent re-run, recall returns relevant history. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- lyra/ingest.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ lyra/memory.py | 16 +++++++++ pyproject.toml | 1 + 3 files changed, 110 insertions(+) create mode 100644 lyra/ingest.py diff --git a/lyra/ingest.py b/lyra/ingest.py new file mode 100644 index 0000000..7f6ea5b --- /dev/null +++ b/lyra/ingest.py @@ -0,0 +1,93 @@ +"""Import parsed ChatGPT chat logs into Lyra's memory. + +Consumes the parser's `{"title": ..., "messages": [{"role", "content"}]}` format +(one JSON file per conversation). Each conversation becomes a Lyra session; each +text message becomes an exchange. Embeddings are batched. Import is idempotent — +a conversation already present (by session id) is skipped. + +Timestamps: this format carries no dates, so imported exchanges are stamped with +`created_at` (default: now). A future timestamped export will let era memory group +by real calendar time; pass real per-message dates then. +""" +from __future__ import annotations + +import json +import sys +from datetime import datetime, timezone +from pathlib import Path + +from lyra import llm, logbus, memory + +EMBED_BATCH = 64 +EMBED_CHAR_CAP = 6000 # cap embed input size; full content is still stored + + +def _session_id(path: Path) -> str: + """Stable id derived from the filename, so re-imports don't duplicate.""" + return "import-" + path.stem + + +def _clean_messages(messages: list[dict]) -> list[tuple[str, str]]: + out: list[tuple[str, str]] = [] + for m in messages: + role = m.get("role") + if role not in ("user", "assistant"): + continue + content = (m.get("content") or "").strip() + if not content or content.startswith('{"content_type"'): # skip empty / image assets + continue + out.append((role, content)) + return out + + +def import_file(path: Path, created_at: str) -> int: + """Import one conversation file. 
Returns exchanges added (0 if skipped/empty).""" + data = json.loads(path.read_text(encoding="utf-8")) + session_id = _session_id(path) + if memory.history(session_id): # already imported + return 0 + + msgs = _clean_messages(data.get("messages", [])) + if not msgs: + return 0 + + memory.ensure_session(session_id, name=data.get("title") or path.stem) + + rows: list[tuple[str, str, list[float], str]] = [] + for i in range(0, len(msgs), EMBED_BATCH): + batch = msgs[i : i + EMBED_BATCH] + embeddings = llm.embed([content[:EMBED_CHAR_CAP] for _, content in batch]) + for (role, content), emb in zip(batch, embeddings): + rows.append((role, content, emb, created_at)) + + return memory.add_exchanges_bulk(session_id, rows) + + +def import_dir(dirpath: str | Path, created_at: str | None = None) -> dict: + """Import every *.json under dirpath (recursively). Returns a small report.""" + created_at = created_at or datetime.now(timezone.utc).isoformat() + files = sorted(Path(dirpath).rglob("*.json")) + sessions, exchanges = 0, 0 + for path in files: + added = import_file(path, created_at) + if added: + sessions += 1 + exchanges += added + logbus.log( + "info", "import complete", dir=str(dirpath), + files=len(files), sessions=sessions, exchanges=exchanges, + ) + return {"files": len(files), "sessions_imported": sessions, "exchanges": exchanges} + + +def main() -> int: + if len(sys.argv) < 2: + print("usage: lyra-import ", file=sys.stderr) + return 2 + report = import_dir(sys.argv[1]) + print(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/lyra/memory.py b/lyra/memory.py index 6119bdd..6001da4 100644 --- a/lyra/memory.py +++ b/lyra/memory.py @@ -108,6 +108,22 @@ def remember(session_id: str, role: str, content: str) -> int: return int(cur.lastrowid) +def add_exchanges_bulk(session_id: str, rows: list[tuple[str, str, list[float], str]]) -> int: + """Insert many pre-embedded exchanges at once. 
+ + Each row is (role, content, embedding, created_at). Used by the importer to + avoid one INSERT (and one embed round-trip) per message. Returns row count. + """ + conn = _connection() + with conn: + conn.executemany( + "INSERT INTO exchanges (session_id, role, content, embedding, created_at) " + "VALUES (?, ?, ?, ?, ?)", + [(session_id, role, content, _to_blob(emb), ca) for role, content, emb, ca in rows], + ) + return len(rows) + + def recent(session_id: str, n: int = 10) -> list[Exchange]: """Last `n` exchanges from a session, oldest first.""" conn = _connection() diff --git a/pyproject.toml b/pyproject.toml index f035399..d333bfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ [project.scripts] lyra = "lyra.__main__:main" lyra-web = "lyra.web.server:serve" +lyra-import = "lyra.ingest:main" [dependency-groups] dev = [ -- 2.52.0 From 938305f17dd9f2a1f938ff27137215b7eb8ef0ca Mon Sep 17 00:00:00 2001 From: serversdown Date: Tue, 16 Jun 2026 02:36:54 +0000 Subject: [PATCH 03/51] chore: update gitignore for export data --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5c96f54..858bb37 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,5 @@ data/ *.log #lyra Stuff -/core/relay/sessions/ \ No newline at end of file +/core/relay/sessions/ +/chat-gpt-export/ \ No newline at end of file -- 2.52.0 From 194e3e64b9244da79171e011cd4b43bff6d0c327 Mon Sep 17 00:00:00 2001 From: serversdown Date: Tue, 16 Jun 2026 02:40:32 +0000 Subject: [PATCH 04/51] feat: import raw ChatGPT export (new sharded format) OpenAI's export changed: conversations.json is now sharded into conversations-000.json..NNN.json, each a JSON array of conversations with the mapping tree and per-message create_time. 
ingest now reads that format directly (supersedes the old convert/trim/split scripts): walks each conversation's mapping ordered by create_time, keeps text and multimodal_text (drops thoughts/reasoning_recap), captures real per-message timestamps, and imports idempotently by conversation_id. `lyra-import ` auto-detects raw-export vs legacy {title,messages} dirs; optional limit arg. Verified on 15 conversations: real dates, correct ordering, recall returns dated poker history. Co-Authored-By: Claude Opus 4.8 (1M context) --- lyra/ingest.py | 95 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 2 deletions(-) diff --git a/lyra/ingest.py b/lyra/ingest.py index 7f6ea5b..ef027df 100644 --- a/lyra/ingest.py +++ b/lyra/ingest.py @@ -21,6 +21,10 @@ from lyra import llm, logbus, memory EMBED_BATCH = 64 EMBED_CHAR_CAP = 6000 # cap embed input size; full content is still stored +# Message content types worth keeping from a raw ChatGPT export. We drop +# 'thoughts' (internal chain-of-thought) and 'reasoning_recap' (meta). 
+KEEP_CONTENT_TYPES = {"text", "multimodal_text"} + def _session_id(path: Path) -> str: """Stable id derived from the filename, so re-imports don't duplicate.""" @@ -80,11 +84,98 @@ def import_dir(dirpath: str | Path, created_at: str | None = None) -> dict: return {"files": len(files), "sessions_imported": sessions, "exchanges": exchanges} +# --- Raw ChatGPT export (sharded conversations-*.json with timestamps) --- + + +def _ts_to_iso(ts: float | None, fallback: str) -> str: + if not ts: + return fallback + return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat() + + +def _message_text(msg: dict) -> str | None: + """Extract plain text from a ChatGPT message node, or None to skip it.""" + content = msg.get("content") or {} + if content.get("content_type") not in KEEP_CONTENT_TYPES: + return None + parts = [p for p in (content.get("parts") or []) if isinstance(p, str) and p.strip()] + text = "\n".join(parts).strip() + return text or None + + +def _convo_rows(convo: dict) -> list[tuple[float, str, str]]: + """(create_time, role, text) for each keepable message, chronologically.""" + rows: list[tuple[float, str, str]] = [] + conv_ct = convo.get("create_time") or 0 + for node in convo.get("mapping", {}).values(): + msg = node.get("message") + if not msg: + continue + role = (msg.get("author") or {}).get("role") + if role not in ("user", "assistant"): + continue + text = _message_text(msg) + if text is None: + continue + rows.append((msg.get("create_time") or conv_ct, role, text)) + rows.sort(key=lambda r: r[0] or 0) + return rows + + +def import_conversation(convo: dict) -> int: + """Import one raw-export conversation. 
Idempotent by conversation_id.""" + session_id = convo.get("conversation_id") or convo.get("id") + if not session_id or memory.history(session_id): + return 0 + rows = _convo_rows(convo) + if not rows: + return 0 + + memory.ensure_session(session_id, name=convo.get("title") or "untitled") + fallback = datetime.now(timezone.utc).isoformat() + exchanges: list[tuple[str, str, list[float], str]] = [] + for i in range(0, len(rows), EMBED_BATCH): + batch = rows[i : i + EMBED_BATCH] + embeddings = llm.embed([text[:EMBED_CHAR_CAP] for _, _, text in batch]) + for (ts, role, text), emb in zip(batch, embeddings): + exchanges.append((role, text, emb, _ts_to_iso(ts, fallback))) + return memory.add_exchanges_bulk(session_id, exchanges) + + +def import_export(export_dir: str | Path, limit: int | None = None) -> dict: + """Import a raw ChatGPT export directory (sharded conversations-*.json).""" + shards = sorted(Path(export_dir).glob("conversations-*.json")) + convos, exchanges, seen = 0, 0, 0 + for shard in shards: + for convo in json.loads(shard.read_text(encoding="utf-8")): + if limit is not None and seen >= limit: + break + seen += 1 + added = import_conversation(convo) + if added: + convos += 1 + exchanges += added + if limit is not None and seen >= limit: + break + logbus.log( + "info", "export import complete", + shards=len(shards), conversations=convos, exchanges=exchanges, + ) + return {"shards": len(shards), "conversations_imported": convos, "exchanges": exchanges} + + def main() -> int: if len(sys.argv) < 2: - print("usage: lyra-import ", file=sys.stderr) + print("usage: lyra-import [limit]", file=sys.stderr) return 2 - report = import_dir(sys.argv[1]) + path = Path(sys.argv[1]) + limit = int(sys.argv[2]) if len(sys.argv) > 2 else None + # A raw ChatGPT export has sharded conversations-*.json; otherwise treat the + # directory as legacy {title, messages} files. 
+ if list(path.glob("conversations-*.json")): + report = import_export(path, limit=limit) + else: + report = import_dir(path) print(report) return 0 -- 2.52.0 From 071522ea330d578d5f599c0bee1d60e246bea917 Mon Sep 17 00:00:00 2001 From: serversdown Date: Tue, 16 Jun 2026 04:08:41 +0000 Subject: [PATCH 05/51] feat: summarize-all batch (consolidation step 1) Harden summarize_session to chunk + merge long sessions (imported convos can exceed the local model's context), and add summarize_all: idempotent, resumable batch that summarizes every session needing it (skips up-to-date ones), with progress logged to the live log. `lyra-summarize [limit]` CLI. This is the first consolidation stage feeding the profile (semantic memory) and era-rollup tiers. Co-Authored-By: Claude Opus 4.8 (1M context) --- lyra/summary.py | 94 +++++++++++++++++++++++++++++++++++++++---------- pyproject.toml | 1 + 2 files changed, 76 insertions(+), 19 deletions(-) diff --git a/lyra/summary.py b/lyra/summary.py index 3b032d9..1844444 100644 --- a/lyra/summary.py +++ b/lyra/summary.py @@ -1,17 +1,23 @@ """Session summarization: compact a session's raw exchanges into a stored gist. -This is the compaction half of the tiered memory. Raw exchanges stay for detail -recall; the summary is what surfaces when an *older* session is recalled later — -"a month ago is a general idea," per the design. +This is the first consolidation stage. Raw exchanges stay for detail recall; the +summary is what surfaces when an *older* session is recalled, and it's the input +to the profile (semantic memory) and era-rollup tiers. + +Long sessions are summarized in chunks, then the partial gists are merged, so a +big imported conversation doesn't blow the local model's context window. 
""" from __future__ import annotations -from lyra import config, llm, logbus, memory -from lyra.llm import Backend +import sys -# Re-summarize a session once it has accumulated this many new raw exchanges -# beyond what its current summary covers. +from lyra import config, llm, logbus, memory +from lyra.llm import Backend, Message + +# Re-summarize a session once it has accumulated this many new raw exchanges. SUMMARIZE_AFTER = 20 +# Transcript budget per LLM call; longer sessions are chunked + merged. +MAX_TRANSCRIPT_CHARS = 24000 _PROMPT = """You are compacting a conversation into a long-term memory record \ (not replying to anyone). Write a concise gist of the session below: what was \ @@ -24,25 +30,43 @@ def _transcript(exchanges: list[memory.Exchange]) -> str: return "\n".join(f"{ex.role}: {ex.content}" for ex in exchanges) -def summarize_session(session_id: str, backend: Backend | None = None) -> str | None: - """(Re)generate and store the gist for a session. Returns the summary text. +def _chunk(text: str, budget: int) -> list[str]: + """Split on line boundaries into pieces under `budget` chars.""" + chunks, buf, size = [], [], 0 + for line in text.splitlines(keepends=True): + if size + len(line) > budget and buf: + chunks.append("".join(buf)) + buf, size = [], 0 + buf.append(line) + size += len(line) + if buf: + chunks.append("".join(buf)) + return chunks - Returns None if the session has no exchanges. The summarizer defaults to the - local backend so routine compaction stays free. - """ + +def _summarize_text(text: str, backend: Backend) -> str: + messages: list[Message] = [ + {"role": "system", "content": _PROMPT}, + {"role": "user", "content": text}, + ] + return llm.complete(messages, backend=backend) + + +def summarize_session(session_id: str, backend: Backend | None = None) -> str | None: + """(Re)generate and store the gist for a session. 
Returns the summary text.""" exchanges = memory.history(session_id) if not exchanges: return None backend = backend or config.load().summary_backend - messages = [ - {"role": "system", "content": _PROMPT}, - {"role": "user", "content": _transcript(exchanges)}, - ] - gist = llm.complete(messages, backend=backend) + transcript = _transcript(exchanges) + if len(transcript) <= MAX_TRANSCRIPT_CHARS: + gist = _summarize_text(transcript, backend) + else: + partials = [_summarize_text(c, backend) for c in _chunk(transcript, MAX_TRANSCRIPT_CHARS)] + gist = _summarize_text("Partial summaries to merge:\n\n" + "\n\n".join(partials), backend) - last_id = exchanges[-1].id - memory.store_summary(session_id, gist, last_id) + memory.store_summary(session_id, gist, exchanges[-1].id) logbus.log( "info", "summarized session", session=session_id, exchanges=len(exchanges), backend=backend, @@ -54,3 +78,35 @@ def maybe_summarize(session_id: str, backend: Backend | None = None) -> None: """Summarize the session if enough new turns have accumulated since last time.""" if memory.unsummarized_count(session_id) >= SUMMARIZE_AFTER: summarize_session(session_id, backend=backend) + + +def summarize_all(backend: Backend | None = None, limit: int | None = None) -> dict: + """Summarize every session that needs it. Idempotent and resumable: sessions + with an up-to-date summary are skipped, so re-running continues where it left off. 
+ """ + sessions = memory.list_sessions() + done, skipped = 0, 0 + for s in sessions: + sid = s["id"] + if memory.get_summary(sid) and memory.unsummarized_count(sid) == 0: + skipped += 1 + continue + summarize_session(sid, backend=backend) + done += 1 + if done % 25 == 0: + logbus.log("info", "summarize-all progress", summarized=done, skipped=skipped) + if limit is not None and done >= limit: + break + report = {"summarized": done, "skipped": skipped, "total": len(sessions)} + logbus.log("info", "summarize-all complete", **report) + return report + + +def main() -> int: + limit = int(sys.argv[1]) if len(sys.argv) > 1 else None + print(summarize_all(limit=limit)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml index d333bfa..eeff535 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ lyra = "lyra.__main__:main" lyra-web = "lyra.web.server:serve" lyra-import = "lyra.ingest:main" +lyra-summarize = "lyra.summary:main" [dependency-groups] dev = [ -- 2.52.0 From ecf0b852f9a25dbe7413513ce420e1e1b56931bb Mon Sep 17 00:00:00 2001 From: serversdown Date: Tue, 16 Jun 2026 04:11:19 +0000 Subject: [PATCH 06/51] =?UTF-8?q?feat:=20profile=20layer=20=E2=80=94=20sem?= =?UTF-8?q?antic=20memory=20(consolidation=20step=202)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Derive a standing profile of the user from session gists and inject it into every prompt, so identity/abstract questions ("what kind of player am I", "what are my leaks") are answered from distilled knowledge instead of noisy single-vector recall (which finds passages, not patterns). 
- memory: profile table + get/set_profile, list_summaries - lyra/profile.py: rebuild_profile map-reduces all gists (batch -> extract durable facts -> fold-merge) into one profile doc; `lyra-profile` CLI - chat.build_messages injects "What you know about Brian" after the persona Run after lyra-summarize (needs gists). Verified (stubbed): map-reduce, storage, and prompt injection. Co-Authored-By: Claude Opus 4.8 (1M context) --- lyra/chat.py | 8 +++++ lyra/memory.py | 47 +++++++++++++++++++++++++++ lyra/profile.py | 84 +++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 1 + 4 files changed, 140 insertions(+) create mode 100644 lyra/profile.py diff --git a/lyra/chat.py b/lyra/chat.py index 551d58f..d4da349 100644 --- a/lyra/chat.py +++ b/lyra/chat.py @@ -39,6 +39,14 @@ def build_messages(session_id: str, user_msg: str) -> list[Message]: """Assemble the full, tiered message list for one turn.""" messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}] + # Semantic memory: the distilled profile (who Brian is) — answers identity + # questions that raw recall can't. Always in context when it exists. + profile = memory.get_profile() + if profile: + messages.append( + {"role": "system", "content": "What you know about Brian:\n" + profile} + ) + recent = memory.recent(session_id, n=RECENT_N) recent_ids = {ex.id for ex in recent} diff --git a/lyra/memory.py b/lyra/memory.py index 6001da4..827295a 100644 --- a/lyra/memory.py +++ b/lyra/memory.py @@ -43,6 +43,15 @@ CREATE TABLE IF NOT EXISTS summaries ( last_exchange_id INTEGER NOT NULL, created_at TEXT NOT NULL ); + +-- Derived semantic memory: standing facts about the user, distilled from the +-- session gists by the consolidation pass. Single row (id='self'). 
+CREATE TABLE IF NOT EXISTS profile ( + id TEXT PRIMARY KEY, + content TEXT NOT NULL, + sessions_covered INTEGER NOT NULL, + updated_at TEXT NOT NULL +); """ _conn: sqlite3.Connection | None = None @@ -290,6 +299,44 @@ def unsummarized_count(session_id: str) -> int: return int(r["n"]) +def list_summaries() -> list[Summary]: + """Every session gist (for the profile/era consolidation passes).""" + conn = _connection() + rows = conn.execute( + "SELECT session_id, content, last_exchange_id, created_at FROM summaries " + "ORDER BY created_at ASC" + ).fetchall() + return [ + Summary( + session_id=r["session_id"], + content=r["content"], + last_exchange_id=r["last_exchange_id"], + created_at=r["created_at"], + ) + for r in rows + ] + + +def set_profile(content: str, sessions_covered: int, profile_id: str = "self") -> None: + """Store/replace the derived semantic profile.""" + now = datetime.now(timezone.utc).isoformat() + conn = _connection() + with conn: + conn.execute( + "INSERT INTO profile (id, content, sessions_covered, updated_at) " + "VALUES (?, ?, ?, ?) " + "ON CONFLICT(id) DO UPDATE SET content=excluded.content, " + "sessions_covered=excluded.sessions_covered, updated_at=excluded.updated_at", + (profile_id, content, sessions_covered, now), + ) + + +def get_profile(profile_id: str = "self") -> str | None: + conn = _connection() + r = conn.execute("SELECT content FROM profile WHERE id = ?", (profile_id,)).fetchone() + return r["content"] if r else None + + def recall_summaries(query: str, k: int = 3, exclude_session: str | None = None) -> list[Summary]: """Top-k session summaries most similar to `query` (the long-term gist tier).""" [q_vec] = llm.embed([query]) diff --git a/lyra/profile.py b/lyra/profile.py new file mode 100644 index 0000000..3929f8e --- /dev/null +++ b/lyra/profile.py @@ -0,0 +1,84 @@ +"""Profile derivation: distill standing facts about the user (semantic memory). + +This is consolidation step 2. 
It reads every session gist and map-reduces them +into one profile document — who Brian is as a player and person — which is then +injected into every prompt. This is what answers identity/abstract questions +("what kind of player am I", "what are my leaks") that raw recall handles badly, +because those are patterns across many sessions, not facts in any single message. +""" +from __future__ import annotations + + +from lyra import config, llm, logbus, memory +from lyra.llm import Backend, Message + +BATCH_CHARS = 18000 + +_MAP_PROMPT = """From these session summaries, extract durable facts about Brian \ +— things that are stably true, not one-off events. Cover, where present: poker \ +games/formats/stakes he plays, his playing style and strengths, recurring leaks \ +and tendencies, mental-game patterns (tilt triggers, scared money, fatigue), \ +relevant personal context, and how he likes to be coached. Terse bullet points. \ +Omit anything not supported by the summaries.""" + +_REDUCE_PROMPT = """Merge these fact lists into one deduplicated profile of Brian. \ +Organize under these headings: Poker Style, Leaks & Tendencies, Mental Game, \ +Personal Context, Working With Brian. Keep it tight — bullets, no fluff, no \ +repetition. 
Resolve contradictions toward the more recent/frequent signal.""" + + +def _batch_texts(texts: list[str], budget: int) -> list[str]: + """Group texts into joined blocks under `budget` chars.""" + blocks, buf, size = [], [], 0 + for t in texts: + if size + len(t) > budget and buf: + blocks.append("\n\n".join(buf)) + buf, size = [], 0 + buf.append(t) + size += len(t) + if buf: + blocks.append("\n\n".join(buf)) + return blocks + + +def _call(prompt: str, body: str, backend: Backend) -> str: + messages: list[Message] = [ + {"role": "system", "content": prompt}, + {"role": "user", "content": body}, + ] + return llm.complete(messages, backend=backend) + + +def rebuild_profile(backend: Backend | None = None) -> str | None: + """Re-derive the profile from all current session gists and store it.""" + backend = backend or config.load().summary_backend + summaries = memory.list_summaries() + if not summaries: + return None + + # MAP: extract facts from batches of gists. + blocks = _batch_texts([s.content for s in summaries], BATCH_CHARS) + partials = [_call(_MAP_PROMPT, b, backend) for b in blocks] + logbus.log("info", "profile map done", batches=len(partials), sessions=len(summaries)) + + # REDUCE: fold partials together until one remains. 
+ while len(partials) > 1: + partials = [_call(_REDUCE_PROMPT, g, backend) for g in _batch_texts(partials, BATCH_CHARS)] + profile = partials[0] + + memory.set_profile(profile, len(summaries)) + logbus.log("info", "profile rebuilt", sessions=len(summaries), chars=len(profile)) + return profile + + +def main() -> int: + profile = rebuild_profile() + if profile is None: + print("No summaries yet — run lyra-summarize first.") + return 1 + print(profile) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml index eeff535..221e029 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ lyra = "lyra.__main__:main" lyra-web = "lyra.web.server:serve" lyra-import = "lyra.ingest:main" lyra-summarize = "lyra.summary:main" +lyra-profile = "lyra.profile:main" [dependency-groups] dev = [ -- 2.52.0 From 30185f3fd8b1eb49be1853f705ddfb00b5440550 Mon Sep 17 00:00:00 2001 From: serversdown Date: Tue, 16 Jun 2026 05:37:22 +0000 Subject: [PATCH 07/51] feat: MI50 as a Lyra backend (OpenAI-compatible local GPU) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MI50 box (CT202) runs an OpenAI-compatible llama.cpp server on 10.0.0.44:8080. Wire it in as a third backend: - llm.complete gains backend="mi50" (OpenAI client pointed at MI50_BASE_URL) - config: MI50_BASE_URL (default http://10.0.0.44:8080/v1) + MI50_MODEL - chat.respond labels the model per backend; web _backend_for maps "mi50" - UI backend selector adds "MI50 — local GPU" Verified end-to-end: llm.complete(backend="mi50") returns from the live server. See homelab-inference memory for the box topology. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .env.example | 4 ++++ lyra/chat.py | 4 +++- lyra/config.py | 4 ++++ lyra/llm.py | 8 +++++++- lyra/web/server.py | 5 ++++- lyra/web/static/index.html | 5 +++++ 6 files changed, 27 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index ff584cc..82e07e9 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,10 @@ LOCAL_BASE_URL=http://localhost:11434 LOCAL_MODEL=qwen2.5:7b-instruct +# MI50 backend — OpenAI-compatible llama.cpp server on the home-lab GPU box. +MI50_BASE_URL=http://10.0.0.44:8080/v1 +MI50_MODEL=local-gpu + # Cloud backend (OpenAI) — higher quality, costs money. OPENAI_API_KEY= CLOUD_MODEL=gpt-4o-mini diff --git a/lyra/chat.py b/lyra/chat.py index d4da349..0211fe2 100644 --- a/lyra/chat.py +++ b/lyra/chat.py @@ -81,7 +81,9 @@ def build_messages(session_id: str, user_msg: str) -> list[Message]: def respond(session_id: str, user_msg: str, backend: Backend = "cloud") -> str: """Produce Lyra's reply to a single user message and persist the exchange.""" cfg = config.load() - model = cfg.local_model if backend == "local" else cfg.cloud_model + model = {"local": cfg.local_model, "cloud": cfg.cloud_model, "mi50": cfg.mi50_model}.get( + backend, backend + ) logbus.log( "info", "chat request", session=session_id, backend=backend, model=model, embed=cfg.embed_backend, diff --git a/lyra/config.py b/lyra/config.py index 3b9b633..8405ee3 100644 --- a/lyra/config.py +++ b/lyra/config.py @@ -14,6 +14,8 @@ load_dotenv() class Config: local_base_url: str local_model: str + mi50_base_url: str # OpenAI-compatible llama.cpp server on the MI50 box + mi50_model: str openai_api_key: str cloud_model: str embed_backend: str # "cloud" (OpenAI) or "local" (Ollama) @@ -27,6 +29,8 @@ def load() -> Config: return Config( local_base_url=os.getenv("LOCAL_BASE_URL", "http://localhost:11434"), local_model=os.getenv("LOCAL_MODEL", "qwen2.5:7b-instruct"), + mi50_base_url=os.getenv("MI50_BASE_URL", 
"http://10.0.0.44:8080/v1"), + mi50_model=os.getenv("MI50_MODEL", "local-gpu"), openai_api_key=os.getenv("OPENAI_API_KEY", ""), cloud_model=os.getenv("CLOUD_MODEL", "gpt-4o-mini"), embed_backend=os.getenv("EMBED_BACKEND", "cloud").lower(), diff --git a/lyra/llm.py b/lyra/llm.py index 71e3fd0..471c6f9 100644 --- a/lyra/llm.py +++ b/lyra/llm.py @@ -14,7 +14,7 @@ class Message(TypedDict): content: str -Backend = Literal["local", "cloud"] +Backend = Literal["local", "cloud", "mi50"] def complete(messages: list[Message], backend: Backend = "local") -> str: @@ -26,6 +26,12 @@ def complete(messages: list[Message], backend: Backend = "local") -> str: resp = client.chat.completions.create(model=cfg.cloud_model, messages=messages) return resp.choices[0].message.content or "" + if backend == "mi50": + # MI50 box runs an OpenAI-compatible llama.cpp server; key is unused. + client = OpenAI(api_key="not-needed", base_url=cfg.mi50_base_url) + resp = client.chat.completions.create(model=cfg.mi50_model, messages=messages) + return resp.choices[0].message.content or "" + resp = httpx.post( f"{cfg.local_base_url}/api/chat", json={"model": cfg.local_model, "messages": messages, "stream": False}, diff --git a/lyra/web/server.py b/lyra/web/server.py index 94a9d13..b684401 100644 --- a/lyra/web/server.py +++ b/lyra/web/server.py @@ -32,7 +32,10 @@ _CLOUD = {"OPENAI", "cloud", "custom"} def _backend_for(label: str | None) -> Backend: - if label and label.upper() in {"PRIMARY", "SECONDARY", "FALLBACK", "LOCAL"}: + key = (label or "").lower() + if key == "mi50": + return "mi50" + if key in {"local", "primary", "secondary", "fallback"}: return "local" return "cloud" diff --git a/lyra/web/static/index.html b/lyra/web/static/index.html index adb30a6..411c737 100644 --- a/lyra/web/static/index.html +++ b/lyra/web/static/index.html @@ -123,6 +123,11 @@ Local — Ollama Free, private, runs on your home lab (LOCAL_MODEL) +