feat: persona chat loop, web UI, and local (Ollama) embeddings

Phase 1 — persona + persistent memory chat loop:
- lyra/persona.py + personas/lyra.md: editable identity/voice (friend-first, honest, never invents poker math)
- lyra/chat.py: turn loop assembling persona + cross-session recall + recent context, persisting both sides to SQLite
- lyra/session.py, lyra/__main__.py: session lifecycle + `lyra` REPL

Phase 1.25 — reuse the old web UI:
- vendored the prior single-page UI into lyra/web/static, repointed to same-origin
- lyra/web/server.py (FastAPI): serves the UI and backs its endpoint contract (/v1/chat/completions, session CRUD, health, inert thinking-stream) with the new chat loop + memory; SQLite stays the single source of truth
- `lyra-web` console script

Local backends — test for free, no OpenAI key:
- llm.embed routes via EMBED_BACKEND (cloud=OpenAI, local=Ollama /api/embed)
- simplified UI backend selector to Local (Ollama) / Cloud (OpenAI), default local
- memory connection opened check_same_thread=False for the threaded server

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 18:36:31 +00:00
parent 6d88505697
commit 3b9e0bb1e0
17 changed files with 2973 additions and 4 deletions
@@ -0,0 +1,127 @@
+"""Web server for the vendored chat UI.
+
+Serves the static single-page UI and implements the small endpoint contract it
+expects (originally provided by the old Node relay), backed by the new Python
+chat loop and SQLite memory. SQLite is the single source of truth for messages:
+`/v1/chat/completions` persists via `chat.respond`, so the UI's `POST /sessions`
+saves are accepted but treated as no-ops (the row is ensured, messages are not
+re-stored).
+"""
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
+from fastapi.staticfiles import StaticFiles
+
+from lyra import chat, memory
+from lyra.llm import Backend
+
+# Directory of the vendored single-page UI; mounted at "/" by create_app().
+_STATIC = Path(__file__).parent / "static"
+
+# UI backend labels -> our two backends. Cloud is the default.
+# NOTE(review): _CLOUD is not referenced anywhere else in this module;
+# _backend_for() simply treats every label it does not recognise as local as
+# cloud. Its entries are mixed-case while _backend_for() upper-cases the label
+# before comparing, so normalise the case before wiring this set in.
+_CLOUD = {"OPENAI", "cloud", "custom"}
+
+
+def _backend_for(label: str | None) -> Backend:
+    """Map the UI's backend label onto one of our two backends.
+
+    The labels PRIMARY, SECONDARY, FALLBACK and LOCAL (compared
+    case-insensitively) select the local backend. Anything else -- a missing or
+    empty label, an unrecognised string, or a non-string JSON value -- selects
+    the cloud backend.
+    """
+    local_labels = {"PRIMARY", "SECONDARY", "FALLBACK", "LOCAL"}
+    # The label comes straight from the request JSON, so it may not be a
+    # string; check the type before calling .upper() rather than letting a
+    # number or list raise AttributeError.
+    if isinstance(label, str) and label.upper() in local_labels:
+        return "local"
+    return "cloud"
+
+
+def _last_user_message(messages: list[dict]) -> str:
+    """Return the content of the most recent user message.
+
+    Scans `messages` from the end for an entry whose "role" is "user". If there
+    is none, falls back to the content of the last entry. Returns "" when there
+    are no usable entries or when the chosen entry has no content.
+    """
+    # The list comes from request JSON: tolerate a null list and skip entries
+    # that are not objects instead of failing on .get().
+    entries = [m for m in (messages or []) if isinstance(m, dict)]
+    for m in reversed(entries):
+        if m.get("role") == "user":
+            # `or ""` also covers an explicit JSON null content, which
+            # .get("content", "") would hand back as None.
+            return m.get("content") or ""
+    return (entries[-1].get("content") or "") if entries else ""
+
+
+def create_app() -> FastAPI:
+    """Build the FastAPI application: the UI's API routes plus the static UI.
+
+    The API routes are registered first and the static file mount last, so the
+    routes take precedence over the catch-all mount at "/". Session and message
+    data go through `memory`; replies are produced by `chat.respond`.
+    """
+    app = FastAPI(title="Lyra Web")
+
+    # Health check: always reports ok.
+    @app.get("/_health")
+    async def health() -> dict:
+        return {"ok": True}
+
+    # Session listing, as returned by memory.list_sessions().
+    @app.get("/sessions")
+    async def list_sessions() -> list[dict]:
+        return memory.list_sessions()
+
+    # One session's history, reduced to role/content pairs.
+    @app.get("/sessions/{session_id}")
+    async def get_session(session_id: str) -> list[dict]:
+        return [{"role": ex.role, "content": ex.content} for ex in memory.history(session_id)]
+
+    @app.post("/sessions/{session_id}")
+    async def save_session(session_id: str, request: Request) -> dict:
+        # Messages are already persisted by chat.respond; just ensure the row exists.
+        await request.body()  # drain the history payload we intentionally ignore
+        memory.ensure_session(session_id)
+        return {"ok": True}
+
+    # Rename: passes the body's "name" (None when absent) to ensure_session.
+    @app.patch("/sessions/{session_id}/metadata")
+    async def rename_session(session_id: str, request: Request) -> dict:
+        body = await request.json()
+        memory.ensure_session(session_id, name=body.get("name"))
+        return {"ok": True}
+
+    @app.delete("/sessions/{session_id}")
+    async def delete_session(session_id: str) -> dict:
+        memory.delete_session(session_id)
+        return {"ok": True}
+
+    # Chat turn: answers the latest user message and wraps the reply in a
+    # chat.completion-shaped payload with a single choice.
+    @app.post("/v1/chat/completions")
+    async def chat_completions(request: Request) -> dict:
+        body = await request.json()
+        session_id = body.get("sessionId") or "default"
+        backend = _backend_for(body.get("backend"))
+        # `or []` rather than a .get() default: the default only applies when
+        # the key is absent, so an explicit `"messages": null` would pass None.
+        user_msg = _last_user_message(body.get("messages") or [])
+
+        memory.ensure_session(session_id)
+        # chat.respond is a blocking call; run it in a worker thread so the
+        # event loop stays free while the reply is produced.
+        reply = await asyncio.to_thread(chat.respond, session_id, user_msg, backend)
+
+        return {
+            "object": "chat.completion",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": reply},
+                    "finish_reason": "stop",
+                }
+            ],
+        }
+
+    @app.get("/stream/thinking/{session_id}")
+    async def thinking_stream(session_id: str) -> StreamingResponse:
+        # Inert until cognitive layers exist: open the stream, emit keep-alives only.
+        async def gen():
+            yield ": connected\n\n"
+            while True:
+                await asyncio.sleep(25)
+                yield ": keep-alive\n\n"
+
+        return StreamingResponse(gen(), media_type="text/event-stream")
+
+    # Static UI last, so the API routes above take precedence. html=True serves
+    # index.html at "/" and assets (style.css, manifest.json) at their paths.
+    app.mount("/", StaticFiles(directory=str(_STATIC), html=True), name="ui")
+    return app
+
+
+# Module-level application instance; serve() hands this object to uvicorn.
+app = create_app()
+
+
+def serve() -> None:
+    """Run the web app under uvicorn; entry point for the `lyra-web` script.
+
+    The bind address and port are read from the LYRA_WEB_HOST and LYRA_WEB_PORT
+    environment variables, falling back to 0.0.0.0 and 7078.
+    """
+    import os
+
+    import uvicorn
+
+    uvicorn.run(
+        app,
+        host=os.getenv("LYRA_WEB_HOST", "0.0.0.0"),
+        port=int(os.getenv("LYRA_WEB_PORT", "7078")),
+    )
+
+
+# Running this file directly starts the server, same as the `lyra-web` script.
+if __name__ == "__main__":
+    serve()