3b9e0bb1e0
Phase 1 — persona + persistent memory chat loop: - lyra/persona.py + personas/lyra.md: editable identity/voice (friend-first, honest, never invents poker math) - lyra/chat.py: turn loop assembling persona + cross-session recall + recent context, persisting both sides to SQLite - lyra/session.py, lyra/__main__.py: session lifecycle + `lyra` REPL Phase 1.25 — reuse the old web UI: - vendored the prior single-page UI into lyra/web/static, repointed to same-origin - lyra/web/server.py (FastAPI): serves the UI and backs its endpoint contract (/v1/chat/completions, session CRUD, health, inert thinking-stream) with the new chat loop + memory; SQLite stays the single source of truth - `lyra-web` console script Local backends — test for free, no OpenAI key: - llm.embed routes via EMBED_BACKEND (cloud=OpenAI, local=Ollama /api/embed) - simplified UI backend selector to Local (Ollama) / Cloud (OpenAI), default local - memory connection opened check_same_thread=False for the threaded server Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
128 lines
4.1 KiB
Python
128 lines
4.1 KiB
Python
"""Web server for the vendored chat UI.
|
|
|
|
Serves the static single-page UI and implements the small endpoint contract it
expects (originally provided by the old Node relay), backed by the new Python
chat loop and SQLite memory. SQLite is the single source of truth for messages:
`/v1/chat/completions` persists via `chat.respond`, so the UI's
`POST /sessions/{session_id}` saves are accepted but treated as no-ops (the row
is ensured, messages are not re-stored).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI, Request
|
|
from fastapi.responses import StreamingResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
|
|
from lyra import chat, memory
|
|
from lyra.llm import Backend
|
|
|
|
# Directory named "static" that sits next to this module file.
_STATIC = Path(__file__).with_name("static")
|
|
|
|
# UI backend labels -> our two backends. Cloud is the default.
# NOTE(review): `_CLOUD` is not consulted by `_backend_for` (every label outside
# the local set already resolves to cloud); kept unchanged in case other code
# refers to it.
_CLOUD = {"OPENAI", "cloud", "custom"}

# Labels, compared case-insensitively, that select the local backend.
_LOCAL_LABELS = frozenset({"PRIMARY", "SECONDARY", "FALLBACK", "LOCAL"})


def _backend_for(label: str | None) -> Backend:
    """Map a UI backend label onto one of the two backends.

    Args:
        label: The label sent by the UI. It comes straight from the request
            JSON, so it may be missing, empty, or not a string at all.

    Returns:
        ``"local"`` when *label* is a string that, ignoring case and
        surrounding whitespace, is one of PRIMARY, SECONDARY, FALLBACK or
        LOCAL; ``"cloud"`` for everything else.
    """
    # isinstance guard: a non-string JSON value (number, list, ...) must fall
    # back to the default instead of raising AttributeError on `.upper()`.
    if isinstance(label, str) and label.strip().upper() in _LOCAL_LABELS:
        return "local"
    return "cloud"
|
|
|
|
|
|
def _message_text(content: object) -> str:
    """Flatten a chat message's ``content`` value into plain text."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Content-part form: keep the textual parts, skip anything else.
        texts = []
        for part in content:
            if isinstance(part, str):
                texts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                texts.append(part["text"])
        return "\n".join(texts)
    return str(content)


def _last_user_message(messages: list[dict]) -> str:
    """Return the text of the most recent user turn in *messages*.

    Args:
        messages: Chat messages as received in the request body, oldest first.
            Entries that are not dicts are ignored; a missing or non-list
            value is treated as an empty conversation.

    Returns:
        The content of the last message whose ``role`` is ``"user"``. When no
        user message exists, the content of the last message instead. An empty
        string when there are no messages. The result is always a ``str``:
        ``None`` content becomes ``""`` and a list of content parts is reduced
        to its text parts joined by newlines.
    """
    if not isinstance(messages, (list, tuple)):
        return ""
    entries = [m for m in messages if isinstance(m, dict)]
    for entry in reversed(entries):
        if entry.get("role") == "user":
            return _message_text(entry.get("content"))
    # No user turn at all: fall back to whatever came last.
    return _message_text(entries[-1].get("content")) if entries else ""
|
|
|
|
|
|
def create_app() -> FastAPI:
    """Build the FastAPI application serving the UI and its endpoint contract.

    The API routes are registered first and the static single-page UI is
    mounted at "/" last, so the routes take precedence over the static mount.

    Returns:
        The configured FastAPI application.
    """
    app = FastAPI(title="Lyra Web")

    @app.get("/_health")
    async def health() -> dict:
        """Liveness probe; always answers ``{"ok": True}``."""
        return {"ok": True}

    @app.get("/sessions")
    async def list_sessions() -> list[dict]:
        """Return whatever `memory.list_sessions()` reports."""
        return memory.list_sessions()

    @app.get("/sessions/{session_id}")
    async def get_session(session_id: str) -> list[dict]:
        """Return the session's history as a list of role/content dicts."""
        return [{"role": ex.role, "content": ex.content} for ex in memory.history(session_id)]

    @app.post("/sessions/{session_id}")
    async def save_session(session_id: str, request: Request) -> dict:
        """Accept the UI's save call without re-storing any messages."""
        # Messages are already persisted by chat.respond; just ensure the row exists.
        await request.body()  # drain the history payload we intentionally ignore
        memory.ensure_session(session_id)
        return {"ok": True}

    @app.patch("/sessions/{session_id}/metadata")
    async def rename_session(session_id: str, request: Request) -> dict:
        """Pass the body's ``name`` field on to `memory.ensure_session`."""
        body = await request.json()
        memory.ensure_session(session_id, name=body.get("name"))
        return {"ok": True}

    @app.delete("/sessions/{session_id}")
    async def delete_session(session_id: str) -> dict:
        """Delete the session through `memory.delete_session`."""
        memory.delete_session(session_id)
        return {"ok": True}

    @app.post("/v1/chat/completions")
    async def chat_completions(request: Request) -> dict:
        """Answer the latest user message and return a chat-completion object.

        Reads ``sessionId`` (falling back to "default"), ``backend`` and
        ``messages`` from the JSON body, and replies with a single assistant
        choice.
        """
        body = await request.json()
        session_id = body.get("sessionId") or "default"
        backend = _backend_for(body.get("backend"))
        # `or []` also covers an explicit `"messages": null`; a plain
        # `.get("messages", [])` default would hand None to the helper.
        user_msg = _last_user_message(body.get("messages") or [])

        memory.ensure_session(session_id)
        # chat.respond is a blocking call; run it in a worker thread so the
        # event loop stays free while it works.
        reply = await asyncio.to_thread(chat.respond, session_id, user_msg, backend)

        return {
            "object": "chat.completion",
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": reply},
                    "finish_reason": "stop",
                }
            ],
        }

    @app.get("/stream/thinking/{session_id}")
    async def thinking_stream(session_id: str) -> StreamingResponse:
        """Open an event stream that only ever carries comment lines."""
        # Inert until cognitive layers exist: open the stream, emit keep-alives only.
        async def gen():
            yield ": connected\n\n"
            while True:
                await asyncio.sleep(25)
                yield ": keep-alive\n\n"

        return StreamingResponse(gen(), media_type="text/event-stream")

    # Static UI last, so the API routes above take precedence. html=True serves
    # index.html at "/" and assets (style.css, manifest.json) at their paths.
    app.mount("/", StaticFiles(directory=str(_STATIC), html=True), name="ui")
    return app
|
|
|
|
|
|
# Module-level application instance; `serve()` passes this object to uvicorn.
app = create_app()
|
|
|
|
|
|
def serve() -> None:
    """Run the web app under uvicorn (the `lyra-web` console script).

    The bind address and port come from the LYRA_WEB_HOST and LYRA_WEB_PORT
    environment variables, falling back to 0.0.0.0 and 7078.
    """
    import os

    import uvicorn

    env = os.environ
    bind_host = env.get("LYRA_WEB_HOST", "0.0.0.0")
    bind_port = int(env.get("LYRA_WEB_PORT", "7078"))
    uvicorn.run(app, host=bind_host, port=bind_port)


if __name__ == "__main__":
    serve()
|