d7c258eba0
Older sessions fade to a general idea; details stay retrievable.
- memory: summaries table (one compacted gist per session, embedded), plus
store_summary/get_summary/recall_summaries and unsummarized_count (tracks
exchanges newer than the current summary)
- lyra/summary.py: summarize_session compacts a session's raw turns into a
third-person gist (default SUMMARY_BACKEND=local, so compaction is free);
maybe_summarize re-summarizes once SUMMARIZE_AFTER new turns accumulate
- chat.build_messages now layers context in tiers: persona -> gists of other
sessions -> a few sharp raw cross-session details -> current session raw
turns -> new message; respond() compacts the session after each turn
- web: POST /sessions/{id}/summarize to compact on demand
- summarization activity surfaces in the live log
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
154 lines
5.0 KiB
Python
154 lines
5.0 KiB
Python
"""Web server for the vendored chat UI.

Serves the static single-page UI and implements the small endpoint contract it
expects (originally provided by the old Node relay), backed by the new Python
chat loop and SQLite memory. SQLite is the single source of truth for messages:
`/v1/chat/completions` persists via `chat.respond`, so the UI's
`POST /sessions/{id}` saves are accepted but treated as no-ops (the row is
ensured, messages are not re-stored).
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI, Request
|
|
from fastapi.responses import StreamingResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
|
|
from lyra import chat, logbus, memory, summary
|
|
from lyra.llm import Backend
|
|
|
|
|
|
def _sse(event: dict) -> str:
|
|
return f"data: {json.dumps(event)}\n\n"
|
|
|
|
_STATIC = Path(__file__).parent / "static"
|
|
|
|
# UI backend labels -> our two backends. Cloud is the default.
|
|
_CLOUD = {"OPENAI", "cloud", "custom"}
|
|
|
|
|
|
def _backend_for(label: str | None) -> Backend:
|
|
if label and label.upper() in {"PRIMARY", "SECONDARY", "FALLBACK", "LOCAL"}:
|
|
return "local"
|
|
return "cloud"
|
|
|
|
|
|
def _last_user_message(messages: list[dict]) -> str:
|
|
for m in reversed(messages):
|
|
if m.get("role") == "user":
|
|
return m.get("content", "")
|
|
return messages[-1].get("content", "") if messages else ""
|
|
|
|
|
|
def create_app() -> FastAPI:
    """Build the FastAPI application that backs the chat UI.

    Registers the health, session, summarize, chat-completion and live-log
    routes, then mounts the static UI at "/" last so the API routes take
    precedence.

    Returns:
        The configured ``FastAPI`` instance.
    """
    app = FastAPI(title="Lyra Web")

    @app.get("/_health")
    async def health() -> dict:
        """Liveness probe; always reports ok."""
        return {"ok": True}

    @app.get("/sessions")
    async def list_sessions() -> list[dict]:
        """Return whatever ``memory.list_sessions`` reports."""
        return memory.list_sessions()

    @app.get("/sessions/{session_id}")
    async def get_session(session_id: str) -> list[dict]:
        """Return the stored history of a session as role/content dicts."""
        return [{"role": ex.role, "content": ex.content} for ex in memory.history(session_id)]

    @app.post("/sessions/{session_id}")
    async def save_session(session_id: str, request: Request) -> dict:
        """Accept the UI's save call without re-storing any messages."""
        # Messages are already persisted by chat.respond; just ensure the row exists.
        await request.body()  # drain the history payload we intentionally ignore
        memory.ensure_session(session_id)
        return {"ok": True}

    @app.patch("/sessions/{session_id}/metadata")
    async def rename_session(session_id: str, request: Request) -> dict:
        """Ensure the session row exists, passing along the ``name`` from the body."""
        body = await request.json()
        memory.ensure_session(session_id, name=body.get("name"))
        return {"ok": True}

    @app.delete("/sessions/{session_id}")
    async def delete_session(session_id: str) -> dict:
        """Delete the session via ``memory.delete_session``."""
        memory.delete_session(session_id)
        return {"ok": True}

    @app.post("/sessions/{session_id}/summarize")
    async def summarize(session_id: str) -> dict:
        """Compact a session on demand; ``ok`` is false when no gist came back."""
        # summarize_session is a synchronous call; run it in a worker thread.
        gist = await asyncio.to_thread(summary.summarize_session, session_id)
        return {"ok": gist is not None, "summary": gist}

    @app.post("/v1/chat/completions")
    async def chat_completions(request: Request) -> dict:
        """Answer the latest user message and return a chat-completion payload.

        Failures inside ``chat.respond`` are logged and returned to the UI as
        an ``[error] ...`` assistant message rather than raised.
        """
        body = await request.json()
        session_id = body.get("sessionId") or "default"
        backend = _backend_for(body.get("backend"))
        # ``or []`` rather than a .get default: an explicit ``"messages": null``
        # would otherwise reach _last_user_message as None and raise.
        user_msg = _last_user_message(body.get("messages") or [])

        memory.ensure_session(session_id)
        try:
            reply = await asyncio.to_thread(chat.respond, session_id, user_msg, backend)
        except Exception as exc:
            logbus.log("error", "chat failed", session=session_id, error=str(exc))
            reply = f"[error] {exc}"

        return {
            "object": "chat.completion",
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": reply},
                    "finish_reason": "stop",
                }
            ],
        }

    @app.get("/stream/logs")
    async def stream_logs(request: Request) -> StreamingResponse:
        """Live activity feed: replay the recent buffer, then stream new events."""

        async def gen():
            backlog = logbus.since(0)
            # Remember the newest replayed sequence number so the polling
            # loop below only asks for events after it.
            last = backlog[-1]["seq"] if backlog else 0
            for e in backlog:
                yield _sse(e)
            yield _sse(
                {"seq": last, "ts": time.time(), "level": "system",
                 "msg": "live log connected", "fields": {}}
            )
            while True:
                if await request.is_disconnected():
                    break
                for e in logbus.since(last):
                    last = e["seq"]
                    yield _sse(e)
                # Poll interval between checks for new log events.
                await asyncio.sleep(0.5)

        return StreamingResponse(gen(), media_type="text/event-stream")

    # Static UI last, so the API routes above take precedence. html=True serves
    # index.html at "/" and assets (style.css, manifest.json) at their paths.
    app.mount("/", StaticFiles(directory=str(_STATIC), html=True), name="ui")
    return app
|
|
|
|
|
|
# Module-level application instance; `serve` hands it to uvicorn.run.
app = create_app()
|
|
|
|
|
|
def serve() -> None:
    """Console-script entry: `lyra-web`.

    Reads the bind address from ``LYRA_WEB_HOST`` (default ``0.0.0.0``) and
    the port from ``LYRA_WEB_PORT`` (default ``7078``), then runs the
    module-level ``app`` under uvicorn.
    """
    import os

    import uvicorn

    bind_host = os.environ.get("LYRA_WEB_HOST", "0.0.0.0")
    bind_port = int(os.environ.get("LYRA_WEB_PORT", "7078"))
    uvicorn.run(app, host=bind_host, port=bind_port)
|
|
|
|
|
|
# Running this module directly starts the server the same way `serve` does.
if __name__ == "__main__":
    serve()
|