# project-lyra/lyra/web/server.py

"""Web server for the vendored chat UI.

Serves the static single-page UI and implements the small endpoint contract it
expects (originally provided by the old Node relay), backed by the new Python
chat loop and SQLite memory. SQLite is the single source of truth for messages:
`/v1/chat/completions` persists via `chat.respond`, so the UI's `POST /sessions`
saves are accepted but treated as no-ops (the row is ensured, messages are not
re-stored).
"""
from __future__ import annotations

import asyncio
from pathlib import Path

from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles

from lyra import chat, memory
from lyra.llm import Backend

# Directory that sits next to this module and holds the UI assets mounted by
# create_app().
_STATIC = Path(__file__).with_name("static")

# UI backend labels that mean "cloud". Cloud is the default, and nothing in
# this file reads the set: `_backend_for` only checks for the local labels and
# routes everything else to cloud.
_CLOUD = {
    "OPENAI",
    "cloud",
    "custom",
}


def _backend_for(label: str | None) -> Backend:
    """Map a UI backend label to one of our two backends.

    Args:
        label: The ``backend`` value taken from the request body. It is
            client-supplied JSON, so it may be missing or not a string.

    Returns:
        ``"local"`` when the label, compared case-insensitively, is one of
        PRIMARY / SECONDARY / FALLBACK / LOCAL; ``"cloud"`` for anything else,
        including ``None``, the empty string and non-string values.
    """
    # Guard on the type rather than truthiness: a truthy non-string (number,
    # object, list) has no .upper() and would otherwise raise AttributeError.
    if isinstance(label, str) and label.upper() in {
        "PRIMARY",
        "SECONDARY",
        "FALLBACK",
        "LOCAL",
    }:
        return "local"
    return "cloud"


def _last_user_message(messages: list[dict]) -> str:
    """Return the text of the most recent user message.

    Falls back to the last message of any role when none has role ``"user"``,
    and to ``""`` when there is no usable message at all.

    The list comes straight from the request body, so it is treated as
    untrusted: a non-list argument yields ``""``, entries that are not dicts
    are skipped, and the content is always normalised to a string.

    Args:
        messages: Chat messages, each expected to be a dict with ``role`` and
            ``content`` keys.

    Returns:
        The chosen message's text. Missing or ``None`` content becomes ``""``;
        list-form (multi-part) content is reduced to the concatenation of its
        ``{"type": "text", "text": ...}`` parts; any other content type
        becomes ``""``.
    """
    if not isinstance(messages, list):
        return ""

    entries = [m for m in messages if isinstance(m, dict)]
    if not entries:
        return ""

    # Newest user message wins; otherwise use whatever message came last.
    chosen = next(
        (m for m in reversed(entries) if m.get("role") == "user"),
        entries[-1],
    )

    content = chosen.get("content")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Multi-part content: keep the text parts, ignore images and the like.
        return "".join(
            part["text"]
            for part in content
            if isinstance(part, dict)
            and part.get("type") == "text"
            and isinstance(part.get("text"), str)
        )
    return ""


def create_app() -> FastAPI:
    """Build the FastAPI application that backs the chat UI.

    Registers the JSON endpoints the UI calls (health check, session CRUD,
    ``/v1/chat/completions`` and the thinking event stream), then mounts the
    static UI at "/" so the API routes are matched first.

    Endpoints that read a JSON body reject anything that is not a valid JSON
    object with HTTP 400 instead of letting the parse or attribute error
    surface as an unhandled exception.

    Returns:
        The configured FastAPI instance.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # module-level dependency of this file.
    from fastapi import HTTPException

    app = FastAPI(title="Lyra Web")

    async def _json_object(request: Request) -> dict:
        """Parse the request body as a JSON object, or fail with HTTP 400."""
        try:
            payload = await request.json()
        except ValueError as err:
            # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError.
            raise HTTPException(
                status_code=400, detail="Request body must be valid JSON"
            ) from err
        if not isinstance(payload, dict):
            raise HTTPException(
                status_code=400, detail="Request body must be a JSON object"
            )
        return payload

    @app.get("/_health")
    async def health() -> dict:
        """Liveness probe."""
        return {"ok": True}

    @app.get("/sessions")
    async def list_sessions() -> list[dict]:
        """Return whatever `memory.list_sessions()` reports."""
        return memory.list_sessions()

    @app.get("/sessions/{session_id}")
    async def get_session(session_id: str) -> list[dict]:
        """Return the session's history as role/content pairs."""
        return [{"role": ex.role, "content": ex.content} for ex in memory.history(session_id)]

    @app.post("/sessions/{session_id}")
    async def save_session(session_id: str, request: Request) -> dict:
        """Accept the UI's save call without re-storing any messages."""
        # Messages are already persisted by chat.respond; just ensure the row exists.
        await request.body()  # drain the history payload we intentionally ignore
        memory.ensure_session(session_id)
        return {"ok": True}

    @app.patch("/sessions/{session_id}/metadata")
    async def rename_session(session_id: str, request: Request) -> dict:
        """Ensure the session exists, passing along the body's ``name``."""
        body = await _json_object(request)
        memory.ensure_session(session_id, name=body.get("name"))
        return {"ok": True}

    @app.delete("/sessions/{session_id}")
    async def delete_session(session_id: str) -> dict:
        """Delete the session through the memory layer."""
        memory.delete_session(session_id)
        return {"ok": True}

    @app.post("/v1/chat/completions")
    async def chat_completions(request: Request) -> dict:
        """Answer the latest user message in a chat-completion shaped reply."""
        body = await _json_object(request)
        session_id = body.get("sessionId") or "default"
        backend = _backend_for(body.get("backend"))

        # A missing or null "messages" counts as an empty conversation; any
        # other non-list value is a malformed request.
        messages = body.get("messages") or []
        if not isinstance(messages, list):
            raise HTTPException(status_code=400, detail="'messages' must be a list")
        user_msg = _last_user_message(messages)

        memory.ensure_session(session_id)
        # chat.respond is called synchronously; run it in a worker thread so the
        # event loop is not held up while it works.
        reply = await asyncio.to_thread(chat.respond, session_id, user_msg, backend)

        return {
            "object": "chat.completion",
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": reply},
                    "finish_reason": "stop",
                }
            ],
        }

    @app.get("/stream/thinking/{session_id}")
    async def thinking_stream(session_id: str) -> StreamingResponse:
        """Open a server-sent-events stream that carries only comment lines."""
        # Inert until cognitive layers exist: open the stream, emit keep-alives only.
        async def gen():
            yield ": connected\n\n"
            while True:
                await asyncio.sleep(25)
                yield ": keep-alive\n\n"

        return StreamingResponse(gen(), media_type="text/event-stream")

    # Static UI last, so the API routes above take precedence. html=True serves
    # index.html at "/" and assets (style.css, manifest.json) at their paths.
    app.mount("/", StaticFiles(directory=str(_STATIC), html=True), name="ui")
    return app


# Module-level application instance; `serve()` below hands this object to uvicorn.
app = create_app()


def serve() -> None:
    """Console-script entry: `lyra-web`.

    Runs the module-level ``app`` under uvicorn. The bind address is read from
    ``LYRA_WEB_HOST`` (default ``"0.0.0.0"``) and ``LYRA_WEB_PORT`` (default
    ``"7078"``). A variable that is set but empty is treated as unset.

    Raises:
        ValueError: If ``LYRA_WEB_PORT`` is not an integer.
    """
    import os

    # `or` instead of a getenv default: the default only applies when the
    # variable is absent, so an empty value would otherwise reach int("").
    host = os.getenv("LYRA_WEB_HOST") or "0.0.0.0"
    raw_port = os.getenv("LYRA_WEB_PORT") or "7078"
    try:
        port = int(raw_port)
    except ValueError as err:
        raise ValueError(
            f"LYRA_WEB_PORT must be an integer, got {raw_port!r}"
        ) from err

    # Imported after the configuration is validated so a bad setting fails fast.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


# Executing this file as a script starts the server via `serve()`.
if __name__ == "__main__":
    serve()