Files
project-lyra/lyra/web/server.py
T
serversdown dfb6425395 feat: session modes (Talk/Cash) + live session HUD
Lyra now switches register based on what she's doing at the table instead of
being a wishy-washy companion mid-session.

Modes (lyra/modes.py):
- Talk (default companion) + Cash (live cash copilot); a mode = prompt card +
  tool allow-list. Tool gating via tools.specs(allow=).
- Two-register Cash voice: act-first one-line logging when fed facts; full warm
  companion voice for strategy / tilt / mental game.
- mode persisted per chat session (new sessions.mode column); auto-switch into
  Cash when start_session fires; UI forces cloud backend in Cash (tools only
  fire there).

Stack tracking + HUD:
- log_stack tool + poker_stack_log table; live net while sitting (stack - buy-in).
- poker.hud() bundle; /session HUD page (stack sparkline, hands, villains, notes,
  stats) polling /session/data every 5s; Talk/Cash switcher + Session nav.

Endpoints: /session, /session/data, GET/POST /sessions/{id}/mode, /modes.
tests/test_modes.py (gating, mode roundtrip, stack/HUD); 36 tests green. v0.3.0.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-19 05:28:15 +00:00

321 lines
12 KiB
Python

"""Web server for the vendored chat UI.
Serves the static single-page UI and implements the small endpoint contract it
expects (originally provided by the old Node relay), backed by the new Python
chat loop and SQLite memory. SQLite is the single source of truth for messages:
`/v1/chat/completions` persists via `chat.respond`, so the UI's `POST /sessions`
saves are accepted but treated as no-ops (the row is ensured, messages are not
re-stored).
"""
from __future__ import annotations
import asyncio
import json
import time
from pathlib import Path
from fastapi import FastAPI, Request, Response
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from lyra import chat, logbus, memory, modes, poker, self_state, summary
from lyra.llm import Backend
def _sse(event: dict) -> str:
return f"data: {json.dumps(event)}\n\n"
_STATIC = Path(__file__).parent / "static"
# UI backend labels -> our two backends. Cloud is the default.
_CLOUD = {"OPENAI", "cloud", "custom"}
def _backend_for(label: str | None) -> Backend:
key = (label or "").lower()
if key == "mi50":
return "mi50"
if key in {"local", "primary", "secondary", "fallback"}:
return "local"
return "cloud"
def _last_user_message(messages: list[dict]) -> str:
for m in reversed(messages):
if m.get("role") == "user":
return m.get("content", "")
return messages[-1].get("content", "") if messages else ""
def _prepare_chat(body: dict) -> tuple[str, str, Backend, str | None]:
    """Unpack a chat request body and get its session row ready.

    Shared by the blocking and streaming chat endpoints so both interpret the
    request identically. Ensures the session exists and, when the body carries
    a ``mode``, stores it on the session before the chat loop runs.

    Returns ``(session_id, user_msg, backend, model_override)``.
    """
    session_id = body.get("sessionId") or "default"
    backend = _backend_for(body.get("backend"))
    user_msg = _last_user_message(body.get("messages", []))
    model_override = body.get("model") or None
    memory.ensure_session(session_id)
    if body.get("mode"):
        memory.set_session_mode(session_id, body["mode"])
    return session_id, user_msg, backend, model_override


def create_app() -> FastAPI:
    """Build the FastAPI application.

    Registers every JSON / SSE / page route first and mounts the static
    single-page UI at ``/`` last, so the API routes take precedence.
    """
    app = FastAPI(title="Lyra Web")

    @app.get("/_health")
    async def health() -> dict:
        """Liveness probe."""
        return {"ok": True}

    # ---- chat sessions -------------------------------------------------

    @app.get("/sessions")
    async def list_sessions() -> list[dict]:
        """List the stored chat sessions."""
        return memory.list_sessions()

    @app.get("/sessions/{session_id}")
    async def get_session(session_id: str) -> list[dict]:
        """Return a session's history as ``{role, content}`` dicts."""
        return [{"role": ex.role, "content": ex.content} for ex in memory.history(session_id)]

    @app.post("/sessions/{session_id}")
    async def save_session(session_id: str, request: Request) -> dict:
        """Accept the UI's save call without re-storing messages."""
        # Messages are already persisted by chat.respond; just ensure the row exists.
        await request.body()  # drain the history payload we intentionally ignore
        memory.ensure_session(session_id)
        return {"ok": True}

    @app.patch("/sessions/{session_id}/metadata")
    async def rename_session(session_id: str, request: Request) -> dict:
        """Set the session's name from the request body's ``name`` field."""
        body = await request.json()
        memory.ensure_session(session_id, name=body.get("name"))
        return {"ok": True}

    @app.delete("/sessions/{session_id}")
    async def delete_session(session_id: str) -> dict:
        """Delete a session."""
        memory.delete_session(session_id)
        return {"ok": True}

    @app.post("/sessions/{session_id}/summarize")
    async def summarize(session_id: str) -> dict:
        """Summarize a session in a worker thread; ``ok`` is false when no gist came back."""
        gist = await asyncio.to_thread(summary.summarize_session, session_id)
        return {"ok": gist is not None, "summary": gist}

    # ---- conversation modes --------------------------------------------

    @app.get("/modes")
    async def list_modes() -> dict:
        """Available conversation modes, for the UI switcher."""
        return {"modes": modes.listing(), "default": modes.DEFAULT}

    @app.get("/sessions/{session_id}/mode")
    async def get_mode(session_id: str) -> dict:
        """Return the session's stored mode, or the default when none is set."""
        return {"mode": memory.get_session_mode(session_id) or modes.DEFAULT}

    @app.post("/sessions/{session_id}/mode")
    async def set_mode(session_id: str, request: Request) -> dict:
        """Store the requested mode (or the default) on the session."""
        body = await request.json()
        mode = body.get("mode") or modes.DEFAULT
        # Same ensure-then-set order as the chat endpoints, so the mode is not
        # written against a session row that does not exist yet.
        memory.ensure_session(session_id)
        memory.set_session_mode(session_id, mode)
        logbus.log("info", "mode set", session=session_id, mode=mode)
        return {"ok": True, "mode": mode}

    # ---- live session HUD ----------------------------------------------

    @app.get("/session")
    async def session_hud_page() -> FileResponse:
        """Live session HUD — stack, hands, villains, notes for the open session."""
        return FileResponse(str(_STATIC / "session.html"))

    @app.get("/session/data")
    async def session_hud_data() -> dict:
        """The current live session's HUD bundle (or {session: None} if none open)."""
        bundle = await asyncio.to_thread(poker.hud)
        return bundle or {"session": None}

    # ---- chat ----------------------------------------------------------

    @app.post("/v1/chat/completions")
    async def chat_completions(request: Request) -> dict:
        """Run one chat turn and return it in chat-completion response shape.

        A failure in the chat loop is logged and returned to the UI as the
        assistant message text rather than as an HTTP error.
        """
        body = await request.json()
        session_id, user_msg, backend, model_override = _prepare_chat(body)
        try:
            reply = await asyncio.to_thread(
                chat.respond, session_id, user_msg, backend, model_override
            )
        except Exception as exc:
            logbus.log("error", "chat failed", session=session_id, error=str(exc))
            reply = f"[error] {exc}"
        return {
            "object": "chat.completion",
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": reply},
                    "finish_reason": "stop",
                }
            ],
        }

    @app.post("/v1/chat/stream")
    async def chat_stream(request: Request) -> StreamingResponse:
        """Server-Sent Events: stream Lyra's reply token-by-token.

        `chat.respond_stream` is a blocking generator (httpx/openai), so it runs in
        a worker thread and bridges chunks to this async generator via a queue.
        """
        body = await request.json()
        session_id, user_msg, backend, model_override = _prepare_chat(body)

        async def gen():
            loop = asyncio.get_running_loop()
            q: asyncio.Queue = asyncio.Queue()
            done = object()  # sentinel: the producer thread has finished

            def produce() -> None:
                # Runs in a worker thread; every queue write is marshalled
                # back onto the event loop.
                try:
                    for event in chat.respond_stream(session_id, user_msg, backend, model_override):
                        loop.call_soon_threadsafe(q.put_nowait, event)
                except Exception as exc:  # surface to the client stream, don't hang
                    logbus.log("error", "chat stream failed", session=session_id, error=str(exc))
                    loop.call_soon_threadsafe(q.put_nowait, ("error", str(exc)))
                finally:
                    loop.call_soon_threadsafe(q.put_nowait, done)

            loop.run_in_executor(None, produce)
            while True:
                item = await q.get()
                if item is done:
                    break
                ev, payload = item
                yield _sse({"type": ev, "payload": payload})

        return StreamingResponse(gen(), media_type="text/event-stream")

    # ---- logs / self-state / journal -----------------------------------

    @app.get("/logs")
    async def logs_page() -> FileResponse:
        """Full-page, mobile-friendly live log viewer (separate from the chat UI)."""
        return FileResponse(str(_STATIC / "logs.html"))

    @app.get("/self")
    async def self_page() -> FileResponse:
        """'Read her mind' — a view of Lyra's current self-state."""
        return FileResponse(str(_STATIC / "self.html"))

    @app.get("/self/state")
    async def self_state_json() -> dict:
        """Lyra's current interiority + when it last changed."""
        return {"state": self_state.load(), "updated_at": memory.self_state_updated_at()}

    @app.post("/self/reflect")
    async def self_reflect() -> dict:
        """Run one two-step reflection now, in this process, so the draft ->
        revised -> critique lands in the live log (/logs)."""
        state = await asyncio.to_thread(self_state.reflect)
        return {"ok": True, "mood": state.get("mood")}

    @app.get("/journal")
    async def journal_page() -> FileResponse:
        """Lyra's journal — the permanent, append-only record of her thoughts."""
        return FileResponse(str(_STATIC / "journal.html"))

    @app.get("/journal/data")
    async def journal_data(limit: int = 300) -> dict:
        """Return up to *limit* journal entries."""
        return {"entries": memory.list_journal(limit=limit)}

    # ---- ratings -------------------------------------------------------

    @app.post("/rate")
    async def rate(request: Request) -> dict:
        """Record Brian's 👍/👎 on a Lyra output (chat reply, reflection, journal).

        Returns ``{"ok": False}`` when the rating is missing, zero or not a
        number, or when there is no content to attach it to.
        """
        b = await request.json()
        try:
            rating = int(b.get("rating", 0))
        except (TypeError, ValueError, OverflowError):
            # null / non-numeric rating: reject it like any other unusable
            # payload instead of failing the request.
            return {"ok": False}
        content = (b.get("content") or "").strip()
        if not content or rating == 0:
            return {"ok": False}
        kind = b.get("kind") or "chat"
        memory.add_rating(
            kind=kind,
            rating=rating,
            content=content,
            context=(b.get("context") or None),
            ref=b.get("ref"),
            note=b.get("note"),
        )
        # Log the same kind that was stored; the rating is reduced to its sign.
        logbus.log("info", "rating", kind=kind, rating=1 if rating > 0 else -1)
        return {"ok": True, "counts": memory.rating_counts()}

    @app.get("/ratings/counts")
    async def ratings_counts() -> dict:
        """Return the current rating counts."""
        return memory.rating_counts()

    @app.get("/ratings/export")
    async def ratings_export() -> Response:
        """All ratings as JSONL — the seed for a future fine-tune / preference set."""
        lines = "\n".join(json.dumps(r) for r in memory.list_ratings())
        return Response(
            content=lines + ("\n" if lines else ""),
            media_type="application/x-ndjson",
            headers={"Content-Disposition": 'attachment; filename="lyra_ratings.jsonl"'},
        )

    # ---- hands ---------------------------------------------------------

    @app.get("/hand/{hand_id}")
    async def hand_page(hand_id: int) -> FileResponse:
        """Replayable hand-history viewer."""
        return FileResponse(str(_STATIC / "hand.html"))

    @app.get("/hand/{hand_id}/data")
    async def hand_data(hand_id: int) -> dict:
        """Return the hand's data, or ``{}`` when it is not found."""
        return poker.get_hand(hand_id) or {}

    @app.get("/hands")
    async def hands_page() -> FileResponse:
        """Recent-hands list page."""
        return FileResponse(str(_STATIC / "hands.html"))

    @app.get("/hands/data")
    async def hands_data(limit: int = 60) -> dict:
        """Return up to *limit* recent hands."""
        return {"hands": poker.list_recent_hands(limit=limit)}

    # ---- session recaps ------------------------------------------------

    @app.get("/recap/{session_id}")
    async def recap_page(session_id: int) -> FileResponse:
        """Recap page for a poker session.

        ``session_id`` is declared (though unused here) so the path parameter
        is validated the same way as on the hand page and the recap data
        routes.
        """
        return FileResponse(str(_STATIC / "recap.html"))

    @app.get("/recap/{session_id}/data")
    async def recap_data(session_id: int) -> dict:
        """Return the poker session record and its recap markdown."""
        s = poker.get_session(session_id) or {}
        return {"session": s, "markdown": s.get("recap_md")}

    @app.get("/recap/{session_id}/download")
    async def recap_download(session_id: int) -> Response:
        """Download the recap as a markdown attachment (placeholder text if none exists)."""
        s = poker.get_session(session_id) or {}
        md = s.get("recap_md") or "# No recap generated yet\n"
        # First 10 characters of started_at (presumably the date part of an ISO
        # timestamp — confirm against the poker module).
        date = (s.get("started_at") or "session")[:10]
        fname = f"pokerlog_{date}_s{session_id}.md"
        return Response(
            content=md,
            media_type="text/markdown",
            headers={"Content-Disposition": f'attachment; filename="{fname}"'},
        )

    # ---- live log stream -----------------------------------------------

    @app.get("/stream/logs")
    async def stream_logs(request: Request) -> StreamingResponse:
        """Live activity feed: replay the recent buffer, then stream new events."""

        async def gen():
            backlog = logbus.since(0)
            last = backlog[-1]["seq"] if backlog else 0
            for e in backlog:
                yield _sse(e)
            # Marker event so the viewer can show that the live tail started.
            yield _sse(
                {"seq": last, "ts": time.time(), "level": "system",
                 "msg": "live log connected", "fields": {}}
            )
            # Poll the log bus twice a second until the client goes away.
            while True:
                if await request.is_disconnected():
                    break
                for e in logbus.since(last):
                    last = e["seq"]
                    yield _sse(e)
                await asyncio.sleep(0.5)

        return StreamingResponse(gen(), media_type="text/event-stream")

    # Static UI last, so the API routes above take precedence. html=True serves
    # index.html at "/" and assets (style.css, manifest.json) at their paths.
    app.mount("/", StaticFiles(directory=str(_STATIC), html=True), name="ui")
    return app
# Module-level application instance, built once at import time; `serve()`
# below hands this object to uvicorn.
app = create_app()
def serve() -> None:
    """Console-script entry: `lyra-web`.

    Runs the module-level ``app`` under uvicorn. The bind address comes from
    ``LYRA_WEB_HOST`` (default ``0.0.0.0``) and the port from
    ``LYRA_WEB_PORT`` (default ``7078``).
    """
    import os

    import uvicorn

    bind_host = os.environ.get("LYRA_WEB_HOST", "0.0.0.0")
    bind_port = int(os.environ.get("LYRA_WEB_PORT", "7078"))
    uvicorn.run(app, host=bind_host, port=bind_port)
# Allow running this module directly as a script, in addition to the
# `lyra-web` console script.
if __name__ == "__main__":
    serve()