feat: in-app live log (SSE activity feed)

Turn the inert "Show Work" thinking panel into a real live activity log:
- lyra/logbus.py: thread-safe in-memory ring buffer other modules publish to
- chat.respond logs backend/model/embed per turn, recall counts, reply size;
  web layer logs chat errors
- server: replace the keep-alive /stream/thinking stub with /stream/logs, an
  SSE endpoint that replays the recent buffer then streams new events
- UI: repurpose the panel as a global "Live Log" — connects on load, renders
  level/time/msg/fields, drops the old per-session localStorage + dead popup

Every turn now shows its backend + model in-app, so local-vs-cloud (free vs
paid) is visible at a glance.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 18:45:05 +00:00
parent 3b9e0bb1e0
commit 84c4f75e03
5 changed files with 143 additions and 160 deletions
+29 -8
View File
@@ -10,15 +10,21 @@ re-stored).
from __future__ import annotations
import asyncio
import json
import time
from pathlib import Path
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles
from lyra import chat, memory
from lyra import chat, logbus, memory
from lyra.llm import Backend
def _sse(event: dict) -> str:
return f"data: {json.dumps(event)}\n\n"
_STATIC = Path(__file__).parent / "static"
# UI backend labels -> our two backends. Cloud is the default.
@@ -79,7 +85,11 @@ def create_app() -> FastAPI:
user_msg = _last_user_message(body.get("messages", []))
memory.ensure_session(session_id)
reply = await asyncio.to_thread(chat.respond, session_id, user_msg, backend)
try:
reply = await asyncio.to_thread(chat.respond, session_id, user_msg, backend)
except Exception as exc:
logbus.log("error", "chat failed", session=session_id, error=str(exc))
reply = f"[error] {exc}"
return {
"object": "chat.completion",
@@ -92,14 +102,25 @@ def create_app() -> FastAPI:
],
}
@app.get("/stream/thinking/{session_id}")
async def thinking_stream(session_id: str) -> StreamingResponse:
# Inert until cognitive layers exist: open the stream, emit keep-alives only.
@app.get("/stream/logs")
async def stream_logs(request: Request) -> StreamingResponse:
"""Live activity feed: replay the recent buffer, then stream new events."""
async def gen():
yield ": connected\n\n"
backlog = logbus.since(0)
last = backlog[-1]["seq"] if backlog else 0
for e in backlog:
yield _sse(e)
yield _sse(
{"seq": last, "ts": time.time(), "level": "system",
"msg": "live log connected", "fields": {}}
)
while True:
await asyncio.sleep(25)
yield ": keep-alive\n\n"
if await request.is_disconnected():
break
for e in logbus.since(last):
last = e["seq"]
yield _sse(e)
await asyncio.sleep(0.5)
return StreamingResponse(gen(), media_type="text/event-stream")