feat: in-app live log (SSE activity feed)

Turn the inert "Show Work" thinking panel into a real live activity log: - lyra/logbus.py: thread-safe in-memory ring buffer other modules publish to - chat.respond logs backend/model/embed per turn, recall counts, reply size; web layer logs chat errors - server: replace the keep-alive /stream/thinking stub with /stream/logs, an SSE endpoint that replays the recent buffer then streams new events - UI: repurpose the panel as a global "Live Log" — connects on load, renders level/time/msg/fields, drops the old per-session localStorage + dead popup Every turn now shows its backend + model in-app, so local-vs-cloud (free vs paid) is visible at a glance. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 18:45:05 +00:00
parent 3b9e0bb1e0
commit 84c4f75e03
5 changed files with 143 additions and 160 deletions
@@ -10,15 +10,21 @@ re-stored).
 from __future__ import annotations

 import asyncio
+import json
+import time
 from pathlib import Path

 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse
 from fastapi.staticfiles import StaticFiles

-from lyra import chat, memory
+from lyra import chat, logbus, memory
 from lyra.llm import Backend

+
+def _sse(event: dict) -> str:
+    return f"data: {json.dumps(event)}\n\n"
+
 _STATIC = Path(__file__).parent / "static"

 # UI backend labels -> our two backends. Cloud is the default.
@@ -79,7 +85,11 @@ def create_app() -> FastAPI:
        user_msg = _last_user_message(body.get("messages", []))

        memory.ensure_session(session_id)
-        reply = await asyncio.to_thread(chat.respond, session_id, user_msg, backend)
+        try:
+            reply = await asyncio.to_thread(chat.respond, session_id, user_msg, backend)
+        except Exception as exc:
+            logbus.log("error", "chat failed", session=session_id, error=str(exc))
+            reply = f"[error] {exc}"

        return {
            "object": "chat.completion",
@@ -92,14 +102,25 @@ def create_app() -> FastAPI:
            ],
        }

-    @app.get("/stream/thinking/{session_id}")
-    async def thinking_stream(session_id: str) -> StreamingResponse:
-        # Inert until cognitive layers exist: open the stream, emit keep-alives only.
+    @app.get("/stream/logs")
+    async def stream_logs(request: Request) -> StreamingResponse:
+        """Live activity feed: replay the recent buffer, then stream new events."""
        async def gen():
-            yield ": connected\n\n"
+            backlog = logbus.since(0)
+            last = backlog[-1]["seq"] if backlog else 0
+            for e in backlog:
+                yield _sse(e)
+            yield _sse(
+                {"seq": last, "ts": time.time(), "level": "system",
+                 "msg": "live log connected", "fields": {}}
+            )
            while True:
-                await asyncio.sleep(25)
-                yield ": keep-alive\n\n"
+                if await request.is_disconnected():
+                    break
+                for e in logbus.since(last):
+                    last = e["seq"]
+                    yield _sse(e)
+                await asyncio.sleep(0.5)

        return StreamingResponse(gen(), media_type="text/event-stream")