diff --git a/lyra/summary.py b/lyra/summary.py index 1844444..46c3dfa 100644 --- a/lyra/summary.py +++ b/lyra/summary.py @@ -10,10 +10,13 @@ big imported conversation doesn't blow the local model's context window. from __future__ import annotations import sys +import time from lyra import config, llm, logbus, memory from lyra.llm import Backend, Message +_RETRIES = 4 + # Re-summarize a session once it has accumulated this many new raw exchanges. SUMMARIZE_AFTER = 20 # Transcript budget per LLM call; longer sessions are chunked + merged. @@ -49,7 +52,16 @@ def _summarize_text(text: str, backend: Backend) -> str: {"role": "system", "content": _PROMPT}, {"role": "user", "content": text}, ] - return llm.complete(messages, backend=backend) + # Retry transient backend errors (e.g. the GPU server restarting) with backoff. + for attempt in range(_RETRIES): + try: + return llm.complete(messages, backend=backend) + except Exception as exc: + if attempt == _RETRIES - 1: + raise + logbus.log("debug", "summary retry", attempt=attempt + 1, error=str(exc)[:80]) + time.sleep(5 * (attempt + 1)) + raise RuntimeError("unreachable") def summarize_session(session_id: str, backend: Backend | None = None) -> str | None: @@ -85,19 +97,26 @@ def summarize_all(backend: Backend | None = None, limit: int | None = None) -> d with an up-to-date summary are skipped, so re-running continues where it left off. """ sessions = memory.list_sessions() - done, skipped = 0, 0 + done, skipped, failed = 0, 0, 0 for s in sessions: sid = s["id"] if memory.get_summary(sid) and memory.unsummarized_count(sid) == 0: skipped += 1 continue - summarize_session(sid, backend=backend) + try: + summarize_session(sid, backend=backend) + except Exception as exc: + # Don't let one bad session kill the batch; log and move on (it'll + # be retried on the next run, since it stays unsummarized). + failed += 1 + logbus.log("error", "summarize failed", session=sid, error=str(exc)[:120]) + continue done += 1 if done % 25 == 0: - logbus.log("info", "summarize-all progress", summarized=done, skipped=skipped) + logbus.log("info", "summarize-all progress", summarized=done, skipped=skipped, failed=failed) if limit is not None and done >= limit: break - report = {"summarized": done, "skipped": skipped, "total": len(sessions)} + report = {"summarized": done, "skipped": skipped, "failed": failed, "total": len(sessions)} logbus.log("info", "summarize-all complete", **report) return report