fix: make summarize-all resilient to backend hiccups
The MI50 llama.cpp server was OOM-killed mid-run (LXC RAM limit + 8GB prompt cache), and summarize_all had no error handling, so a single APIConnectionError killed the whole batch. Add retry-with-backoff around the summarization LLM call, and a try/except per session in summarize_all (log + skip; unsummarized sessions get retried on the next run). (Server-side: CT202 RAM raised + prompt cache disabled.) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+22
-3
@@ -10,10 +10,13 @@ big imported conversation doesn't blow the local model's context window.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
from lyra import config, llm, logbus, memory
|
from lyra import config, llm, logbus, memory
|
||||||
from lyra.llm import Backend, Message
|
from lyra.llm import Backend, Message
|
||||||
|
|
||||||
|
_RETRIES = 4
|
||||||
|
|
||||||
# Re-summarize a session once it has accumulated this many new raw exchanges.
|
# Re-summarize a session once it has accumulated this many new raw exchanges.
|
||||||
SUMMARIZE_AFTER = 20
|
SUMMARIZE_AFTER = 20
|
||||||
# Transcript budget per LLM call; longer sessions are chunked + merged.
|
# Transcript budget per LLM call; longer sessions are chunked + merged.
|
||||||
@@ -49,7 +52,16 @@ def _summarize_text(text: str, backend: Backend) -> str:
|
|||||||
{"role": "system", "content": _PROMPT},
|
{"role": "system", "content": _PROMPT},
|
||||||
{"role": "user", "content": text},
|
{"role": "user", "content": text},
|
||||||
]
|
]
|
||||||
|
# Retry transient backend errors (e.g. the GPU server restarting) with backoff.
|
||||||
|
for attempt in range(_RETRIES):
|
||||||
|
try:
|
||||||
return llm.complete(messages, backend=backend)
|
return llm.complete(messages, backend=backend)
|
||||||
|
except Exception as exc:
|
||||||
|
if attempt == _RETRIES - 1:
|
||||||
|
raise
|
||||||
|
logbus.log("debug", "summary retry", attempt=attempt + 1, error=str(exc)[:80])
|
||||||
|
time.sleep(5 * (attempt + 1))
|
||||||
|
raise RuntimeError("unreachable")
|
||||||
|
|
||||||
|
|
||||||
def summarize_session(session_id: str, backend: Backend | None = None) -> str | None:
|
def summarize_session(session_id: str, backend: Backend | None = None) -> str | None:
|
||||||
@@ -85,19 +97,26 @@ def summarize_all(backend: Backend | None = None, limit: int | None = None) -> d
|
|||||||
with an up-to-date summary are skipped, so re-running continues where it left off.
|
with an up-to-date summary are skipped, so re-running continues where it left off.
|
||||||
"""
|
"""
|
||||||
sessions = memory.list_sessions()
|
sessions = memory.list_sessions()
|
||||||
done, skipped = 0, 0
|
done, skipped, failed = 0, 0, 0
|
||||||
for s in sessions:
|
for s in sessions:
|
||||||
sid = s["id"]
|
sid = s["id"]
|
||||||
if memory.get_summary(sid) and memory.unsummarized_count(sid) == 0:
|
if memory.get_summary(sid) and memory.unsummarized_count(sid) == 0:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
continue
|
continue
|
||||||
|
try:
|
||||||
summarize_session(sid, backend=backend)
|
summarize_session(sid, backend=backend)
|
||||||
|
except Exception as exc:
|
||||||
|
# Don't let one bad session kill the batch; log and move on (it'll
|
||||||
|
# be retried on the next run, since it stays unsummarized).
|
||||||
|
failed += 1
|
||||||
|
logbus.log("error", "summarize failed", session=sid, error=str(exc)[:120])
|
||||||
|
continue
|
||||||
done += 1
|
done += 1
|
||||||
if done % 25 == 0:
|
if done % 25 == 0:
|
||||||
logbus.log("info", "summarize-all progress", summarized=done, skipped=skipped)
|
logbus.log("info", "summarize-all progress", summarized=done, skipped=skipped, failed=failed)
|
||||||
if limit is not None and done >= limit:
|
if limit is not None and done >= limit:
|
||||||
break
|
break
|
||||||
report = {"summarized": done, "skipped": skipped, "total": len(sessions)}
|
report = {"summarized": done, "skipped": skipped, "failed": failed, "total": len(sessions)}
|
||||||
logbus.log("info", "summarize-all complete", **report)
|
logbus.log("info", "summarize-all complete", **report)
|
||||||
return report
|
return report
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user