perf: incremental era rebuilds — skip unchanged months
rebuild_eras() re-digested EVERY month from scratch on every coherence pass, including old months whose sessions never change — ~17 redundant 32B-model calls per pass (a big slice of the ~40-min consolidation grind, plus MI50 heat). It now compares each month's current session count to the count stored with its era and rebuilds only the months that changed (force=True still rebuilds all). The report gains built/skipped counts. test_era.py covers: the first pass builds all months, unchanged months are skipped, only a month that gained a session is rebuilt, and force rebuilds all. Suite 99 green, ruff clean. (The profile rebuild re-reading all 851 sessions on every pass is the bigger remaining hog — a separate, harder fix.) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+14
-7
@@ -54,17 +54,24 @@ def _digest_month(gists: list[str], backend: Backend) -> str:
|
|||||||
return partials[0]
|
return partials[0]
|
||||||
|
|
||||||
|
|
||||||
def rebuild_eras(backend: Backend | None = None, force: bool = False) -> dict:
    """Build a digest per month, skipping months whose session count is unchanged.

    Old months don't change, so re-digesting them on every consolidation pass was
    pure wasted LLM work (and MI50 heat). A month is rebuilt only when its current
    session count differs from the count stored with its era, or when it has no
    stored era yet.

    Args:
        backend: Backend handed to ``_digest_month``. When falsy, falls back to
            ``config.load().summary_backend``.
        force: When True, rebuild every month regardless of the stored counts.

    Returns:
        A report dict ``{"built": ..., "skipped": ..., "months": ...}`` where
        ``months`` equals ``built + skipped``.

    Note:
        Change detection compares session counts only. A month whose sessions
        changed without changing their number is treated as unchanged; pass
        ``force=True`` to rebuild it.
    """
    backend = backend or config.load().summary_backend
    by_month = memory.summaries_by_month()
    # Session count recorded with each stored era, keyed by month.
    have = {e.month: e.session_count for e in memory.list_eras()}
    built = skipped = 0
    for month in sorted(by_month):
        n = len(by_month[month])
        # A month with no stored era yields None here, which never equals n,
        # so it is always built.
        if not force and have.get(month) == n:
            skipped += 1
            continue  # unchanged month — keep its existing digest
        digest = _digest_month(by_month[month], backend)
        memory.store_era(month, digest, n)
        built += 1
        logbus.log("info", "era built", month=month, sessions=n)
    # "months" is retained alongside the new counters: total months seen this pass.
    report = {"built": built, "skipped": skipped, "months": built + skipped}
    logbus.log("info", "eras complete", **report)
    return report
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,44 @@
|
|||||||
|
"""Era rollups: only re-digest months whose session count changed (incremental)."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from lyra.memory import Era
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def era(monkeypatch):
    """Return the ``lyra.era`` module, reloaded so each test gets a fresh copy.

    ``monkeypatch`` is requested but not used inside this fixture.
    """
    import lyra.era as era_module

    # importlib.reload returns the reloaded module object.
    return importlib.reload(era_module)
|
||||||
|
|
||||||
|
|
||||||
|
def test_rebuild_eras_is_incremental(era, monkeypatch):
    """rebuild_eras digests only new or changed months unless ``force=True``."""
    by_month = {"2025-01": ["a", "b"], "2025-02": ["c"]}
    stored: dict[str, int] = {}  # month -> session count; stands in for the era store
    built: list[str] = []  # months handed to store_era, in call order

    def fake_store_era(month, content, n):
        # Record the stored count and remember which month was (re)built.
        stored[month] = n
        built.append(month)

    monkeypatch.setattr(era.memory, "summaries_by_month", lambda: dict(by_month))
    monkeypatch.setattr(
        era.memory,
        "list_eras",
        lambda: [Era(m, "x", c, "t") for m, c in stored.items()],
    )
    monkeypatch.setattr(era.memory, "store_era", fake_store_era)
    monkeypatch.setattr(era, "_digest_month", lambda gists, backend: "digest")  # no LLM

    # First pass: nothing stored yet, so both months are built (in sorted order).
    r1 = era.rebuild_eras(backend="local")
    assert r1["built"] == 2 and r1["skipped"] == 0
    assert r1["months"] == 2
    assert built == ["2025-01", "2025-02"]

    # Second pass: counts match the stored eras, so everything is skipped.
    built.clear()
    r2 = era.rebuild_eras(backend="local")
    assert r2["built"] == 0 and r2["skipped"] == 2 and built == []

    # One month gains a session: only that month is rebuilt.
    built.clear()
    by_month["2025-02"].append("d")
    r3 = era.rebuild_eras(backend="local")
    assert r3["built"] == 1 and r3["skipped"] == 1 and built == ["2025-02"]

    # force=True rebuilds every month even though no count changed.
    built.clear()
    r4 = era.rebuild_eras(backend="local", force=True)
    assert r4["built"] == 2 and r4["skipped"] == 0
    assert built == ["2025-01", "2025-02"]
|
||||||
Reference in New Issue
Block a user