From d6f3516a341f605b4fda36a86ee08a81387307fb Mon Sep 17 00:00:00 2001
From: serversdown
Date: Thu, 25 Jun 2026 03:31:02 +0000
Subject: [PATCH] =?UTF-8?q?perf:=20incremental=20era=20rebuilds=20?=
 =?UTF-8?q?=E2=80=94=20skip=20unchanged=20months?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rebuild_eras() re-digested EVERY month from scratch on every coherence pass,
including old months whose sessions never change — ~17 redundant 32B calls
per pass (a big slice of the ~40-min consolidation grind + MI50 heat). Now it
compares each month's current session count to the stored era and only
rebuilds changed months (force=True still does all). Report gains
built/skipped counts.

test_era.py: builds all first pass, skips unchanged, rebuilds only a month
that gained a session, force rebuilds all. Suite 99 green, ruff clean.

(Profile rebuild re-reading all 851 sessions every pass is the bigger
remaining hog — separate, harder fix.)

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 lyra/era.py       | 21 ++++++++++++++-------
 tests/test_era.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_era.py

diff --git a/lyra/era.py b/lyra/era.py
index b70a8dd..5624ee6 100644
--- a/lyra/era.py
+++ b/lyra/era.py
@@ -54,17 +54,24 @@ def _digest_month(gists: list[str], backend: Backend) -> str:
     return partials[0]
 
 
-def rebuild_eras(backend: Backend | None = None) -> dict:
-    """(Re)build a digest for every month that has session gists."""
+def rebuild_eras(backend: Backend | None = None, force: bool = False) -> dict:
+    """Build a digest per month, but only for months whose session count changed since
+    the last build — old months don't change, so re-digesting them every consolidation
+    pass was pure wasted LLM work (and MI50 heat). `force=True` rebuilds everything."""
     backend = backend or config.load().summary_backend
     by_month = memory.summaries_by_month()
-    months = 0
+    have = {e.month: e.session_count for e in memory.list_eras()}
+    built = skipped = 0
     for month in sorted(by_month):
+        n = len(by_month[month])
+        if not force and have.get(month) == n:
+            skipped += 1
+            continue  # unchanged month — keep its existing digest
         digest = _digest_month(by_month[month], backend)
-        memory.store_era(month, digest, len(by_month[month]))
-        months += 1
-        logbus.log("info", "era built", month=month, sessions=len(by_month[month]))
-    report = {"months": months}
+        memory.store_era(month, digest, n)
+        built += 1
+        logbus.log("info", "era built", month=month, sessions=n)
+    report = {"built": built, "skipped": skipped, "months": built + skipped}
     logbus.log("info", "eras complete", **report)
     return report
 
diff --git a/tests/test_era.py b/tests/test_era.py
new file mode 100644
index 0000000..6d2bda4
--- /dev/null
+++ b/tests/test_era.py
@@ -0,0 +1,44 @@
+"""Era rollups: only re-digest months whose session count changed (incremental)."""
+from __future__ import annotations
+
+import importlib
+
+import pytest
+
+from lyra.memory import Era
+
+
+@pytest.fixture
+def era(monkeypatch):
+    import lyra.era as era
+    importlib.reload(era)
+    return era
+
+
+def test_rebuild_eras_is_incremental(era, monkeypatch):
+    by_month = {"2025-01": ["a", "b"], "2025-02": ["c"]}
+    stored: dict[str, int] = {}
+    built: list[str] = []
+
+    monkeypatch.setattr(era.memory, "summaries_by_month", lambda: dict(by_month))
+    monkeypatch.setattr(era.memory, "list_eras",
+                        lambda: [Era(m, "x", c, "t") for m, c in stored.items()])
+    monkeypatch.setattr(era.memory, "store_era",
+                        lambda month, content, n: (stored.__setitem__(month, n), built.append(month)))
+    monkeypatch.setattr(era, "_digest_month", lambda gists, backend: "digest")  # no LLM
+
+    r1 = era.rebuild_eras(backend="local")  # first pass: both built
+    assert r1["built"] == 2 and r1["skipped"] == 0
+
+    built.clear()
+    r2 = era.rebuild_eras(backend="local")  # nothing changed: all skipped
+    assert r2["built"] == 0 and r2["skipped"] == 2 and built == []
+
+    built.clear()
+    by_month["2025-02"].append("d")  # one month gains a session
+    r3 = era.rebuild_eras(backend="local")
+    assert r3["built"] == 1 and r3["skipped"] == 1 and built == ["2025-02"]
+
+    built.clear()
+    r4 = era.rebuild_eras(backend="local", force=True)  # force rebuilds all
+    assert r4["built"] == 2