2a73033eed
The profile pass map-reduced every session gist (~851) on every consolidation firing — the biggest redundant-work and MI50-heat source left after the eras fix. Now: skip when nothing's new, fold only the gists added since last build into the existing profile, and full-rebuild only when there's no profile, too much has accumulated to fold safely (>FOLD_LIMIT), on a periodic cadence (anti-drift), or when forced. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
129 lines
5.6 KiB
Python
129 lines
5.6 KiB
Python
"""Profile derivation: distill standing facts about the user (semantic memory).
|
|
|
|
This is consolidation step 2. It distills the session gists into one profile
document — who Brian is as a player and person — which is then injected into
every prompt. New gists are normally folded into the existing profile; every gist
is map-reduced from scratch only on a full rebuild. This is what answers
identity/abstract questions
|
|
("what kind of player am I", "what are my leaks") that raw recall handles badly,
|
|
because those are patterns across many sessions, not facts in any single message.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
|
|
from lyra import config, llm, logbus, memory
|
|
from lyra.llm import Backend, Message
|
|
|
|
# Character budget handed to _batch_texts for each MAP/REDUCE batch.
BATCH_CHARS = 18000
|
|
|
|
# System prompt for the MAP step: pull durable facts out of one batch of session
# summaries. Backslash continuations keep the literal a single line of text.
_MAP_PROMPT = """From these session summaries, extract durable facts about Brian \
— things that are stably true, not one-off events. Cover, where present: poker \
games/formats/stakes he plays, his playing style and strengths, recurring leaks \
and tendencies, mental-game patterns (tilt triggers, scared money, fatigue), \
relevant personal context, and how he likes to be coached. Terse bullet points. \
Omit anything not supported by the summaries."""
|
|
|
|
# System prompt for the REDUCE step: merge several extracted fact lists into one
# deduplicated profile. Backslash continuations keep the literal a single line.
_REDUCE_PROMPT = """Merge these fact lists into one deduplicated profile of Brian. \
Organize under these headings: Poker Style, Leaks & Tendencies, Mental Game, \
Personal Context, Working With Brian. Keep it tight — bullets, no fluff, no \
repetition. Resolve contradictions toward the more recent/frequent signal."""
|
|
|
|
# System prompt for the incremental path: update an existing profile with facts
# from recent sessions. Backslash continuations keep the literal a single line.
_FOLD_PROMPT = """Update Brian's existing profile with new facts from his most \
recent sessions. Keep the same headings (Poker Style, Leaks & Tendencies, Mental \
Game, Personal Context, Working With Brian). Integrate genuinely new durable facts, \
strengthen or revise existing bullets where the new sessions confirm or contradict \
them (favor the more recent signal), and drop nothing that's still true. Keep it \
tight — bullets, no fluff, no repetition. Return the full updated profile."""
|
|
|
|
# A long gap (consolidation hasn't run in ages) means too many new gists to fold at
# once and still trust the delta path; rebuild from scratch instead. And whenever the
# session count crosses a multiple of FULL_REBUILD_EVERY, do a full rebuild
# regardless, so accumulated small folds can't fossilize stale facts.
|
|
FOLD_LIMIT = 25  # most new gists rebuild_profile will fold; above this it does a full rebuild
|
|
FULL_REBUILD_EVERY = 100  # rebuild_profile does a full rebuild when the session total crosses a multiple of this
|
|
|
|
|
|
def _batch_texts(texts: list[str], budget: int) -> list[str]:
    """Group texts, in order, into blank-line-joined blocks of at most `budget` chars.

    The two-character separator inserted between texts is counted against the
    budget, so a joined block never exceeds it. The one exception is a single
    text that is itself longer than `budget`: it is emitted alone, unsplit.

    Args:
        texts: Strings to pack; order is preserved.
        budget: Maximum length, in characters, of each joined block.

    Returns:
        The joined blocks; an empty list when `texts` is empty.
    """
    sep = "\n\n"
    blocks: list[str] = []
    buf: list[str] = []
    size = 0  # always equals len(sep.join(buf))
    for text in texts:
        # Cost of appending to the current block: the text plus a separator,
        # unless it would be the block's first entry.
        cost = len(text) + (len(sep) if buf else 0)
        if buf and size + cost > budget:
            blocks.append(sep.join(buf))
            buf, size = [], 0
            cost = len(text)  # now first in a fresh block: no separator
        buf.append(text)
        size += cost
    if buf:
        blocks.append(sep.join(buf))
    return blocks
|
|
|
|
|
|
def _call(prompt: str, body: str, backend: Backend) -> str:
    """Run one completion with `prompt` as the system message and `body` as the user message."""
    system_msg: Message = {"role": "system", "content": prompt}
    user_msg: Message = {"role": "user", "content": body}
    return llm.complete([system_msg, user_msg], backend=backend)
|
|
|
|
|
|
def _map_reduce(gists: list[str], backend: Backend) -> str:
    """MAP: extract facts from batches of gists. REDUCE: merge down to one fact list.

    Args:
        gists: Session gist texts to digest.
        backend: LLM backend used for every MAP and REDUCE call.

    Returns:
        The single surviving fact list. When all gists fit in one batch this is
        the raw MAP output (no REDUCE call is made). Returns "" for empty input.
    """
    if not gists:
        return ""  # nothing to digest; also avoids indexing an empty list below
    partials = [_call(_MAP_PROMPT, b, backend) for b in _batch_texts(gists, BATCH_CHARS)]
    while len(partials) > 1:
        blocks = _batch_texts(partials, BATCH_CHARS)
        if len(blocks) >= len(partials):
            # No two partials fit in one batch, so reducing them one-by-one would
            # return just as many partials and the loop could spin forever. Merge
            # them pairwise instead (accepting over-budget blocks) so every round
            # strictly shrinks the list.
            blocks = ["\n\n".join(partials[i:i + 2]) for i in range(0, len(partials), 2)]
        partials = [_call(_REDUCE_PROMPT, b, backend) for b in blocks]
    return partials[0]
|
|
|
|
|
|
def _full_rebuild(gists: list[str], backend: Backend) -> str:
    """Expensive path: derive a fresh profile from all `gists`, store it, and return it.

    The stored profile is recorded as covering `len(gists)` sessions.
    """
    rebuilt = _map_reduce(gists, backend)
    session_count = len(gists)
    memory.set_profile(rebuilt, session_count)
    logbus.log(
        "info",
        "profile rebuilt",
        sessions=session_count,
        chars=len(rebuilt),
    )
    return rebuilt
|
|
|
|
|
|
def _fold(existing: str, new_gists: list[str], total: int, backend: Backend) -> str:
    """Cheap path: merge facts from `new_gists` into the `existing` profile.

    The new gists are first distilled into a fact list, then a single fold call
    combines that list with the existing profile. The result is stored as
    covering `total` sessions and returned.
    """
    facts = _map_reduce(new_gists, backend)
    updated = _call(
        _FOLD_PROMPT,
        f"EXISTING PROFILE:\n{existing}\n\nNEW FACTS FROM RECENT SESSIONS:\n{facts}",
        backend,
    )
    memory.set_profile(updated, total)
    logbus.log(
        "info",
        "profile folded",
        added=len(new_gists),
        total=total,
        chars=len(updated),
    )
    return updated
|
|
|
|
|
|
def rebuild_profile(backend: Backend | None = None, force: bool = False) -> str | None:
    """Bring Brian's profile up to date with the stored session gists.

    Incremental by default, so a consolidation pass does not re-digest every
    session each time. Outcomes, in order of precedence:

    * no summaries at all: return None;
    * no usable existing profile, `force=True`, or a covered-session count outside
      1..total: full rebuild from every gist;
    * nothing new since the last build: return the existing profile untouched;
    * more than FOLD_LIMIT new gists, or the session total has crossed a
      FULL_REBUILD_EVERY boundary (anti-drift): full rebuild;
    * otherwise: fold only the new gists into the existing profile.

    `backend` defaults to the configured summary backend.
    """
    backend = backend or config.load().summary_backend
    summaries = memory.list_summaries()
    if not summaries:
        return None

    total = len(summaries)
    existing = memory.get_profile()
    covered = memory.profile_sessions_covered()

    # Incremental handling needs a trustworthy baseline; otherwise start over.
    if not existing or force or not 0 < covered <= total:
        return _full_rebuild([s.content for s in summaries], backend)

    pending = total - covered
    if pending == 0:
        logbus.log("info", "profile unchanged", sessions=total)
        return existing  # nothing new since last build — skip entirely

    same_cadence_window = total // FULL_REBUILD_EVERY == covered // FULL_REBUILD_EVERY
    if pending > FOLD_LIMIT or not same_cadence_window:
        return _full_rebuild([s.content for s in summaries], backend)

    fresh_gists = [s.content for s in summaries[covered:]]
    return _fold(existing, fresh_gists, total, backend)
|
|
|
|
|
|
def main() -> int:
    """CLI entry point: rebuild the profile and print it.

    Returns 0 after printing the profile, or 1 (with a hint) when there are no
    summaries to build from.
    """
    profile = rebuild_profile()
    if profile is not None:
        print(profile)
        return 0
    print("No summaries yet — run lyra-summarize first.")
    return 1
|
|
|
|
|
|
# Script entry: hand main()'s return value to the interpreter as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|