"""Profile derivation: distill standing facts about the user (semantic memory).

This is consolidation step 2. It reads every session gist and map-reduces them
into one profile document — who Brian is as a player and person — which is then
injected into every prompt. This is what answers identity/abstract questions
("what kind of player am I", "what are my leaks") that raw recall handles badly,
because those are patterns across many sessions, not facts in any single message.
"""

from __future__ import annotations

from lyra import config, llm, logbus, memory
from lyra.llm import Backend, Message

# Character budget handed to `_batch_texts` when grouping text for one model call.
BATCH_CHARS = 18000

# NOTE: the trailing backslashes in the prompts below are line continuations
# inside the string literal — the prompts contain no newlines at those points.
_MAP_PROMPT = """From these session summaries, extract durable facts about Brian \
— things that are stably true, not one-off events. Cover, where present: poker \
games/formats/stakes he plays, his playing style and strengths, recurring leaks \
and tendencies, mental-game patterns (tilt triggers, scared money, fatigue), \
relevant personal context, and how he likes to be coached. Terse bullet points. \
Omit anything not supported by the summaries."""

_REDUCE_PROMPT = """Merge these fact lists into one deduplicated profile of Brian. \
Organize under these headings: Poker Style, Leaks & Tendencies, Mental Game, \
Personal Context, Working With Brian. Keep it tight — bullets, no fluff, no \
repetition. Resolve contradictions toward the more recent/frequent signal."""

# The explicit `\n` keeps the one real line break in this prompt (before the
# final instruction) visible and safe from trailing-whitespace stripping.
_FOLD_PROMPT = """Update Brian's existing profile with new facts from his most \
recent sessions. Keep the same headings (Poker Style, Leaks & Tendencies, Mental \
Game, Personal Context, Working With Brian). Integrate genuinely new durable facts, \
strengthen or revise existing bullets where the new sessions confirm or contradict \
them (favor the more recent signal), and drop nothing that's still true. Keep it \
tight — bullets, no fluff, no repetition. \n\
Return the full updated profile."""

# A long gap (consolidation hasn't run in ages) folds too much at once to trust
# the delta path; rebuild from scratch instead. And whenever the session count
# crosses a multiple of FULL_REBUILD_EVERY, do a full rebuild regardless, so
# accumulated small folds can't fossilize stale facts.
FOLD_LIMIT = 25
FULL_REBUILD_EVERY = 100


def _batch_texts(texts: list[str], budget: int) -> list[str]:
    """Group texts, in order, into blocks joined by blank lines.

    A block is closed as soon as adding the next text would push the summed
    text length past `budget`. The budget is approximate: the "\\n\\n"
    separators are not counted, and a single text longer than `budget` still
    becomes a block of its own rather than being split or dropped.

    Returns an empty list for empty input.
    """
    blocks: list[str] = []
    buf: list[str] = []
    size = 0
    for t in texts:
        # `and buf` guarantees every block holds at least one text, so an
        # oversized text never produces an empty block.
        if size + len(t) > budget and buf:
            blocks.append("\n\n".join(buf))
            buf, size = [], 0
        buf.append(t)
        size += len(t)
    if buf:
        blocks.append("\n\n".join(buf))
    return blocks


def _call(prompt: str, body: str, backend: Backend) -> str:
    """Run one completion: `prompt` as the system message, `body` as the user message."""
    messages: list[Message] = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": body},
    ]
    return llm.complete(messages, backend=backend)


def _map_reduce(gists: list[str], backend: Backend) -> str:
    """MAP: extract facts from batches of gists. REDUCE: fold to one fact list.

    `gists` must be non-empty (both callers in this module guarantee it); an
    empty list raises IndexError. When all gists fit in a single MAP batch the
    MAP output is returned as-is, without a REDUCE pass.
    """
    partials = [_call(_MAP_PROMPT, b, backend) for b in _batch_texts(gists, BATCH_CHARS)]
    while len(partials) > 1:
        batches = _batch_texts(partials, BATCH_CHARS)
        if len(batches) >= len(partials):
            # No progress: every partial is too large to share a batch, so a
            # reduce pass would hand back just as many partials and this loop
            # would never end. Merge neighbours pairwise, ignoring the budget,
            # so each pass strictly shrinks the list.
            batches = [
                "\n\n".join(partials[i:i + 2]) for i in range(0, len(partials), 2)
            ]
        partials = [_call(_REDUCE_PROMPT, g, backend) for g in batches]
    return partials[0]


def _full_rebuild(gists: list[str], backend: Backend) -> str:
    """Re-derive the whole profile from every gist (the expensive path).

    Stores the result via `memory.set_profile` together with the number of
    gists it covers, logs the rebuild, and returns the profile text.
    """
    profile = _map_reduce(gists, backend)
    memory.set_profile(profile, len(gists))
    logbus.log("info", "profile rebuilt", sessions=len(gists), chars=len(profile))
    return profile


def _fold(existing: str, new_gists: list[str], total: int, backend: Backend) -> str:
    """Fold only the new session gists into the existing profile (the cheap path).

    The new gists are first condensed to a fact list, then merged into
    `existing` with one fold call. The result is stored with `total` as the
    covered-session count, logged, and returned.
    """
    facts = _map_reduce(new_gists, backend)
    body = f"EXISTING PROFILE:\n{existing}\n\nNEW FACTS FROM RECENT SESSIONS:\n{facts}"
    profile = _call(_FOLD_PROMPT, body, backend)
    memory.set_profile(profile, total)
    logbus.log("info", "profile folded", added=len(new_gists), total=total, chars=len(profile))
    return profile


def rebuild_profile(backend: Backend | None = None, force: bool = False) -> str | None:
    """Derive Brian's profile from session gists.

    Incremental by default: if a profile already exists, fold only the gists
    added since it was last built instead of re-digesting all of them every
    consolidation pass (the old behavior re-read ~851 sessions each time — the
    biggest redundant-work / MI50-heat source). Falls back to a full rebuild
    when there's no profile yet, too much has accumulated to fold safely, on a
    periodic cadence (anti-drift), or when `force=True`.

    Args:
        backend: Backend to run completions on; defaults to the configured
            summary backend.
        force: Skip the incremental path and rebuild from every gist.

    Returns:
        The profile text, or None when there are no session summaries at all.
    """
    backend = backend or config.load().summary_backend
    summaries = memory.list_summaries()
    if not summaries:
        return None

    total = len(summaries)
    existing = memory.get_profile()
    covered = memory.profile_sessions_covered()

    # The incremental path is only trusted when the stored coverage count is
    # sane: positive and not larger than the number of summaries we now have.
    # Anything else falls through to the full rebuild below.
    if existing and not force and 0 < covered <= total:
        new = total - covered
        if new == 0:
            logbus.log("info", "profile unchanged", sessions=total)
            return existing  # nothing new since last build — skip entirely
        crosses_cadence = total // FULL_REBUILD_EVERY != covered // FULL_REBUILD_EVERY
        if new <= FOLD_LIMIT and not crosses_cadence:
            # NOTE(review): slicing by `covered` assumes list_summaries() is
            # ordered oldest-first and append-only — confirm against memory.
            return _fold(existing, [s.content for s in summaries[covered:]], total, backend)

    return _full_rebuild([s.content for s in summaries], backend)


def main() -> int:
    """CLI entry point: rebuild and print the profile; exit status 1 if there are no summaries."""
    profile = rebuild_profile()
    if profile is None:
        print("No summaries yet — run lyra-summarize first.")
        return 1
    print(profile)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())