"""The chat turn loop: persona + tiered memory + recent context -> reply. Context is assembled in tiers (oldest/most-compacted first): 1. persona 2. long-term gist — relevant *summaries* of other sessions 3. sharp details — a few raw cross-session exchanges (so specifics survive) 4. recent raw turns of the current session (full fidelity) 5. the new user message After replying, the session is compacted if enough new turns have accumulated. """ from __future__ import annotations from lyra import config, llm, logbus, memory, persona, summary from lyra.llm import Backend, Message RECALL_K = 3 # raw cross-session "sharp detail" hits RECENT_N = 10 # raw turns of the current session SUMMARY_K = 3 # other-session gists def _summary_note(summaries: list[memory.Summary]) -> Message: lines = [f"- ({s.created_at[:10]}) {s.content}" for s in summaries] body = "Gist of earlier sessions (compacted — ask if you need specifics):\n" + "\n".join(lines) return {"role": "system", "content": body} def _detail_note(exchanges: list[memory.Exchange]) -> Message: lines = [f"- ({ex.created_at[:10]}, {ex.role}) {ex.content}" for ex in exchanges] body = "Specific things you recall from past conversations:\n" + "\n".join(lines) return {"role": "system", "content": body} def _render(messages: list[Message]) -> str: """Human-readable dump of the exact prompt, for the live-log inspector.""" return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages) def build_messages(session_id: str, user_msg: str) -> list[Message]: """Assemble the full, tiered message list for one turn.""" messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}] recent = memory.recent(session_id, n=RECENT_N) recent_ids = {ex.id for ex in recent} # Tier 1: compacted gists of *other* sessions (long-term, general idea). summaries = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id) if summaries: messages.append(_summary_note(summaries)) # Tier 2: a few sharp raw details from other sessions (so specifics survive # compaction). Skip the current session (its raw turns are in `recent`). recalled = [ ex for ex in memory.recall(user_msg, k=RECALL_K) if ex.id not in recent_ids and ex.session_id != session_id ] if recalled: messages.append(_detail_note(recalled)) # Tier 3: current session, full fidelity. for ex in recent: messages.append({"role": ex.role, "content": ex.content}) messages.append({"role": "user", "content": user_msg}) logbus.log( "debug", "context built", recent=len(recent), summaries=len(summaries), details=len(recalled), chars=sum(len(m["content"]) for m in messages), detail=_render(messages), ) return messages def respond(session_id: str, user_msg: str, backend: Backend = "cloud") -> str: """Produce Lyra's reply to a single user message and persist the exchange.""" cfg = config.load() model = cfg.local_model if backend == "local" else cfg.cloud_model logbus.log( "info", "chat request", session=session_id, backend=backend, model=model, embed=cfg.embed_backend, ) messages = build_messages(session_id, user_msg) reply = llm.complete(messages, backend=backend) logbus.log("info", "reply", session=session_id, chars=len(reply)) memory.remember(session_id, "user", user_msg) memory.remember(session_id, "assistant", reply) # Compact this session once enough new turns have piled up. summary.maybe_summarize(session_id) return reply