diff --git a/lyra/mind.py b/lyra/mind.py index 12d4b88..9579116 100644 --- a/lyra/mind.py +++ b/lyra/mind.py @@ -104,10 +104,40 @@ def _render(messages: list[Message]) -> str: return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages) +# Generous triggers for the heavy situational persona sections — err toward INCLUDING +# them (a false positive is a few spare KB; a false negative risks confabulation or +# eyeballed poker math). The core (identity + voice) is always present regardless. +_META_HINTS = ( + "you work", "how do you", "how does your", "your memory", "your dream", "your thought", + "do you remember", "are you", "do you feel", "conscious", "sentient", "yourself", + "your mind", "who are you", "what are you", "your origin", "how were you", "how did you", + "your inner", "your reflect", "your journal", +) +_POKER_HINTS = ( + "poker", "fold", "call", "raise", "river", "turn", "flop", "preflop", "equity", "range", + "villain", "stack", "tilt", "hand", "bluff", "pot", "3bet", "gto", "outs", "draw", +) + + +def _persona_block(user_msg: str, mode: modes.Mode | None, moment: dict | None) -> str: + """Core persona always; pull in situational sections (origin/self-model, poker + guardrails) only when the turn calls for it.""" + parts = [persona.core_prompt()] + um = user_msg.lower() + kind = (moment or {}).get("kind") + if kind == "meta" or any(h in um for h in _META_HINTS): + parts += [persona.section("What you are"), persona.section("How you actually work")] + poker = (mode and mode.key in ("poker_cash", "study")) or kind == "strategic" \ + or any(h in um for h in _POKER_HINTS) + if poker: + parts.append(persona.section("What you do NOT do")) + return "\n\n".join(p for p in parts if p) + + def build_messages(session_id: str, user_msg: str, mode: modes.Mode | None = None, moment: dict | None = None) -> list[Message]: """Assemble the full, tiered message list for one turn.""" - messages: list[Message] = [{"role": "system", "content": 
persona.system_prompt()}] + messages: list[Message] = [{"role": "system", "content": _persona_block(user_msg, mode, moment)}] # Autonomy Core: Lyra's own evolving interiority (mood, self-narrative). Comes # right after the persona — her sense of self before her model of the world. @@ -207,12 +237,30 @@ _DELIBERATE_SYS = ( ) -def _deliberate(messages: list[Message], backend: Backend, model: str | None) -> str: +def _deliberation_context(session_id: str, user_msg: str) -> list[Message]: + """A LEAN context for the private thinking pass — her interiority + recent turns + + the message. Deliberately omits the full persona, profile, narrative, and recall + tiers: the thinking doesn't need the voice rules or the world-model dump (those + shape the final reply, not the private take), and dropping them cuts this whole + extra call by most of its tokens.""" + msgs: list[Message] = [ + {"role": "system", "content": self_state.render_for_context(self_state.load())} + ] + inner = _inner_life_note() + if inner: + msgs.append(inner) + for ex in memory.recent(session_id, n=6): + msgs.append({"role": ex.role, "content": ex.content}) + msgs.append({"role": "user", "content": user_msg}) + msgs.append({"role": "system", "content": _DELIBERATE_SYS}) + return msgs + + +def _deliberate(session_id: str, user_msg: str, backend: Backend, model: str | None) -> str: """One private 'what do I actually think' pass before replying. 
Returns her thinking (empty on any failure — chat must never break because deliberation hiccuped).""" try: - out = llm.complete(messages + [{"role": "system", "content": _DELIBERATE_SYS}], - backend=backend, model=model) + out = llm.complete(_deliberation_context(session_id, user_msg), backend=backend, model=model) return (out or "").strip() except Exception as exc: logbus.log("error", "deliberation failed", error=str(exc)[:160]) @@ -232,11 +280,11 @@ def _answer_from(thinking: str) -> Message: def _deliberation_note(session_id: str, user_msg: str, backend: Backend, - model: str | None, messages: list[Message]) -> Message | None: + model: str | None) -> Message | None: """Run the private thinking pass if warranted; return the answer-from-thinking note.""" if not config.load().chat_deliberate or not _should_deliberate(user_msg): return None - thinking = _deliberate(messages, backend, model) + thinking = _deliberate(session_id, user_msg, backend, model) if not thinking: return None logbus.log("info", "deliberated", session=session_id, chars=len(thinking), detail=thinking) @@ -299,7 +347,7 @@ def _compose(ctx: TurnContext) -> TurnContext: def _deliberate_part(ctx: TurnContext) -> TurnContext: """Private 'what do I actually think' pass, appended last so it shapes the reply.""" - note = _deliberation_note(ctx.session_id, ctx.user_msg, ctx.backend, ctx.model, ctx.messages) + note = _deliberation_note(ctx.session_id, ctx.user_msg, ctx.backend, ctx.model) if note: ctx.messages.append(note) return ctx diff --git a/lyra/persona.py b/lyra/persona.py index 1f069aa..3471dae 100644 --- a/lyra/persona.py +++ b/lyra/persona.py @@ -1,20 +1,60 @@ """Persona: Lyra's identity and voice, loaded from an editable markdown prompt. -The prompt lives in `personas/<name>.md` so it can be tuned without touching -code. `LYRA_PERSONA` selects which file to load (default: "lyra"). +The prompt lives in `personas/<name>.md` so it can be tuned without touching code. 
+`LYRA_PERSONA` selects which file to load (default: "lyra"). + +The file is split on `## ` headers so the control plane can include only what a turn +needs: the **core** (identity + voice — the anti-generic essentials) is always sent; +the heavier situational sections (her origin, the self-model, the poker guardrails) +are pulled in by `mind` only when relevant. This keeps the per-turn prompt tight +without losing fidelity. `system_prompt()` still returns the whole thing (fallback). """ from __future__ import annotations import os +import re from functools import lru_cache from pathlib import Path _PERSONA_DIR = Path(__file__).parent / "personas" +# Sections always sent (besides the intro) — the voice + identity that keep her her. +_CORE = ("Who you are", "How you talk", "Right now") + + +def _name(name: str | None) -> str: + return name or os.getenv("LYRA_PERSONA", "lyra") + + +@lru_cache(maxsize=None) +def _sections(name: str) -> dict[str, str]: + """Parse the persona file into {header: text}; the pre-header preamble is 'intro'.""" + text = (_PERSONA_DIR / f"{name}.md").read_text(encoding="utf-8").strip() + chunks = re.split(r"(?m)^## ", text) + out = {"intro": chunks[0].strip()} + for ch in chunks[1:]: + header = ch.split("\n", 1)[0].strip() + out[header] = ("## " + ch).strip() + return out + @lru_cache(maxsize=None) def system_prompt(name: str | None = None) -> str: - """Return the persona system prompt. Cached; pass a name to override env.""" - name = name or os.getenv("LYRA_PERSONA", "lyra") - path = _PERSONA_DIR / f"{name}.md" - return path.read_text(encoding="utf-8").strip() + """The full persona (every section). 
Fallback / back-compat.""" + return (_PERSONA_DIR / f"{_name(name)}.md").read_text(encoding="utf-8").strip() + + +def core_prompt(name: str | None = None) -> str: + """Intro + the always-on core sections (identity + voice).""" + s = _sections(_name(name)) + parts = [s["intro"]] + [section(h, name) for h in _CORE] + return "\n\n".join(p for p in parts if p) + + +def section(header_prefix: str, name: str | None = None) -> str: + """A situational section by header prefix (e.g. 'How you actually work'); '' if absent.""" + pref = header_prefix.lower() + for header, body in _sections(_name(name)).items(): + if header.lower().startswith(pref): + return body + return "" diff --git a/tests/test_chat.py b/tests/test_chat.py index f0e8fd9..0bdbace 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -29,15 +29,16 @@ def test_should_deliberate_skips_trivial(lyra): def test_deliberation_note_runs_and_appends(lyra, monkeypatch): - _, mind = lyra + memory, mind = lyra calls = [] def fake_complete(messages, backend=None, model=None): calls.append(messages) return "I actually think the first move is the smallest end-to-end slice." 
+ memory.ensure_session("s1") monkeypatch.setattr(mind.llm, "complete", fake_complete) - note = mind._deliberation_note("s1", "How would we start on this?", "cloud", None, []) + note = mind._deliberation_note("s1", "How would we start on this?", "cloud", None) assert note and note["role"] == "system" assert "first move is the smallest" in note["content"] # her thinking carried in assert "numbered list" in note["content"].lower() # voice enforcement attached @@ -49,10 +50,26 @@ def test_deliberation_skipped_when_disabled(lyra, monkeypatch): monkeypatch.setenv("CHAT_DELIBERATE", "false") called = [] monkeypatch.setattr(mind.llm, "complete", lambda *a, **k: called.append(1) or "x") - assert mind._deliberation_note("s1", "a real substantive question here", "cloud", None, []) is None + assert mind._deliberation_note("s1", "a real substantive question here", "cloud", None) is None assert called == [] # no LLM call when off +def test_persona_core_is_tight_situational_is_gated(lyra): + memory, mind = lyra + from lyra import persona + core, full = persona.core_prompt(), persona.system_prompt() + assert "How you talk" in core and "How you actually work" not in core # voice core, self-model not + assert len(core) < len(full) and persona.section("How you actually work") + + memory.ensure_session("s1") + casual = " ".join(m["content"] for m in mind.build_messages("s1", "any dinner ideas tonight?") + if m["role"] == "system") + meta = " ".join(m["content"] for m in mind.build_messages("s1", "how does your memory actually work?") + if m["role"] == "system") + assert "How you actually work" not in casual # situational section omitted on a casual turn + assert "How you actually work" in meta # pulled in for a meta question + + def test_assemble_runs_the_pipeline(lyra, monkeypatch): memory, mind = lyra monkeypatch.setenv("CHAT_DELIBERATE", "false") # keep it offline for the structure test