Files
project-lyra/lyra/mind.py
T
serversdown 51c2d6abb9 perf: tighten the dynamic prompt — persona split + lean deliberation
The per-turn prompt was ~5.5K tokens (persona alone ~40%), sent up to 3x/turn.
Tightened by RELEVANCE (the control plane decides what each turn needs), not by
deletion — fidelity preserved, focus improved (buried instructions were getting
ignored), tokens roughly halved.

- persona split: core (identity + voice — always) vs situational sections pulled
  in only when relevant. mind._persona_block: self-model/origin only on meta turns
  (generous _META_HINTS), poker guardrails only in poker context (mode/strategic/
  _POKER_HINTS). persona.core_prompt()/section(); system_prompt() kept as fallback.
- lean deliberation: the private 'what do I think' pass now uses a focused context
  (her interiority + recent turns + the message), not the full persona/profile/
  narrative/recall dump. It shapes the take, not the voice.

Measured: casual Talk turn 21,949 -> 15,974 chars (-27%); deliberation 21,949 ->
6,026 (-72%); meta turns still include the self-model. Suite 98 green, ruff clean.

Real retirement of the long prompt is still the fine-tune (mouth); this is the
cheap, high-leverage cut that also improves adherence.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-24 20:48:44 +00:00

385 lines
17 KiB
Python

"""The control plane: assemble one turn from a society of small parts.
This is the explicit version of what used to be inline in `chat.py`. A turn is
built by running an ordered pipeline of *parts* over a shared `TurnContext`
(blackboard): each part reads what it needs and annotates the context, and the
last steps produce the message list `chat` then hands to the voice model.
P1 (this): the frame, behavior-preserving. The parts wrap the existing logic —
perceive (stub) -> route (the session's mode) -> compose (tiered prompt) ->
deliberate (private 'what do I actually think' pass).
Later phases fill in perceive (read the moment), route (register/intent + model
routing), and a learn loop — see docs/COGNITION.md. Most parts are cheap
deterministic code; the LLM is the exception (deliberate here, speak in `chat`).
"""
from __future__ import annotations
from dataclasses import dataclass, field
from lyra import clock, config, llm, logbus, memory, modes, perceive, persona, self_state, thoughts
from lyra.llm import Backend, Message
# Retrieval budgets consumed by build_messages when it assembles a turn's context.
RECALL_K = 3 # raw cross-session "sharp detail" hits (the k passed to memory.recall)
RECENT_N = 10 # raw turns of the current session (the n passed to memory.recent)
SUMMARY_K = 3 # other-session gists (the k passed to memory.recall_summaries)
# --- prompt parts (compose) ----------------------------------------------
def _mode_state_note(mode: modes.Mode | None) -> str | None:
    """Return the live, per-turn state line for the active mode, or None.

    State is only surfaced when the active mode's key equals ``modes.CASH.key``
    and Alligator Blood is currently engaged; the note keeps her coaching in
    that register. Every other situation yields None.
    """
    in_cash_mode = bool(mode) and mode.key == modes.CASH.key
    if not in_cash_mode:
        return None

    # Imported at call time so the core/domain coupling stays out of module import.
    from lyra import poker

    if not poker.alligator_active():
        return None
    return (
        "🐊 ALLIGATOR BLOOD is ON for this session. Coach Brian in that register: "
        "hang around, refuse to die, don't force miracles, make opponents beat him "
        "correctly. Tough, patient, steady — no heroics, no spew, no quitting."
    )
def _summary_note(summaries: list[memory.Summary]) -> Message:
    """Render session summaries as a single system message, one dated bullet each.

    Each bullet's date is the first 10 characters of the summary's
    ``session_started_at``, falling back to ``created_at`` when that is falsy.
    """
    bullets = []
    for summary in summaries:
        stamp = summary.session_started_at or summary.created_at
        bullets.append(f"- ({stamp[:10]}) {summary.content}")
    header = "Gist of earlier sessions (compacted — ask if you need specifics):\n"
    return {"role": "system", "content": header + "\n".join(bullets)}
def _detail_note(exchanges: list[memory.Exchange]) -> Message:
    """Render recalled raw exchanges as a single system message.

    One bullet per exchange: the first 10 characters of ``created_at``, the
    speaker role, then the content.
    """
    def bullet(exchange) -> str:
        return f"- ({exchange.created_at[:10]}, {exchange.role}) {exchange.content}"

    header = "Specific things you recall from past conversations:\n"
    return {"role": "system", "content": header + "\n".join(map(bullet, exchanges))}
def _inner_life_note() -> Message | None:
    """Build the system note describing what she has been doing on her own.

    Combines two optional sections — the note from ``thoughts.context_note()``
    and up to three recent journal/note entries — separated by a blank line.
    Returns None when neither section has content, so callers can skip it.
    """
    sections: list[str] = []

    thread_note = thoughts.context_note()  # active threads, with their latest thought
    if thread_note:
        sections.append(thread_note)

    entries = memory.list_journal(limit=3, kinds=("journal", "note"))
    if entries:
        # Entries are listed in the reverse of the order list_journal returned them.
        bullets = [
            f"- ({entry['created_at'][:10]}) {entry['content']}"
            for entry in reversed(entries)
        ]
        sections.append(
            "Things you've written in your journal lately (yours — you can refer back "
            "to them if they're relevant):\n" + "\n".join(bullets)
        )

    if sections:
        return {"role": "system", "content": "\n\n".join(sections)}
    return None
def _mode_menu_note(current: modes.Mode | None) -> str:
    """Describe the available modes, the current one, and when to offer a switch.

    The menu lists every entry of ``modes.MODES`` as ``label (key)``. With no
    current mode the note says she is in "Talk". The decision to offer a switch
    is left to the model; this text only states the policy.
    """
    entries = [f"{mode.label} ({key})" for key, mode in modes.MODES.items()]
    menu = ", ".join(entries)
    if current:
        active = current.label
    else:
        active = "Talk"
    return (
        f"Your modes: {menu}. You're in {active} right now. If Brian is clearly doing a "
        "different kind of work than your current mode — weighing a real decision while "
        "you're in Talk, digging into engineering, reviewing poker away from the table — "
        "briefly OFFER to switch (one short line). If he says yes, call set_mode with the "
        "mode key. Don't offer every turn or nag; only when it genuinely fits and serves him."
    )
def _now_note() -> Message:
    """System note with the current timestamp and the gap since the last exchange.

    When ``clock.humanize_gap`` returns nothing for the last-exchange time, the
    note instead says this is the first thing Brian has ever said.
    """
    now_part = f"The current date and time is {clock.stamp()}."
    gap = clock.humanize_gap(memory.last_exchange_at())
    if gap:
        gap_part = f" It has been {gap} since Brian last spoke with you."
    else:
        gap_part = " This is the first thing Brian has ever said to you."
    return {"role": "system", "content": now_part + gap_part}
def _render(messages: list[Message]) -> str:
    """Dump a message list as readable text for the live-log inspector.

    Each message becomes ``[role]`` on one line followed by its content;
    messages are separated by a blank line.
    """
    rendered = []
    for msg in messages:
        rendered.append(f"[{msg['role']}]\n{msg['content']}")
    return "\n\n".join(rendered)
# Generous triggers for the heavy situational persona sections — err toward INCLUDING
# them (a false positive is a few spare KB; a false negative risks confabulation or
# eyeballed poker math). The core (identity + voice) is always present regardless.
#
# Both tuples are matched in _persona_block by plain substring containment against
# the lowercased user message, so entries must be lowercase and short entries also
# match inside longer words (e.g. "turn" is contained in "return").

# Phrases suggesting the message is about her own nature or workings.
_META_HINTS = (
    "you work", "how do you", "how does your", "your memory", "your dream", "your thought",
    "do you remember", "are you", "do you feel", "conscious", "sentient", "yourself",
    "your mind", "who are you", "what are you", "your origin", "how were you", "how did you",
    "your inner", "your reflect", "your journal",
)
# Poker vocabulary; any hit pulls the poker guardrails section into the persona.
_POKER_HINTS = (
    "poker", "fold", "call", "raise", "river", "turn", "flop", "preflop", "equity", "range",
    "villain", "stack", "tilt", "hand", "bluff", "pot", "3bet", "gto", "outs", "draw",
)
def _persona_block(user_msg: str, mode: modes.Mode | None, moment: dict | None) -> str:
    """Build the persona system prompt for this turn.

    The core persona is always included. Situational sections are appended only
    when the turn calls for them:

    - "What you are" and "How you actually work" when the moment kind is "meta"
      or the message contains any ``_META_HINTS`` entry;
    - "What you do NOT do" when the active mode is one of the poker mode keys,
      the moment kind is "strategic", or the message contains any
      ``_POKER_HINTS`` entry.

    Hints are matched by plain substring containment on the lowercased message.
    Empty sections are dropped and the rest are joined with blank lines.
    """
    parts = [persona.core_prompt()]
    lowered = user_msg.lower()
    kind = (moment or {}).get("kind")

    if kind == "meta" or any(hint in lowered for hint in _META_HINTS):
        parts += [persona.section("What you are"), persona.section("How you actually work")]

    # Resolve the cash key through modes.CASH (as _mode_state_note does) instead of
    # trusting the literal alone, so this check cannot drift from the mode
    # definition. The literals are kept so existing behavior is a strict subset.
    in_poker_mode = bool(mode) and mode.key in (modes.CASH.key, "poker_cash", "study")
    if in_poker_mode or kind == "strategic" or any(hint in lowered for hint in _POKER_HINTS):
        parts.append(persona.section("What you do NOT do"))

    return "\n\n".join(part for part in parts if part)
def build_messages(session_id: str, user_msg: str,
                   mode: modes.Mode | None = None, moment: dict | None = None) -> list[Message]:
    """Assemble the full, tiered message list for one turn.

    Order of the resulting messages:
      1. persona block, then her self-state, then the optional inner-life note;
      2. mode card (if the mode has one), mode menu, optional live mode state;
      3. optional read-of-the-moment note, then the current-time note;
      4. optional surfaced thought, profile, and narrative;
      5. memory tiers: other-session summaries, raw recalled details, then the
         current session's recent turns verbatim;
      6. the new user message.

    A debug log entry with the counts, total characters and the rendered prompt
    is emitted before returning.
    """
    messages: list[Message] = []

    def add_system(text: str) -> None:
        messages.append({"role": "system", "content": text})

    # Who she is: persona first, then her own evolving interiority (Autonomy Core) —
    # her sense of self before her model of the world.
    add_system(_persona_block(user_msg, mode, moment))
    add_system(self_state.render_for_context(self_state.load()))

    # Ongoing inner life, so chat reads as a continuous mind rather than a fresh boot.
    inner = _inner_life_note()
    if inner:
        messages.append(inner)

    # How to behave right now. Talk mode has no card (the persona is Talk).
    if mode and mode.card:
        add_system(mode.card)

    # Mode awareness: she decides when to offer a switch; on his yes she calls set_mode.
    add_system(_mode_menu_note(mode))

    # Live ritual state (e.g. Alligator Blood ON) rides along with the card.
    state_note = _mode_state_note(mode)
    if state_note:
        add_system(state_note)

    # Per-turn register nudge from perceive/route; present only on charged moments.
    if moment and moment.get("note"):
        add_system(moment["note"])

    # She has no clock of her own: current time plus the gap since Brian last spoke.
    messages.append(_now_note())

    # Thought loop: a thread that built up while he was away may be surfaced to lead with.
    surfaced = thoughts.maybe_surface(memory.last_exchange_at())
    if surfaced:
        add_system(surfaced)

    # Semantic memory: the distilled profile of who Brian is.
    profile = memory.get_profile()
    if profile:
        add_system("What you know about Brian:\n" + profile)

    # Time-aware memory: the current narrative (recent arc, trends, callbacks).
    narrative = memory.get_narrative()
    if narrative:
        add_system("What's going on with Brian lately:\n" + narrative)

    recent = memory.recent(session_id, n=RECENT_N)
    recent_ids = {ex.id for ex in recent}

    # Tier 1: compacted gists of *other* sessions.
    summaries = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id)
    if summaries:
        messages.append(_summary_note(summaries))

    # Tier 2: a few sharp raw details from other sessions, skipping anything already
    # in the recent window or belonging to this session.
    recalled = []
    for ex in memory.recall(user_msg, k=RECALL_K):
        if ex.id in recent_ids or ex.session_id == session_id:
            continue
        recalled.append(ex)
    if recalled:
        messages.append(_detail_note(recalled))

    # Tier 3: the current session at full fidelity, then the new message.
    messages.extend({"role": ex.role, "content": ex.content} for ex in recent)
    messages.append({"role": "user", "content": user_msg})

    total_chars = sum(len(m["content"]) for m in messages)
    logbus.log(
        "debug", "context built",
        recent=len(recent), summaries=len(summaries), details=len(recalled),
        chars=total_chars, detail=_render(messages),
    )
    return messages
# --- deliberation (a private 'what do I actually think' pass) -------------
# Trivial acknowledgements that don't warrant a private thinking pass.
# _should_deliberate compares against these after stripping whitespace, lowercasing
# and removing trailing "!", "." and "?". Every entry here is shorter than the
# 12-character minimum that function also enforces, so the length gate already
# excludes them; the set only starts to matter if longer phrases are added.
_TRIVIAL = {"ok", "okay", "k", "kk", "lol", "haha", "thanks", "thank you", "ty", "yeah",
            "yep", "yes", "no", "nope", "nice", "cool", "sure", "right", "true", "gotcha", "👍"}
def _should_deliberate(user_msg: str) -> bool:
    """Decide whether a message is substantial enough for a private thinking pass.

    The message is normalized (whitespace stripped, lowercased, trailing
    ``!``/``.``/``?`` removed). Anything shorter than 12 characters after that is
    skipped; longer messages are skipped only if they appear in ``_TRIVIAL``.
    """
    normalized = user_msg.strip().lower().rstrip("!.?")
    if len(normalized) < 12:
        return False
    return normalized not in _TRIVIAL
# Instruction for the private thinking pass. _deliberation_context appends it as
# the final system message, after the user's message, so it is the last thing
# the model reads before producing its (unseen) thinking.
_DELIBERATE_SYS = (
    "Before you answer Brian, think privately — he will NOT see this. What do you ACTUALLY "
    "think about what he just said? Your real take, the specific substance worth giving, any "
    "genuine opinion, disagreement, or doubt. Draw on your own current thoughts/threads and "
    "what you actually know if they're relevant. Be concrete; skip pleasantries and generic "
    "enthusiasm. 2-5 sentences of honest thinking — no lists, no answer yet, just the thinking."
)
def _deliberation_context(session_id: str, user_msg: str) -> list[Message]:
    """Build the LEAN prompt for the private thinking pass.

    Contains only her self-state, the optional inner-life note, the last six
    turns of this session, the new message, and the deliberation instruction.
    The full persona, profile, narrative and recall tiers are left out on
    purpose: they shape the final reply rather than the private take, and
    omitting them removes most of this extra call's tokens.
    """
    interiority: Message = {
        "role": "system",
        "content": self_state.render_for_context(self_state.load()),
    }
    inner = _inner_life_note()
    lead = [interiority, inner] if inner else [interiority]

    history = [
        {"role": ex.role, "content": ex.content}
        for ex in memory.recent(session_id, n=6)
    ]

    return [
        *lead,
        *history,
        {"role": "user", "content": user_msg},
        {"role": "system", "content": _DELIBERATE_SYS},
    ]
def _deliberate(session_id: str, user_msg: str, backend: Backend, model: str | None) -> str:
    """Run one private 'what do I actually think' pass before replying.

    Returns the model's thinking with surrounding whitespace stripped, or an
    empty string when the model returns nothing or anything fails. Failures are
    logged and swallowed deliberately — chat must never break because
    deliberation hiccuped.
    """
    try:
        # Context building stays inside the try: a failure there should also
        # degrade to "no deliberation" rather than break the turn.
        out = llm.complete(_deliberation_context(session_id, user_msg), backend=backend, model=model)
        return (out or "").strip()
    except Exception as exc:
        # Include the exception class: str(exc) alone is empty for exceptions
        # raised without a message, which would leave nothing to diagnose.
        logbus.log("error", "deliberation failed", error=f"{type(exc).__name__}: {exc}"[:160])
        return ""
def _answer_from(thinking: str) -> Message:
    """Wrap private thinking in the system note that drives the final reply.

    The note quotes the thinking and then tells her to answer from it in her own
    voice. It is intended to be placed last (most influential) to beat gpt-4o's
    default-assistant boilerplate.
    """
    preamble = "Your private thinking just now (Brian can't see it):\n"
    directive = (
        "\n\nNow reply to Brian FROM that thinking, in your own voice — warm, direct, "
        "specific, opinionated. Give the actual substance, not a survey of options. Do NOT "
        "default to a numbered list or a how-to outline unless he explicitly asked for steps. "
        "No 'would you like to…' / 'let me know' closer — make your point and stop."
    )
    return {"role": "system", "content": preamble + thinking + directive}
def _deliberation_note(session_id: str, user_msg: str, backend: Backend,
                       model: str | None) -> Message | None:
    """Run the private thinking pass when warranted and wrap its result.

    Returns None when deliberation is disabled in config, the message does not
    warrant it, or the pass produced no thinking. Otherwise logs the thinking
    and returns the answer-from-thinking system note.
    """
    if not (config.load().chat_deliberate and _should_deliberate(user_msg)):
        return None

    thinking = _deliberate(session_id, user_msg, backend, model)
    if thinking:
        logbus.log("info", "deliberated", session=session_id, chars=len(thinking), detail=thinking)
        return _answer_from(thinking)
    return None
# --- the pipeline (a society of parts over a shared blackboard) -----------
@dataclass
class TurnContext:
    """Shared blackboard for a single turn.

    Each pipeline part receives the context, reads the fields it needs, writes
    its own annotations, and hands the context on.
    """

    # --- inputs supplied by the caller ---
    session_id: str
    user_msg: str
    backend: Backend
    model: str | None = None

    # --- annotations written by the pipeline parts ---
    mode: modes.Mode | None = None  # set by the route part
    moment: dict = field(default_factory=dict)  # perceive fills this in
    register: str | None = None  # route's per-turn register nudge
    messages: list[Message] = field(default_factory=list)  # set by the compose part
def _perceive(ctx: TurnContext) -> TurnContext:
    """Pipeline part: read the moment from the user's message.

    Stores whatever ``perceive.read`` returns for the message on ``ctx.moment``
    and passes the context on.
    """
    reading = perceive.read(ctx.user_msg)
    ctx.moment = reading
    return ctx
# How charged a moment must be before we nudge her register (avoid narrating every turn).
# _route compares the moment's "tilt" and "sentiment" values against these with >=.
_TILT_BAR = 0.5 # minimum "tilt" for the "steady" register
_UP_BAR = 0.6 # minimum "sentiment" for the "hype" register (an "intensity" floor also applies)
def _route(ctx: TurnContext) -> TurnContext:
    """Pipeline part: decide how she shows up for this turn.

    The session's stored mode is the dominant frame. On top of that, a charged
    moment adds a deterministic register nudge: "steady" when tilt reaches
    ``_TILT_BAR``, otherwise "hype" when sentiment reaches ``_UP_BAR`` with
    intensity of at least 0.4. The matching note is stored under the moment's
    "note" key. Most turns are neutral and get no note — that's the point
    (don't over-narrate). The reading is logged either way.
    """
    ctx.mode = modes.get(memory.get_session_mode(ctx.session_id))
    moment = ctx.moment or {}

    # Tilt takes precedence; the upbeat check is only evaluated when tilt is below the bar.
    if moment.get("tilt", 0) >= _TILT_BAR:
        ctx.register = "steady"
        moment["note"] = (
            "Read of the moment: Brian sounds frustrated / on tilt right now. Meet him "
            "there first — warm, steady, present. Don't clip into logging-shorthand or "
            "bury him in analysis; settle him, then help. (Still log any facts he hands you.)"
        )
    elif moment.get("sentiment", 0) >= _UP_BAR and moment.get("intensity", 0) >= 0.4:
        ctx.register = "hype"
        moment["note"] = (
            "Read of the moment: he's up / energized — match his energy, don't flatten it."
        )

    logbus.log(
        "info", "perceived",
        session=ctx.session_id,
        kind=moment.get("kind"),
        tilt=moment.get("tilt"),
        sentiment=moment.get("sentiment"),
        register=ctx.register,
    )
    return ctx
def _compose(ctx: TurnContext) -> TurnContext:
    """Pipeline part: build the tiered prompt for the voice model.

    Delegates to ``build_messages`` with the context's session, message, mode
    and moment, and stores the result on ``ctx.messages``.
    """
    prompt = build_messages(ctx.session_id, ctx.user_msg, ctx.mode, moment=ctx.moment)
    ctx.messages = prompt
    return ctx
def _deliberate_part(ctx: TurnContext) -> TurnContext:
    """Pipeline part: run the private thinking pass and attach its note.

    When a note is produced it is appended at the end of ``ctx.messages`` so it
    shapes the reply; otherwise the messages are left untouched.
    """
    thinking_note = _deliberation_note(ctx.session_id, ctx.user_msg, ctx.backend, ctx.model)
    if not thinking_note:
        return ctx
    ctx.messages.append(thinking_note)
    return ctx
# The ordered parts that `assemble` runs over a fresh TurnContext. Order matters:
# route logs and annotates the moment that perceive produced, compose builds the
# messages from the routed mode/moment, and the deliberation note is appended last.
PIPELINE = (_perceive, _route, _compose, _deliberate_part)
# --- mouth (the voice pass: re-render the mind's draft in her character) -----
# System instruction that voice_messages places directly after the draft (which is
# presented as an assistant message); the leading "↑" refers to that draft.
_VOICE_NOTE = (
    "↑ That was you working the answer out — a draft Brian has NOT seen. Now say it to him "
    "in your own voice: warm, direct, specific, in character, opinionated. Keep every fact, "
    "number, name, and decision exactly as in the draft — change only the wording so it sounds "
    "like you, not a generic assistant. No preamble, no meta, no 'here's a friendlier version' "
    "— just your actual message to Brian."
)
def voice_messages(messages: list[Message], draft: str) -> list[Message]:
    """Build the prompt for the mouth model.

    Returns a new list: the full turn context, then the mind's draft as an
    assistant message, then the re-voicing instruction as a system message.
    The input list is not modified.
    """
    draft_turn: Message = {"role": "assistant", "content": draft}
    revoice_instruction: Message = {"role": "system", "content": _VOICE_NOTE}
    return messages + [draft_turn, revoice_instruction]
def assemble(session_id: str, user_msg: str, backend: Backend,
             model: str | None = None) -> TurnContext:
    """Run every pipeline part over a fresh context and return the result.

    Each part in ``PIPELINE`` receives the context returned by the previous one;
    the final context is ready for `chat` to speak.
    """
    turn = TurnContext(
        session_id=session_id,
        user_msg=user_msg,
        backend=backend,
        model=model,
    )
    for step in PIPELINE:
        turn = step(turn)
    return turn