Files
project-lyra/lyra/chat.py
T
serversdown fef45b3e05 feat: make chat a window onto her whole inner life (continuity)
Brian's felt disconnect: chat, thoughts, journal, reflections read as separate
streams. This ties them together at the chat surface.

- chat._inner_life_note(): one coherent block combining her active thought threads
  AND what she's written in her journal lately, so she carries her continuous inner
  life into every conversation (not just a single surfaced thought). Replaces the
  standalone threads block.
- persona: inner-life section rewritten to describe the current machinery (thought
  loop / threads she returns to, journal she writes in, feeds she reads, reaching
  out to Brian) and — the key change — instruct her to let that inner life show up
  in conversation naturally, the way a friend picks up where they left off, without
  info-dumping or performing it. New self-model bullets for the thought loop + journal.

Suite 65 green, ruff clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-22 01:10:59 +00:00

301 lines
13 KiB
Python

"""The chat turn loop: persona + tiered memory + recent context -> reply.
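Context is assembled in tiers (oldest/most-compacted first):
1. persona
2. long-term gist — relevant *summaries* of other sessions
3. sharp details — a few raw cross-session exchanges (so specifics survive)
4. recent raw turns of the current session (full fidelity)
5. the new user message
Between the persona and the memory tiers, `build_messages` also inserts her
self-state, her inner-life note, the mode card and any live mode state, the
current time, a surfaced thought (when there is one), and the distilled profile
and narrative of Brian.
After replying, the session is compacted if enough new turns have accumulated.
"""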
from __future__ import annotations
from lyra import clock, config, llm, logbus, memory, modes, persona, self_state, summary, thoughts
from lyra import tools as toolkit
from lyra.llm import Backend, Message
# Context-assembly sizes and tool-loop limits used by the functions below.
RECALL_K = 3 # raw cross-session "sharp detail" hits requested via memory.recall
RECENT_N = 10 # raw turns of the current session requested via memory.recent
SUMMARY_K = 3 # other-session gists requested via memory.recall_summaries
MAX_TOOL_ROUNDS = 5 # cap on model calls (tool-call iterations) per turn
# Backends that support function-calling. The MI50's llama.cpp server only does
# tools when launched with --jinja; until it is, keep tools to cloud so MI50 chat
# doesn't 500 on the tools param. Add "mi50" here once that flag is set.
# Backends not listed here are called with tools=None.
TOOL_BACKENDS = {"cloud"}
def _mode_state_note(mode: modes.Mode | None) -> str | None:
    """Return the live, per-turn state line for the active mode, or None.

    The only dynamic state today is Alligator Blood: when the active mode is
    Cash and `poker.alligator_active()` reports it engaged, a reminder is
    returned so she stays in that register. Every other case yields None.
    """
    # No mode, or any mode other than Cash, carries no live state.
    if not mode:
        return None
    if mode.key != modes.CASH.key:
        return None

    # Imported here rather than at module level so the core/domain coupling
    # only happens at call time.
    from lyra import poker

    if not poker.alligator_active():
        return None
    return (
        "🐊 ALLIGATOR BLOOD is ON for this session. Coach Brian in that register: "
        "hang around, refuse to die, don't force miracles, make opponents beat him "
        "correctly. Tough, patient, steady — no heroics, no spew, no quitting."
    )
def _maybe_switch_mode(session_id: str, tool_name: str) -> None:
    """Flip the chat session into Cash mode when a poker session is opened.

    Only the `start_session` tool triggers the switch; any other tool name is
    a no-op. The switch is stored on the session and logged, so it takes
    effect on the next turn. Manual UI switching still overrides anytime.
    """
    if tool_name != "start_session":
        return
    cash_key = modes.CASH.key
    memory.set_session_mode(session_id, cash_key)
    logbus.log("info", "mode auto-switch", session=session_id, mode=cash_key)
def _summary_note(summaries: list[memory.Summary]) -> Message:
    """Build the system message listing compacted gists of earlier sessions.

    Each summary becomes one bullet, dated with the first ten characters of
    its `session_started_at`, falling back to `created_at` when that is falsy.
    """
    bullets: list[str] = []
    for gist in summaries:
        stamp = gist.session_started_at or gist.created_at
        bullets.append(f"- ({stamp[:10]}) {gist.content}")
    header = "Gist of earlier sessions (compacted — ask if you need specifics):\n"
    return {"role": "system", "content": header + "\n".join(bullets)}
def _detail_note(exchanges: list[memory.Exchange]) -> Message:
    """Build the system message listing raw exchanges recalled from the past.

    Each exchange becomes one bullet carrying the first ten characters of its
    `created_at`, the speaker's role, and the verbatim content.
    """
    bullets: list[str] = []
    for hit in exchanges:
        day = hit.created_at[:10]
        bullets.append(f"- ({day}, {hit.role}) {hit.content}")
    header = "Specific things you recall from past conversations:\n"
    return {"role": "system", "content": header + "\n".join(bullets)}
def _inner_life_note() -> Message | None:
    """Combine her active thought threads and recent journal writing into one note.

    Returns a single system message holding whichever of the two sections is
    non-empty (threads first, then journal), separated by a blank line, or
    None when neither has content. The persona tells her to weave this in
    naturally when it fits, so chat reads as a continuous mind.
    """
    # Threads note first, then up to three "journal"/"note" entries.
    thread_block = thoughts.context_note()
    entries = memory.list_journal(limit=3, kinds=("journal", "note"))

    sections: list[str] = [thread_block] if thread_block else []
    if entries:
        # Entries are listed in the reverse of the order list_journal returned
        # them (presumably newest-first there — confirm against memory).
        bullets = [
            f"- ({entry['created_at'][:10]}) {entry['content']}"
            for entry in reversed(entries)
        ]
        sections.append(
            "Things you've written in your journal lately (yours — you can refer back "
            "to them if they're relevant):\n" + "\n".join(bullets)
        )

    return {"role": "system", "content": "\n\n".join(sections)} if sections else None
def _now_note() -> Message:
    """Build the system message stating the current time and the gap since last contact.

    She has no clock of her own, so both "now" and the time elapsed since the
    last stored exchange are stated as plain fact. When `clock.humanize_gap`
    yields nothing, the note says this is Brian's first message ever.
    """
    now_sentence = f"The current date and time is {clock.stamp()}."
    elapsed = clock.humanize_gap(memory.last_exchange_at())
    if elapsed:
        gap_sentence = f" It has been {elapsed} since Brian last spoke with you."
    else:
        gap_sentence = " This is the first thing Brian has ever said to you."
    return {"role": "system", "content": now_sentence + gap_sentence}
def _render(messages: list[Message]) -> str:
    """Render the prompt as readable text for the live-log inspector.

    Each message becomes a `[role]` header line followed by its content;
    messages are separated by a blank line.
    """
    chunks: list[str] = []
    for msg in messages:
        header = f"[{msg['role']}]"
        chunks.append(f"{header}\n{msg['content']}")
    return "\n\n".join(chunks)
def build_messages(session_id: str, user_msg: str,
                   mode: modes.Mode | None = None) -> list[Message]:
    """Assemble the full, ordered message list for one chat turn.

    Order: persona, self-state, inner-life note, mode card, live mode state,
    current time, surfaced thought, profile, narrative, other-session gists,
    sharp cross-session details, the current session's recent turns, and
    finally the new user message. Optional pieces are skipped when empty.
    One debug log entry records the counts and the rendered prompt.
    """
    messages: list[Message] = []

    def add_system(text: str) -> None:
        messages.append({"role": "system", "content": text})

    # Who she is: the persona, then the Autonomy Core self-state (mood,
    # self-narrative) — her sense of self before her model of the world.
    add_system(persona.system_prompt())
    add_system(self_state.render_for_context(self_state.load()))

    # Her ongoing inner life (threads + journal) rides with the self so she can
    # pick up where she left off even when no thought is surfaced further down.
    inner_life = _inner_life_note()
    if inner_life:
        messages.append(inner_life)

    # How to behave right now. Modes without a card add nothing here.
    if mode and mode.card:
        add_system(mode.card)

    # Live ritual state accompanies the static card on every turn it is active.
    live_state = _mode_state_note(mode)
    if live_state:
        add_system(live_state)

    # When she is: wall-clock time and the gap since the last exchange.
    messages.append(_now_note())

    # Thought loop: a thread that has built past the surface bar may lead the
    # turn. Placed before the world-model notes so it reads as her interiority.
    surfaced_thought = thoughts.maybe_surface(memory.last_exchange_at())
    if surfaced_thought:
        add_system(surfaced_thought)

    # Semantic memory: the distilled profile of Brian, whenever one exists.
    profile = memory.get_profile()
    if profile:
        add_system("What you know about Brian:\n" + profile)

    # Time-aware memory: the current narrative (recent arc, trends, callbacks).
    narrative = memory.get_narrative()
    if narrative:
        add_system("What's going on with Brian lately:\n" + narrative)

    current_turns = memory.recent(session_id, n=RECENT_N)
    seen_ids = {turn.id for turn in current_turns}

    # Compacted gists of *other* sessions (long-term, general idea).
    gists = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id)
    if gists:
        messages.append(_summary_note(gists))

    # A few sharp raw details from other sessions, so specifics survive
    # compaction. Anything from this session is dropped: its raw turns are
    # replayed in full just below.
    details = []
    for hit in memory.recall(user_msg, k=RECALL_K):
        if hit.id in seen_ids or hit.session_id == session_id:
            continue
        details.append(hit)
    if details:
        messages.append(_detail_note(details))

    # The current session at full fidelity, then the new user message.
    messages.extend({"role": turn.role, "content": turn.content} for turn in current_turns)
    messages.append({"role": "user", "content": user_msg})

    total_chars = sum(len(m["content"]) for m in messages)
    logbus.log(
        "debug", "context built",
        recent=len(current_turns), summaries=len(gists), details=len(details),
        chars=total_chars, detail=_render(messages),
    )
    return messages
def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
            model_override: str | None = None) -> str:
    """Produce Lyra's reply to one user message and persist the exchange.

    Builds the tiered context, runs the tool loop for at most MAX_TOOL_ROUNDS
    model calls, stores the user message and the reply in memory, and kicks
    off session compaction. `model_override` (from the UI's cloud-model
    picker) only applies on the cloud backend; local/mi50 keep their own
    configured models. If no text reply is produced, a fixed fallback line is
    returned and stored instead.
    """
    cfg = config.load()
    # Live chat uses chat_model on cloud (bulk consolidation keeps cloud_model);
    # an unrecognized backend value is passed through as the model name.
    configured_models = {
        "local": cfg.local_model,
        "cloud": cfg.chat_model,
        "mi50": cfg.mi50_model,
    }
    model = configured_models.get(backend, backend)
    if backend == "cloud" and model_override:
        model = model_override
    logbus.log(
        "info", "chat request", session=session_id, backend=backend,
        model=model, embed=cfg.embed_backend,
    )

    mode = modes.get(memory.get_session_mode(session_id))
    messages = build_messages(session_id, user_msg, mode=mode)

    # Tools are scoped to the mode and only offered on backends that support
    # function-calling; everywhere else the model is called with tools=None.
    tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
    ctx = {"session_id": session_id, "backend": backend}

    reply = ""
    for _round in range(MAX_TOOL_ROUNDS):
        assistant_msg, tool_calls = llm.chat_call(
            messages, backend=backend, model=model, tools=tool_specs
        )
        if tool_calls:
            # Record her tool-call request, run each tool, and feed the
            # results back so the next round can continue from them.
            messages.append(assistant_msg)
            for call in tool_calls:
                tool_name = call["name"]
                result = toolkit.dispatch(tool_name, call["arguments"], ctx)
                logbus.log(
                    "info", "tool call", session=session_id, tool=tool_name, result=result[:80]
                )
                messages.append({"role": "tool", "tool_call_id": call["id"], "content": result})
                _maybe_switch_mode(session_id, tool_name)
            continue
        # No tool calls: this is her text reply.
        reply = assistant_msg.get("content") or ""
        break

    # Empty reply, or the round cap was reached while she was still calling tools.
    reply = reply or "(I got tangled using my tools there — say that again?)"
    logbus.log("info", "reply", session=session_id, chars=len(reply))

    memory.remember(session_id, "user", user_msg)
    memory.remember(session_id, "assistant", reply)
    # Compact this session once enough new turns have piled up.
    summary.maybe_summarize_async(session_id)
    return reply
def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",
                   model_override: str | None = None):
    """Streaming generator counterpart of `respond`.

    Yields ("delta", text) for each streamed content chunk and ("tool", name)
    after each tool has run. Once the loop ends, the full exchange is stored
    in memory, compaction is kicked off, and a final ("done", reply) is
    yielded. The reply is every streamed chunk joined together; if nothing
    was streamed, a fixed fallback line is yielded as a delta and used
    instead. `model_override` only applies on the cloud backend.
    """
    cfg = config.load()
    configured_models = {
        "local": cfg.local_model,
        "cloud": cfg.chat_model,
        "mi50": cfg.mi50_model,
    }
    # An unrecognized backend value is passed through as the model name.
    model = configured_models.get(backend, backend)
    if backend == "cloud" and model_override:
        model = model_override
    logbus.log(
        "info", "chat request (stream)", session=session_id, backend=backend,
        model=model, embed=cfg.embed_backend,
    )

    mode = modes.get(memory.get_session_mode(session_id))
    messages = build_messages(session_id, user_msg, mode=mode)
    # Tools are only offered on backends that support function-calling.
    tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
    ctx = {"session_id": session_id, "backend": backend}

    streamed: list[str] = []
    for _round in range(MAX_TOOL_ROUNDS):
        # Latest "message" / "tool_calls" payload seen during this round.
        captured = {"message": None, "tool_calls": None}
        event_stream = llm.chat_call_stream(
            messages, backend=backend, model=model, tools=tool_specs
        )
        for kind, payload in event_stream:
            if kind == "delta":
                streamed.append(payload)
                yield ("delta", payload)
            elif kind in captured:
                captured[kind] = payload

        requested = captured["tool_calls"]
        if not requested:
            break

        # Record her tool-call request, run each tool, and feed the results
        # back so the next round can continue from them.
        messages.append(captured["message"])
        for call in requested:
            tool_name = call["name"]
            result = toolkit.dispatch(tool_name, call["arguments"], ctx)
            logbus.log(
                "info", "tool call", session=session_id, tool=tool_name, result=result[:80]
            )
            messages.append({"role": "tool", "tool_call_id": call["id"], "content": result})
            _maybe_switch_mode(session_id, tool_name)
            yield ("tool", tool_name)

    reply = "".join(streamed)
    if not reply:
        # Nothing was streamed at all: send the fallback line as a delta.
        reply = "(I got tangled using my tools there — say that again?)"
        yield ("delta", reply)
    logbus.log("info", "reply", session=session_id, chars=len(reply))

    memory.remember(session_id, "user", user_msg)
    memory.remember(session_id, "assistant", reply)
    summary.maybe_summarize_async(session_id)
    yield ("done", reply)