refactor(P1): extract the turn pipeline into lyra/mind.py (behavior-preserving)

First step of the cognition control plane (docs/COGNITION.md). The chat turn is now an explicit society of parts over a shared TurnContext blackboard: perceive (stub) -> route (session mode) -> compose (tiered prompt) -> deliberate. - lyra/mind.py (new): TurnContext + the pipeline + assemble(); moved build_messages and the deliberation helpers here (the assembly belongs in the control plane). - lyra/chat.py: slimmed to "speak + persist" — calls mind.assemble(), runs the tool/generation loop, persists. No behavior change (same prompt, same output). - tests: point test_time/test_chat at mind; add an assemble() structure test; make test_chat/test_tools hermetic (CHAT_DELIBERATE off so respond() doesn't make a real LLM call). Suite 86 green in ~5s, ruff clean, no import cycle. This is the frame; perceive/route/learn get filled in next phases — each opt-in. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-24 05:19:39 +00:00
parent f1f15972ac
commit 904eda3388
5 changed files with 345 additions and 299 deletions
@@ -1,22 +1,16 @@
-"""The chat turn loop: persona + tiered memory + recent context -> reply.
+"""The chat turn: assemble the prompt (lyra.mind) then speak + persist.
-Context is assembled in tiers (oldest/most-compacted first):
+`mind.assemble()` runs the society of parts (perceive → route → compose →
-  1. persona
+deliberate) and hands back a ready message list + the active mode; `chat` runs the
-  2. long-term gist  — relevant *summaries* of other sessions
+tool/generation loop (the "speak" part) and persists the exchange. Keeping speak
-  3. sharp details   — a few raw cross-session exchanges (so specifics survive)
+here (not in mind) is deliberate — it's tangled with streaming and tool dispatch.
  4. recent raw turns of the current session (full fidelity)
  5. the new user message
 After replying, the session is compacted if enough new turns have accumulated.
 """
 from __future__ import annotations
-from lyra import clock, config, llm, logbus, memory, modes, persona, self_state, summary, thoughts
+from lyra import config, llm, logbus, memory, mind, modes, summary
 from lyra import tools as toolkit
-from lyra.llm import Backend, Message
+from lyra.llm import Backend
 RECALL_K = 3  # raw cross-session "sharp detail" hits
 RECENT_N = 10  # raw turns of the current session
 SUMMARY_K = 3  # other-session gists
 MAX_TOOL_ROUNDS = 5  # cap tool-call iterations per turn
 # Backends that support function-calling. The MI50's llama.cpp server only does
 # tools when launched with --jinja; until it is, keep tools to cloud so MI50 chat
@@ -24,256 +18,40 @@ MAX_TOOL_ROUNDS = 5  # cap tool-call iterations per turn
 TOOL_BACKENDS = {"cloud"}
-def _mode_state_note(mode: modes.Mode | None) -> str | None:
+def _resolve_model(backend: Backend, model_override: str | None, cfg) -> str:
-    """Dynamic, per-turn state for the active mode. Currently: surface Alligator
+    """Live chat uses the stronger chat_model on cloud; local/mi50 use their own.
-    Blood while it's engaged on the live session, so she stays in that register."""
+    The UI's cloud-model picker only applies on the cloud backend."""
    if not mode or mode.key != modes.CASH.key:
        return None
    from lyra import poker  # local import: keep the core/domain coupling at call time
    if poker.alligator_active():
        return (
            "🐊 ALLIGATOR BLOOD is ON for this session. Coach Brian in that register: "
            "hang around, refuse to die, don't force miracles, make opponents beat him "
            "correctly. Tough, patient, steady — no heroics, no spew, no quitting."
        )
    return None
 def _maybe_switch_mode(session_id: str, tool_name: str) -> None:
    """Keep the chat framing aligned with the live data: opening a poker session
    auto-flips this chat into Cash mode (so the next turn gets the cash card + the
    full live toolset). Manual UI switching still overrides anytime."""
    if tool_name == "start_session":
        memory.set_session_mode(session_id, modes.CASH.key)
        logbus.log("info", "mode auto-switch", session=session_id, mode=modes.CASH.key)
 def _summary_note(summaries: list[memory.Summary]) -> Message:
    lines = [f"- ({(s.session_started_at or s.created_at)[:10]}) {s.content}" for s in summaries]
    body = "Gist of earlier sessions (compacted — ask if you need specifics):\n" + "\n".join(lines)
    return {"role": "system", "content": body}
 def _detail_note(exchanges: list[memory.Exchange]) -> Message:
    lines = [f"- ({ex.created_at[:10]}, {ex.role}) {ex.content}" for ex in exchanges]
    body = "Specific things you recall from past conversations:\n" + "\n".join(lines)
    return {"role": "system", "content": body}
 def _inner_life_note() -> Message | None:
    """One coherent window onto what she's been doing on her own since last time —
    the threads she's turning over plus the things she's written for herself. Sits
    with her self-state so chat reads as a continuous mind, not a fresh boot. The
    persona tells her to weave this in naturally when it fits."""
    parts: list[str] = []
    threads = thoughts.context_note()  # active threads, with their latest thought
    if threads:
        parts.append(threads)
    wrote = memory.list_journal(limit=3, kinds=("journal", "note"))
    if wrote:
        lines = "\n".join(f"- ({w['created_at'][:10]}) {w['content']}" for w in reversed(wrote))
        parts.append(
            "Things you've written in your journal lately (yours — you can refer back "
            "to them if they're relevant):\n" + lines
        )
    if not parts:
        return None
    return {"role": "system", "content": "\n\n".join(parts)}
 def _now_note() -> Message:
    """Current wall-clock time + how long since Brian last said anything.
    Stated as plain fact — she has no clock otherwise, so without this 'now' and
    the gap since the last turn are invisible to her.
    """
    line = f"The current date and time is {clock.stamp()}."
    gap = clock.humanize_gap(memory.last_exchange_at())
    line += (
        f" It has been {gap} since Brian last spoke with you."
        if gap else " This is the first thing Brian has ever said to you."
    )
    return {"role": "system", "content": line}
 def _render(messages: list[Message]) -> str:
    """Human-readable dump of the exact prompt, for the live-log inspector."""
    return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages)
 # Trivial acknowledgements that don't warrant a private thinking pass.
 _TRIVIAL = {"ok", "okay", "k", "kk", "lol", "haha", "thanks", "thank you", "ty", "yeah",
            "yep", "yes", "no", "nope", "nice", "cool", "sure", "right", "true", "gotcha", "👍"}
 def _should_deliberate(user_msg: str) -> bool:
    m = user_msg.strip().lower().rstrip("!.?")
    return len(m) >= 12 and m not in _TRIVIAL
 _DELIBERATE_SYS = (
    "Before you answer Brian, think privately — he will NOT see this. What do you ACTUALLY "
    "think about what he just said? Your real take, the specific substance worth giving, any "
    "genuine opinion, disagreement, or doubt. Draw on your own current thoughts/threads and "
    "what you actually know if they're relevant. Be concrete; skip pleasantries and generic "
    "enthusiasm. 2-5 sentences of honest thinking — no lists, no answer yet, just the thinking."
 )
 def _deliberate(messages: list[Message], backend: Backend, model: str | None) -> str:
    """One private 'what do I actually think' pass before replying. Returns her thinking
    (empty on any failure — chat must never break because deliberation hiccuped)."""
    try:
        out = llm.complete(messages + [{"role": "system", "content": _DELIBERATE_SYS}],
                           backend=backend, model=model)
        return (out or "").strip()
    except Exception as exc:
        logbus.log("error", "deliberation failed", error=str(exc)[:160])
        return ""
 def _answer_from(thinking: str) -> Message:
    """The system note that turns private thinking into a grounded, in-voice reply — placed
    last (most influential) to beat gpt-4o's default-assistant boilerplate."""
    return {"role": "system", "content": (
        "Your private thinking just now (Brian can't see it):\n" + thinking +
        "\n\nNow reply to Brian FROM that thinking, in your own voice — warm, direct, "
        "specific, opinionated. Give the actual substance, not a survey of options. Do NOT "
        "default to a numbered list or a how-to outline unless he explicitly asked for steps. "
        "No 'would you like to…' / 'let me know' closer — make your point and stop."
    )}
 def _deliberation_note(session_id: str, user_msg: str, backend: Backend,
                       model: str | None, messages: list[Message]) -> Message | None:
    """Run the private thinking pass if warranted; return the answer-from-thinking note."""
    if not config.load().chat_deliberate or not _should_deliberate(user_msg):
        return None
    thinking = _deliberate(messages, backend, model)
    if not thinking:
        return None
    logbus.log("info", "deliberated", session=session_id, chars=len(thinking), detail=thinking)
    return _answer_from(thinking)
 def build_messages(session_id: str, user_msg: str,
                   mode: modes.Mode | None = None) -> list[Message]:
    """Assemble the full, tiered message list for one turn."""
    messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}]
    # Autonomy Core: Lyra's own evolving interiority (mood, self-narrative). Comes
    # right after the persona — her sense of self before her model of the world.
    messages.append({"role": "system", "content": self_state.render_for_context(self_state.load())})
    # Her ongoing inner life — the threads she's turning over and what she's written
    # for herself — so she's continuous across conversations and can pick up where she
    # left off, not only when a thought crosses the surface bar below. Rides with the
    # self; the persona tells her to bring it into conversation naturally when it fits.
    inner = _inner_life_note()
    if inner:
        messages.append(inner)
    # Mode card: how to behave *right now* (e.g. live-cash copilot). High priority —
    # it sits just after her sense of self, before her model of the world. Talk mode
    # has no card (the persona's default voice is the Talk register).
    if mode and mode.card:
        messages.append({"role": "system", "content": mode.card})
    # Live ritual state (e.g. Alligator Blood ON) — dynamic, so it rides alongside
    # the static card and keeps her in-register for the whole stretch, not just the
    # turn she flipped it.
    state_note = _mode_state_note(mode)
    if state_note:
        messages.append({"role": "system", "content": state_note})
    # When she is: current time + the gap since Brian last spoke (she has no clock).
    messages.append(_now_note())
    # Thought loop: if Brian's been away and one of her own threads has built past
    # the surface bar, let her lead with it (once). This is her #6 — bringing what
    # she thought about while alone *to* him. Runs before the world-model tiers so
    # it's framed as her interiority, like the self-state.
    surfaced = thoughts.maybe_surface(memory.last_exchange_at())
    if surfaced:
        messages.append({"role": "system", "content": surfaced})
    # Semantic memory: the distilled profile (who Brian is) — answers identity
    # questions that raw recall can't. Always in context when it exists.
    profile = memory.get_profile()
    if profile:
        messages.append(
            {"role": "system", "content": "What you know about Brian:\n" + profile}
        )
    # Time-aware memory: the current narrative (recent arc, trends, callbacks).
    narrative = memory.get_narrative()
    if narrative:
        messages.append(
            {"role": "system", "content": "What's going on with Brian lately:\n" + narrative}
        )
    recent = memory.recent(session_id, n=RECENT_N)
    recent_ids = {ex.id for ex in recent}
    # Tier 1: compacted gists of *other* sessions (long-term, general idea).
    summaries = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id)
    if summaries:
        messages.append(_summary_note(summaries))
    # Tier 2: a few sharp raw details from other sessions (so specifics survive
    # compaction). Skip the current session (its raw turns are in `recent`).
    recalled = [
        ex for ex in memory.recall(user_msg, k=RECALL_K)
        if ex.id not in recent_ids and ex.session_id != session_id
    ]
    if recalled:
        messages.append(_detail_note(recalled))
    # Tier 3: current session, full fidelity.
    for ex in recent:
        messages.append({"role": ex.role, "content": ex.content})
    messages.append({"role": "user", "content": user_msg})
    logbus.log(
        "debug", "context built",
        recent=len(recent), summaries=len(summaries), details=len(recalled),
        chars=sum(len(m["content"]) for m in messages), detail=_render(messages),
    )
    return messages
 def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
            model_override: str | None = None) -> str:
    """Produce Lyra's reply to a single user message and persist the exchange.
    `model_override` (from the UI's cloud-model picker) only applies on the cloud
    backend; local/mi50 keep their own configured models.
    """
    cfg = config.load()
    # Live chat uses the stronger chat_model on cloud (bulk consolidation keeps
    # cloud_model). local/mi50 use their own configured model.
    model = {"local": cfg.local_model, "cloud": cfg.chat_model, "mi50": cfg.mi50_model}.get(
        backend, backend
    )
    if model_override and backend == "cloud":
        model = model_override
-    logbus.log(
+    return model
        "info", "chat request", session=session_id, backend=backend,
        model=model, embed=cfg.embed_backend,
    )
    mode = modes.get(memory.get_session_mode(session_id))
    messages = build_messages(session_id, user_msg, mode=mode)
-    # Live thought loop: think privately about what to actually say before answering.
+def _maybe_switch_mode(session_id: str, tool_name: str) -> None:
-    note = _deliberation_note(session_id, user_msg, backend, model, messages)
+    """Keep the chat framing aligned with the live data: opening a poker session
-    if note:
+    auto-flips this chat into Poker mode (next turn gets the card + full live tools).
-        messages.append(note)
+    Manual UI switching still overrides anytime."""
    if tool_name == "start_session":
        memory.set_session_mode(session_id, modes.CASH.key)
        logbus.log("info", "mode auto-switch", session=session_id, mode=modes.CASH.key)
-    # Tool loop: offer Lyra her tools (scoped to the mode); if she calls one, run it
+
-    # and feed the result back so she can continue, until she returns a text reply.
+def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
-    tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
+            model_override: str | None = None) -> str:
    """Produce Lyra's reply to a single user message and persist the exchange."""
    cfg = config.load()
    model = _resolve_model(backend, model_override, cfg)
    logbus.log("info", "chat request", session=session_id, backend=backend,
               model=model, embed=cfg.embed_backend)
    turn = mind.assemble(session_id, user_msg, backend, model)
    messages = turn.messages
    # Tool loop (speak): offer her tools (scoped to the mode); run any she calls and
    # feed results back until she returns a text reply.
    tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None
    ctx = {"session_id": session_id, "backend": backend}
    reply = ""
    for _ in range(MAX_TOOL_ROUNDS):
@@ -295,9 +73,7 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
    memory.remember(session_id, "user", user_msg)
    memory.remember(session_id, "assistant", reply)
-
+    summary.maybe_summarize_async(session_id)  # compact once enough new turns pile up
    # Compact this session once enough new turns have piled up.
    summary.maybe_summarize_async(session_id)
    return reply
@@ -305,30 +81,17 @@ def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",
                   model_override: str | None = None):
    """Streaming generator version of `respond`.
-    Yields ("delta", text) as content streams in, and ("tool", name) when a tool
+    Yields ("delta", text) as content streams in, ("tool", name) when a tool runs,
-    runs. Persists the full exchange and yields a final ("done", reply) — matching
+    and a final ("done", reply). Persists the exchange — same side effects as `respond`.
    `respond`'s side effects (memory + compaction) exactly.
    """
    cfg = config.load()
-    model = {"local": cfg.local_model, "cloud": cfg.chat_model, "mi50": cfg.mi50_model}.get(
+    model = _resolve_model(backend, model_override, cfg)
-        backend, backend
+    logbus.log("info", "chat request (stream)", session=session_id, backend=backend,
-    )
+               model=model, embed=cfg.embed_backend)
    if model_override and backend == "cloud":
        model = model_override
    logbus.log(
        "info", "chat request (stream)", session=session_id, backend=backend,
        model=model, embed=cfg.embed_backend,
    )
-    mode = modes.get(memory.get_session_mode(session_id))
+    turn = mind.assemble(session_id, user_msg, backend, model)
-    messages = build_messages(session_id, user_msg, mode=mode)
+    messages = turn.messages
-
+    tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None
    # Live thought loop: think privately about what to actually say before answering.
    note = _deliberation_note(session_id, user_msg, backend, model, messages)
    if note:
        messages.append(note)
    tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
    ctx = {"session_id": session_id, "backend": backend}
    parts: list[str] = []
    for _ in range(MAX_TOOL_ROUNDS):
@@ -0,0 +1,272 @@
 """The control plane: assemble one turn from a society of small parts.
 This is the explicit version of what used to be inline in `chat.py`. A turn is
 built by running an ordered pipeline of *parts* over a shared `TurnContext`
 (blackboard): each part reads what it needs and annotates the context, and the
 last steps produce the message list `chat` then hands to the voice model.
 P1 (this): the frame, behavior-preserving. The parts wrap the existing logic —
  perceive (stub) -> route (the session's mode) -> compose (tiered prompt) ->
  deliberate (private 'what do I actually think' pass).
 Later phases fill in perceive (read the moment), route (register/intent + model
 routing), and a learn loop — see docs/COGNITION.md. Most parts are cheap
 deterministic code; the LLM is the exception (deliberate here, speak in `chat`).
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from lyra import clock, config, llm, logbus, memory, modes, persona, self_state, thoughts
 from lyra.llm import Backend, Message
 RECALL_K = 3  # raw cross-session "sharp detail" hits
 RECENT_N = 10  # raw turns of the current session
 SUMMARY_K = 3  # other-session gists
 # --- prompt parts (compose) ----------------------------------------------
 def _mode_state_note(mode: modes.Mode | None) -> str | None:
    """Dynamic, per-turn state for the active mode. Currently: surface Alligator
    Blood while it's engaged on the live session, so she stays in that register."""
    if not mode or mode.key != modes.CASH.key:
        return None
    from lyra import poker  # local import: keep the core/domain coupling at call time
    if poker.alligator_active():
        return (
            "🐊 ALLIGATOR BLOOD is ON for this session. Coach Brian in that register: "
            "hang around, refuse to die, don't force miracles, make opponents beat him "
            "correctly. Tough, patient, steady — no heroics, no spew, no quitting."
        )
    return None
 def _summary_note(summaries: list[memory.Summary]) -> Message:
    lines = [f"- ({(s.session_started_at or s.created_at)[:10]}) {s.content}" for s in summaries]
    body = "Gist of earlier sessions (compacted — ask if you need specifics):\n" + "\n".join(lines)
    return {"role": "system", "content": body}
 def _detail_note(exchanges: list[memory.Exchange]) -> Message:
    lines = [f"- ({ex.created_at[:10]}, {ex.role}) {ex.content}" for ex in exchanges]
    body = "Specific things you recall from past conversations:\n" + "\n".join(lines)
    return {"role": "system", "content": body}
 def _inner_life_note() -> Message | None:
    """Build the single system note covering what she has been doing on her own.

    Joins the note from `thoughts.context_note()` with up to three recent
    journal/note entries (listed in reverse of the order `memory.list_journal`
    returns them). Returns None when neither source yields anything.
    """
    sections: list[str] = []
    thread_note = thoughts.context_note()  # active threads, with their latest thought
    if thread_note:
        sections.append(thread_note)
    entries = memory.list_journal(limit=3, kinds=("journal", "note"))
    if entries:
        bullets = [f"- ({e['created_at'][:10]}) {e['content']}" for e in reversed(entries)]
        sections.append(
            "Things you've written in your journal lately (yours — you can refer back "
            "to them if they're relevant):\n" + "\n".join(bullets)
        )
    if sections:
        return {"role": "system", "content": "\n\n".join(sections)}
    return None
 def _now_note() -> Message:
    """State the current time and the gap since the last exchange as a system note.

    When `clock.humanize_gap` returns nothing, the note says this is the first
    message ever instead of reporting a gap.
    """
    opening = f"The current date and time is {clock.stamp()}."
    since_last = clock.humanize_gap(memory.last_exchange_at())
    if since_last:
        closing = f" It has been {since_last} since Brian last spoke with you."
    else:
        closing = " This is the first thing Brian has ever said to you."
    return {"role": "system", "content": opening + closing}
 def _render(messages: list[Message]) -> str:
    """Human-readable dump of the exact prompt, for the live-log inspector."""
    return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages)
 def build_messages(session_id: str, user_msg: str,
                   mode: modes.Mode | None = None) -> list[Message]:
    """Assemble the full, tiered message list for one turn.

    The list is built in a fixed order: persona, self-state, inner-life note,
    mode card, live mode state, the 'now' note, a surfaced thought, profile,
    narrative, other-session gists, recalled details, the current session's
    recent turns, and finally the new user message. Optional pieces are skipped
    when their source returns nothing. A "context built" debug entry with the
    rendered prompt is logged before returning.
    """
    messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}]
    # Autonomy Core: Lyra's own evolving interiority (mood, self-narrative). Comes
    # right after the persona — her sense of self before her model of the world.
    messages.append({"role": "system", "content": self_state.render_for_context(self_state.load())})
    # Her ongoing inner life — threads she's turning over + what she's written for
    # herself — so chat reads as a continuous mind, not a fresh boot.
    inner = _inner_life_note()
    if inner:
        messages.append(inner)
    # Mode card: how to behave *right now*. Talk mode has no card (persona is Talk).
    if mode and mode.card:
        messages.append({"role": "system", "content": mode.card})
    # Live ritual state (e.g. Alligator Blood ON) — dynamic, rides with the card.
    state_note = _mode_state_note(mode)
    if state_note:
        messages.append({"role": "system", "content": state_note})
    # When she is: current time + the gap since Brian last spoke (she has no clock).
    messages.append(_now_note())
    # Thought loop: if Brian's been away and a thread has built past the surface bar,
    # let her lead with it (once) — her #6, bringing what she thought about *to* him.
    surfaced = thoughts.maybe_surface(memory.last_exchange_at())
    if surfaced:
        messages.append({"role": "system", "content": surfaced})
    # Semantic memory: the distilled profile (who Brian is).
    profile = memory.get_profile()
    if profile:
        messages.append({"role": "system", "content": "What you know about Brian:\n" + profile})
    # Time-aware memory: the current narrative (recent arc, trends, callbacks).
    narrative = memory.get_narrative()
    if narrative:
        messages.append({"role": "system", "content": "What's going on with Brian lately:\n" + narrative})
    # Fetch the current session's recent turns up front: their ids are needed to
    # de-duplicate the recalled details below; the turns themselves are appended last.
    recent = memory.recent(session_id, n=RECENT_N)
    recent_ids = {ex.id for ex in recent}
    # Tier 1: compacted gists of *other* sessions.
    summaries = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id)
    if summaries:
        messages.append(_summary_note(summaries))
    # Tier 2: a few sharp raw details from other sessions (so specifics survive).
    # Anything already in `recent` or belonging to this session is filtered out.
    recalled = [
        ex for ex in memory.recall(user_msg, k=RECALL_K)
        if ex.id not in recent_ids and ex.session_id != session_id
    ]
    if recalled:
        messages.append(_detail_note(recalled))
    # Tier 3: current session, full fidelity.
    for ex in recent:
        messages.append({"role": ex.role, "content": ex.content})
    # The new user message always comes last.
    messages.append({"role": "user", "content": user_msg})
    logbus.log(
        "debug", "context built",
        recent=len(recent), summaries=len(summaries), details=len(recalled),
        chars=sum(len(m["content"]) for m in messages), detail=_render(messages),
    )
    return messages
 # --- deliberation (a private 'what do I actually think' pass) -------------
 # Trivial acknowledgements that don't warrant a private thinking pass.
 _TRIVIAL = {"ok", "okay", "k", "kk", "lol", "haha", "thanks", "thank you", "ty", "yeah",
            "yep", "yes", "no", "nope", "nice", "cool", "sure", "right", "true", "gotcha", "👍"}
 def _should_deliberate(user_msg: str) -> bool:
    m = user_msg.strip().lower().rstrip("!.?")
    return len(m) >= 12 and m not in _TRIVIAL
 _DELIBERATE_SYS = (
    "Before you answer Brian, think privately — he will NOT see this. What do you ACTUALLY "
    "think about what he just said? Your real take, the specific substance worth giving, any "
    "genuine opinion, disagreement, or doubt. Draw on your own current thoughts/threads and "
    "what you actually know if they're relevant. Be concrete; skip pleasantries and generic "
    "enthusiasm. 2-5 sentences of honest thinking — no lists, no answer yet, just the thinking."
 )
 def _deliberate(messages: list[Message], backend: Backend, model: str | None) -> str:
    """Run one private thinking pass over the prompt and return the stripped text.

    The deliberation instruction is appended as a final system message on a copy
    of `messages`. Any exception is logged and turned into an empty string, so a
    failed thinking pass never breaks the chat turn.
    """
    try:
        private_prompt = messages + [{"role": "system", "content": _DELIBERATE_SYS}]
        raw = llm.complete(private_prompt, backend=backend, model=model)
        thinking = raw or ""
        return thinking.strip()
    except Exception as err:
        logbus.log("error", "deliberation failed", error=str(err)[:160])
        return ""
 def _answer_from(thinking: str) -> Message:
    """The system note that turns private thinking into a grounded, in-voice reply — placed
    last (most influential) to beat gpt-4o's default-assistant boilerplate."""
    return {"role": "system", "content": (
        "Your private thinking just now (Brian can't see it):\n" + thinking +
        "\n\nNow reply to Brian FROM that thinking, in your own voice — warm, direct, "
        "specific, opinionated. Give the actual substance, not a survey of options. Do NOT "
        "default to a numbered list or a how-to outline unless he explicitly asked for steps. "
        "No 'would you like to…' / 'let me know' closer — make your point and stop."
    )}
 def _deliberation_note(session_id: str, user_msg: str, backend: Backend,
                       model: str | None, messages: list[Message]) -> Message | None:
    """Produce the answer-from-thinking note for this turn, or None when skipped.

    Skipped when deliberation is disabled in config, when the message is too
    trivial to think about, or when the thinking pass comes back empty.
    """
    if not config.load().chat_deliberate:
        return None
    if not _should_deliberate(user_msg):
        return None
    private = _deliberate(messages, backend, model)
    if private:
        logbus.log("info", "deliberated", session=session_id, chars=len(private), detail=private)
        return _answer_from(private)
    return None
 # --- the pipeline (a society of parts over a shared blackboard) -----------
@dataclass
 class TurnContext:
    """The blackboard for one turn: parts read what they need and annotate it.

    `assemble()` creates one per turn from its arguments; the pipeline parts then
    fill in `mode`, `moment`, and `messages`.
    """
    # Inputs, supplied by the caller of assemble().
    session_id: str
    user_msg: str
    backend: Backend
    model: str | None = None
    # Set by _route from the session's stored mode.
    mode: modes.Mode | None = None
    moment: dict = field(default_factory=dict)  # perceive fills this in (P2)
    # Set by _compose; _deliberate_part may append one more system note.
    messages: list[Message] = field(default_factory=list)
 def _perceive(ctx: TurnContext) -> TurnContext:
    """Read the moment (sentiment / kind / tilt). Stub for now — P2 fills it in.

    Currently it only resets `ctx.moment` to an empty dict and returns the same context.
    """
    ctx.moment = {}
    return ctx
 def _route(ctx: TurnContext) -> TurnContext:
    """Choose the mode for this turn — for now, whatever is stored for the session."""
    stored_key = memory.get_session_mode(ctx.session_id)
    ctx.mode = modes.get(stored_key)
    return ctx
 def _compose(ctx: TurnContext) -> TurnContext:
    """Build the tiered prompt from the routed mode and store it on the context."""
    prompt = build_messages(ctx.session_id, ctx.user_msg, ctx.mode)
    ctx.messages = prompt
    return ctx
 def _deliberate_part(ctx: TurnContext) -> TurnContext:
    """Run the private thinking pass and, if it yields a note, append it to the prompt.

    The note goes last in `ctx.messages` so it shapes the reply.
    """
    extra = _deliberation_note(
        ctx.session_id, ctx.user_msg, ctx.backend, ctx.model, ctx.messages
    )
    if not extra:
        return ctx
    ctx.messages.append(extra)
    return ctx
 PIPELINE = (_perceive, _route, _compose, _deliberate_part)
 def assemble(session_id: str, user_msg: str, backend: Backend,
             model: str | None = None) -> TurnContext:
    """Build a fresh TurnContext and pass it through every part in PIPELINE, in order.

    Each part receives the context returned by the previous one; the final
    context (mode and messages filled in) is what `chat` speaks from.
    """
    turn = TurnContext(
        session_id=session_id,
        user_msg=user_msg,
        backend=backend,
        model=model,
    )
    for step in PIPELINE:
        turn = step(turn)
    return turn
@@ -1,4 +1,4 @@
-"""Live chat: the deliberation pass (think privately before answering)."""
+"""The mind pipeline: the deliberation pass (think privately before answering)."""
 from __future__ import annotations
 import importlib
@@ -13,31 +13,31 @@ def lyra(tmp_path, monkeypatch):
    monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts])
    import lyra.memory as memory
    importlib.reload(memory)
-    import lyra.chat as chat
+    import lyra.mind as mind
-    importlib.reload(chat)
+    importlib.reload(mind)
-    return memory, chat
+    return memory, mind
 def test_should_deliberate_skips_trivial(lyra):
-    _, chat = lyra
+    _, mind = lyra
-    assert chat._should_deliberate("How would we actually start building this?")
+    assert mind._should_deliberate("How would we actually start building this?")
-    assert chat._should_deliberate("I disagree, that seems risky")
+    assert mind._should_deliberate("I disagree, that seems risky")
    for trivial in ("ok", "lol", "thanks", "yeah", "nice", "👍", "k"):
-        assert not chat._should_deliberate(trivial)
+        assert not mind._should_deliberate(trivial)
-    assert not chat._should_deliberate("ok!")        # punctuation stripped
+    assert not mind._should_deliberate("ok!")        # punctuation stripped
-    assert not chat._should_deliberate("hey")         # too short
+    assert not mind._should_deliberate("hey")         # too short
 def test_deliberation_note_runs_and_appends(lyra, monkeypatch):
-    _, chat = lyra
+    _, mind = lyra
    calls = []
    def fake_complete(messages, backend=None, model=None):
        calls.append(messages)
        return "I actually think the first move is the smallest end-to-end slice."
-    monkeypatch.setattr(chat.llm, "complete", fake_complete)
+    monkeypatch.setattr(mind.llm, "complete", fake_complete)
-    note = chat._deliberation_note("s1", "How would we start on this?", "cloud", None, [])
+    note = mind._deliberation_note("s1", "How would we start on this?", "cloud", None, [])
    assert note and note["role"] == "system"
    assert "first move is the smallest" in note["content"]      # her thinking carried in
    assert "numbered list" in note["content"].lower()           # voice enforcement attached
@@ -45,9 +45,19 @@ def test_deliberation_note_runs_and_appends(lyra, monkeypatch):
 def test_deliberation_skipped_when_disabled(lyra, monkeypatch):
-    _, chat = lyra
+    _, mind = lyra
    monkeypatch.setenv("CHAT_DELIBERATE", "false")
    called = []
-    monkeypatch.setattr(chat.llm, "complete", lambda *a, **k: called.append(1) or "x")
+    monkeypatch.setattr(mind.llm, "complete", lambda *a, **k: called.append(1) or "x")
-    assert chat._deliberation_note("s1", "a real substantive question here", "cloud", None, []) is None
+    assert mind._deliberation_note("s1", "a real substantive question here", "cloud", None, []) is None
    assert called == []                                          # no LLM call when off
 def test_assemble_runs_the_pipeline(lyra, monkeypatch):
    """assemble() should set a mode and end the composed prompt with the new user message."""
    memory, mind = lyra
    monkeypatch.setenv("CHAT_DELIBERATE", "false")  # keep it offline for the structure test
    memory.ensure_session("s1")
    turn = mind.assemble("s1", "hey what's up", "cloud", None)
    assert turn.mode is not None                       # route ran
    assert turn.messages and turn.messages[-1]["role"] == "user"   # compose ran
    assert turn.messages[-1]["content"] == "hey what's up"
@@ -39,8 +39,8 @@ def lyra(tmp_path, monkeypatch):
 def test_now_note_first_contact(lyra):
-    from lyra import chat
+    from lyra import mind
-    note = chat._now_note()["content"]
+    note = mind._now_note()["content"]
    assert "current date and time is" in note
    assert "first thing Brian has ever said" in note
@@ -48,6 +48,6 @@ def test_now_note_first_contact(lyra):
 def test_now_note_reports_gap(lyra):
    memory = lyra
    memory.remember("s1", "user", "hey")
-    from lyra import chat
+    from lyra import mind
-    note = chat._now_note()["content"]
+    note = mind._now_note()["content"]
    assert "since Brian last spoke with you" in note
@@ -9,6 +9,7 @@ import pytest
@pytest.fixture
 def lyra(tmp_path, monkeypatch):
    monkeypatch.setenv("LYRA_DB_PATH", str(tmp_path / "test.db"))
    monkeypatch.setenv("CHAT_DELIBERATE", "false")  # don't make a real LLM call in respond()
    from lyra import llm
    monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts])
    import lyra.memory as memory