diff --git a/lyra/chat.py b/lyra/chat.py index 15c276d..51b0f40 100644 --- a/lyra/chat.py +++ b/lyra/chat.py @@ -1,22 +1,16 @@ -"""The chat turn loop: persona + tiered memory + recent context -> reply. +"""The chat turn: assemble the prompt (lyra.mind) then speak + persist. -Context is assembled in tiers (oldest/most-compacted first): - 1. persona - 2. long-term gist — relevant *summaries* of other sessions - 3. sharp details — a few raw cross-session exchanges (so specifics survive) - 4. recent raw turns of the current session (full fidelity) - 5. the new user message -After replying, the session is compacted if enough new turns have accumulated. +`mind.assemble()` runs the society of parts (perceive → route → compose → +deliberate) and hands back a ready message list + the active mode; `chat` runs the +tool/generation loop (the "speak" part) and persists the exchange. Keeping speak +here (not in mind) is deliberate — it's tangled with streaming and tool dispatch. """ from __future__ import annotations -from lyra import clock, config, llm, logbus, memory, modes, persona, self_state, summary, thoughts +from lyra import config, llm, logbus, memory, mind, modes, summary from lyra import tools as toolkit -from lyra.llm import Backend, Message +from lyra.llm import Backend -RECALL_K = 3 # raw cross-session "sharp detail" hits -RECENT_N = 10 # raw turns of the current session -SUMMARY_K = 3 # other-session gists MAX_TOOL_ROUNDS = 5 # cap tool-call iterations per turn # Backends that support function-calling. The MI50's llama.cpp server only does # tools when launched with --jinja; until it is, keep tools to cloud so MI50 chat @@ -24,256 +18,40 @@ MAX_TOOL_ROUNDS = 5 # cap tool-call iterations per turn TOOL_BACKENDS = {"cloud"} -def _mode_state_note(mode: modes.Mode | None) -> str | None: - """Dynamic, per-turn state for the active mode. Currently: surface Alligator - Blood while it's engaged on the live session, so she stays in that register.""" - if not mode or mode.key != modes.CASH.key: - return None - from lyra import poker # local import: keep the core/domain coupling at call time - if poker.alligator_active(): - return ( - "🐊 ALLIGATOR BLOOD is ON for this session. Coach Brian in that register: " - "hang around, refuse to die, don't force miracles, make opponents beat him " - "correctly. Tough, patient, steady — no heroics, no spew, no quitting." - ) - return None - - -def _maybe_switch_mode(session_id: str, tool_name: str) -> None: - """Keep the chat framing aligned with the live data: opening a poker session - auto-flips this chat into Cash mode (so the next turn gets the cash card + the - full live toolset). Manual UI switching still overrides anytime.""" - if tool_name == "start_session": - memory.set_session_mode(session_id, modes.CASH.key) - logbus.log("info", "mode auto-switch", session=session_id, mode=modes.CASH.key) - - -def _summary_note(summaries: list[memory.Summary]) -> Message: - lines = [f"- ({(s.session_started_at or s.created_at)[:10]}) {s.content}" for s in summaries] - body = "Gist of earlier sessions (compacted — ask if you need specifics):\n" + "\n".join(lines) - return {"role": "system", "content": body} - - -def _detail_note(exchanges: list[memory.Exchange]) -> Message: - lines = [f"- ({ex.created_at[:10]}, {ex.role}) {ex.content}" for ex in exchanges] - body = "Specific things you recall from past conversations:\n" + "\n".join(lines) - return {"role": "system", "content": body} - - -def _inner_life_note() -> Message | None: - """One coherent window onto what she's been doing on her own since last time — - the threads she's turning over plus the things she's written for herself. Sits - with her self-state so chat reads as a continuous mind, not a fresh boot. The - persona tells her to weave this in naturally when it fits.""" - parts: list[str] = [] - threads = thoughts.context_note() # active threads, with their latest thought - if threads: - parts.append(threads) - wrote = memory.list_journal(limit=3, kinds=("journal", "note")) - if wrote: - lines = "\n".join(f"- ({w['created_at'][:10]}) {w['content']}" for w in reversed(wrote)) - parts.append( - "Things you've written in your journal lately (yours — you can refer back " - "to them if they're relevant):\n" + lines - ) - if not parts: - return None - return {"role": "system", "content": "\n\n".join(parts)} - - -def _now_note() -> Message: - """Current wall-clock time + how long since Brian last said anything. - - Stated as plain fact — she has no clock otherwise, so without this 'now' and - the gap since the last turn are invisible to her. - """ - line = f"The current date and time is {clock.stamp()}." - gap = clock.humanize_gap(memory.last_exchange_at()) - line += ( - f" It has been {gap} since Brian last spoke with you." - if gap else " This is the first thing Brian has ever said to you." - ) - return {"role": "system", "content": line} - - -def _render(messages: list[Message]) -> str: - """Human-readable dump of the exact prompt, for the live-log inspector.""" - return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages) - - -# Trivial acknowledgements that don't warrant a private thinking pass. -_TRIVIAL = {"ok", "okay", "k", "kk", "lol", "haha", "thanks", "thank you", "ty", "yeah", - "yep", "yes", "no", "nope", "nice", "cool", "sure", "right", "true", "gotcha", "👍"} - - -def _should_deliberate(user_msg: str) -> bool: - m = user_msg.strip().lower().rstrip("!.?") - return len(m) >= 12 and m not in _TRIVIAL - - -_DELIBERATE_SYS = ( - "Before you answer Brian, think privately — he will NOT see this. What do you ACTUALLY " - "think about what he just said? Your real take, the specific substance worth giving, any " - "genuine opinion, disagreement, or doubt. Draw on your own current thoughts/threads and " - "what you actually know if they're relevant. Be concrete; skip pleasantries and generic " - "enthusiasm. 2-5 sentences of honest thinking — no lists, no answer yet, just the thinking." -) - - -def _deliberate(messages: list[Message], backend: Backend, model: str | None) -> str: - """One private 'what do I actually think' pass before replying. Returns her thinking - (empty on any failure — chat must never break because deliberation hiccuped).""" - try: - out = llm.complete(messages + [{"role": "system", "content": _DELIBERATE_SYS}], - backend=backend, model=model) - return (out or "").strip() - except Exception as exc: - logbus.log("error", "deliberation failed", error=str(exc)[:160]) - return "" - - -def _answer_from(thinking: str) -> Message: - """The system note that turns private thinking into a grounded, in-voice reply — placed - last (most influential) to beat gpt-4o's default-assistant boilerplate.""" - return {"role": "system", "content": ( - "Your private thinking just now (Brian can't see it):\n" + thinking + - "\n\nNow reply to Brian FROM that thinking, in your own voice — warm, direct, " - "specific, opinionated. Give the actual substance, not a survey of options. Do NOT " - "default to a numbered list or a how-to outline unless he explicitly asked for steps. " - "No 'would you like to…' / 'let me know' closer — make your point and stop." - )} - - -def _deliberation_note(session_id: str, user_msg: str, backend: Backend, - model: str | None, messages: list[Message]) -> Message | None: - """Run the private thinking pass if warranted; return the answer-from-thinking note.""" - if not config.load().chat_deliberate or not _should_deliberate(user_msg): - return None - thinking = _deliberate(messages, backend, model) - if not thinking: - return None - logbus.log("info", "deliberated", session=session_id, chars=len(thinking), detail=thinking) - return _answer_from(thinking) - - -def build_messages(session_id: str, user_msg: str, - mode: modes.Mode | None = None) -> list[Message]: - """Assemble the full, tiered message list for one turn.""" - messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}] - - # Autonomy Core: Lyra's own evolving interiority (mood, self-narrative). Comes - # right after the persona — her sense of self before her model of the world. - messages.append({"role": "system", "content": self_state.render_for_context(self_state.load())}) - - # Her ongoing inner life — the threads she's turning over and what she's written - # for herself — so she's continuous across conversations and can pick up where she - # left off, not only when a thought crosses the surface bar below. Rides with the - # self; the persona tells her to bring it into conversation naturally when it fits. - inner = _inner_life_note() - if inner: - messages.append(inner) - - # Mode card: how to behave *right now* (e.g. live-cash copilot). High priority — - # it sits just after her sense of self, before her model of the world. Talk mode - # has no card (the persona's default voice is the Talk register). - if mode and mode.card: - messages.append({"role": "system", "content": mode.card}) - - # Live ritual state (e.g. Alligator Blood ON) — dynamic, so it rides alongside - # the static card and keeps her in-register for the whole stretch, not just the - # turn she flipped it. - state_note = _mode_state_note(mode) - if state_note: - messages.append({"role": "system", "content": state_note}) - - # When she is: current time + the gap since Brian last spoke (she has no clock). - messages.append(_now_note()) - - # Thought loop: if Brian's been away and one of her own threads has built past - # the surface bar, let her lead with it (once). This is her #6 — bringing what - # she thought about while alone *to* him. Runs before the world-model tiers so - # it's framed as her interiority, like the self-state. - surfaced = thoughts.maybe_surface(memory.last_exchange_at()) - if surfaced: - messages.append({"role": "system", "content": surfaced}) - - # Semantic memory: the distilled profile (who Brian is) — answers identity - # questions that raw recall can't. Always in context when it exists. - profile = memory.get_profile() - if profile: - messages.append( - {"role": "system", "content": "What you know about Brian:\n" + profile} - ) - - # Time-aware memory: the current narrative (recent arc, trends, callbacks). - narrative = memory.get_narrative() - if narrative: - messages.append( - {"role": "system", "content": "What's going on with Brian lately:\n" + narrative} - ) - - recent = memory.recent(session_id, n=RECENT_N) - recent_ids = {ex.id for ex in recent} - - # Tier 1: compacted gists of *other* sessions (long-term, general idea). - summaries = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id) - if summaries: - messages.append(_summary_note(summaries)) - - # Tier 2: a few sharp raw details from other sessions (so specifics survive - # compaction). Skip the current session (its raw turns are in `recent`). - recalled = [ - ex for ex in memory.recall(user_msg, k=RECALL_K) - if ex.id not in recent_ids and ex.session_id != session_id - ] - if recalled: - messages.append(_detail_note(recalled)) - - # Tier 3: current session, full fidelity. - for ex in recent: - messages.append({"role": ex.role, "content": ex.content}) - - messages.append({"role": "user", "content": user_msg}) - - logbus.log( - "debug", "context built", - recent=len(recent), summaries=len(summaries), details=len(recalled), - chars=sum(len(m["content"]) for m in messages), detail=_render(messages), - ) - return messages - - -def respond(session_id: str, user_msg: str, backend: Backend = "cloud", - model_override: str | None = None) -> str: - """Produce Lyra's reply to a single user message and persist the exchange. - - `model_override` (from the UI's cloud-model picker) only applies on the cloud - backend; local/mi50 keep their own configured models. - """ - cfg = config.load() - # Live chat uses the stronger chat_model on cloud (bulk consolidation keeps - # cloud_model). local/mi50 use their own configured model. +def _resolve_model(backend: Backend, model_override: str | None, cfg) -> str: + """Live chat uses the stronger chat_model on cloud; local/mi50 use their own. + The UI's cloud-model picker only applies on the cloud backend.""" model = {"local": cfg.local_model, "cloud": cfg.chat_model, "mi50": cfg.mi50_model}.get( backend, backend ) if model_override and backend == "cloud": model = model_override - logbus.log( - "info", "chat request", session=session_id, backend=backend, - model=model, embed=cfg.embed_backend, - ) + return model - mode = modes.get(memory.get_session_mode(session_id)) - messages = build_messages(session_id, user_msg, mode=mode) - # Live thought loop: think privately about what to actually say before answering. - note = _deliberation_note(session_id, user_msg, backend, model, messages) - if note: - messages.append(note) +def _maybe_switch_mode(session_id: str, tool_name: str) -> None: + """Keep the chat framing aligned with the live data: opening a poker session + auto-flips this chat into Poker mode (next turn gets the card + full live tools). + Manual UI switching still overrides anytime.""" + if tool_name == "start_session": + memory.set_session_mode(session_id, modes.CASH.key) + logbus.log("info", "mode auto-switch", session=session_id, mode=modes.CASH.key) - # Tool loop: offer Lyra her tools (scoped to the mode); if she calls one, run it - # and feed the result back so she can continue, until she returns a text reply. - tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None + +def respond(session_id: str, user_msg: str, backend: Backend = "cloud", + model_override: str | None = None) -> str: + """Produce Lyra's reply to a single user message and persist the exchange.""" + cfg = config.load() + model = _resolve_model(backend, model_override, cfg) + logbus.log("info", "chat request", session=session_id, backend=backend, + model=model, embed=cfg.embed_backend) + + turn = mind.assemble(session_id, user_msg, backend, model) + messages = turn.messages + + # Tool loop (speak): offer her tools (scoped to the mode); run any she calls and + # feed results back until she returns a text reply. + tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None ctx = {"session_id": session_id, "backend": backend} reply = "" for _ in range(MAX_TOOL_ROUNDS): @@ -295,9 +73,7 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud", memory.remember(session_id, "user", user_msg) memory.remember(session_id, "assistant", reply) - - # Compact this session once enough new turns have piled up. - summary.maybe_summarize_async(session_id) + summary.maybe_summarize_async(session_id) # compact once enough new turns pile up return reply @@ -305,30 +81,17 @@ def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud", model_override: str | None = None): """Streaming generator version of `respond`. - Yields ("delta", text) as content streams in, and ("tool", name) when a tool - runs. Persists the full exchange and yields a final ("done", reply) — matching - `respond`'s side effects (memory + compaction) exactly. + Yields ("delta", text) as content streams in, ("tool", name) when a tool runs, + and a final ("done", reply). Persists the exchange — same side effects as `respond`. """ cfg = config.load() - model = {"local": cfg.local_model, "cloud": cfg.chat_model, "mi50": cfg.mi50_model}.get( - backend, backend - ) - if model_override and backend == "cloud": - model = model_override - logbus.log( - "info", "chat request (stream)", session=session_id, backend=backend, - model=model, embed=cfg.embed_backend, - ) + model = _resolve_model(backend, model_override, cfg) + logbus.log("info", "chat request (stream)", session=session_id, backend=backend, + model=model, embed=cfg.embed_backend) - mode = modes.get(memory.get_session_mode(session_id)) - messages = build_messages(session_id, user_msg, mode=mode) - - # Live thought loop: think privately about what to actually say before answering. - note = _deliberation_note(session_id, user_msg, backend, model, messages) - if note: - messages.append(note) - - tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None + turn = mind.assemble(session_id, user_msg, backend, model) + messages = turn.messages + tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None ctx = {"session_id": session_id, "backend": backend} parts: list[str] = [] for _ in range(MAX_TOOL_ROUNDS): diff --git a/lyra/mind.py b/lyra/mind.py new file mode 100644 index 0000000..186e12d --- /dev/null +++ b/lyra/mind.py @@ -0,0 +1,272 @@ +"""The control plane: assemble one turn from a society of small parts. + +This is the explicit version of what used to be inline in `chat.py`. A turn is +built by running an ordered pipeline of *parts* over a shared `TurnContext` +(blackboard): each part reads what it needs and annotates the context, and the +last steps produce the message list `chat` then hands to the voice model. + +P1 (this): the frame, behavior-preserving. The parts wrap the existing logic — + perceive (stub) -> route (the session's mode) -> compose (tiered prompt) -> + deliberate (private 'what do I actually think' pass). +Later phases fill in perceive (read the moment), route (register/intent + model +routing), and a learn loop — see docs/COGNITION.md. Most parts are cheap +deterministic code; the LLM is the exception (deliberate here, speak in `chat`). +""" +from __future__ import annotations + +from dataclasses import dataclass, field + +from lyra import clock, config, llm, logbus, memory, modes, persona, self_state, thoughts +from lyra.llm import Backend, Message + +RECALL_K = 3 # raw cross-session "sharp detail" hits +RECENT_N = 10 # raw turns of the current session +SUMMARY_K = 3 # other-session gists + + +# --- prompt parts (compose) ---------------------------------------------- + +def _mode_state_note(mode: modes.Mode | None) -> str | None: + """Dynamic, per-turn state for the active mode. Currently: surface Alligator + Blood while it's engaged on the live session, so she stays in that register.""" + if not mode or mode.key != modes.CASH.key: + return None + from lyra import poker # local import: keep the core/domain coupling at call time + if poker.alligator_active(): + return ( + "🐊 ALLIGATOR BLOOD is ON for this session. Coach Brian in that register: " + "hang around, refuse to die, don't force miracles, make opponents beat him " + "correctly. Tough, patient, steady — no heroics, no spew, no quitting." + ) + return None + + +def _summary_note(summaries: list[memory.Summary]) -> Message: + lines = [f"- ({(s.session_started_at or s.created_at)[:10]}) {s.content}" for s in summaries] + body = "Gist of earlier sessions (compacted — ask if you need specifics):\n" + "\n".join(lines) + return {"role": "system", "content": body} + + +def _detail_note(exchanges: list[memory.Exchange]) -> Message: + lines = [f"- ({ex.created_at[:10]}, {ex.role}) {ex.content}" for ex in exchanges] + body = "Specific things you recall from past conversations:\n" + "\n".join(lines) + return {"role": "system", "content": body} + + +def _inner_life_note() -> Message | None: + """One coherent window onto what she's been doing on her own since last time — + the threads she's turning over plus the things she's written for herself. Sits + with her self-state so chat reads as a continuous mind, not a fresh boot. The + persona tells her to weave this in naturally when it fits.""" + parts: list[str] = [] + threads = thoughts.context_note() # active threads, with their latest thought + if threads: + parts.append(threads) + wrote = memory.list_journal(limit=3, kinds=("journal", "note")) + if wrote: + lines = "\n".join(f"- ({w['created_at'][:10]}) {w['content']}" for w in reversed(wrote)) + parts.append( + "Things you've written in your journal lately (yours — you can refer back " + "to them if they're relevant):\n" + lines + ) + if not parts: + return None + return {"role": "system", "content": "\n\n".join(parts)} + + +def _now_note() -> Message: + """Current wall-clock time + how long since Brian last said anything.""" + line = f"The current date and time is {clock.stamp()}." + gap = clock.humanize_gap(memory.last_exchange_at()) + line += ( + f" It has been {gap} since Brian last spoke with you." + if gap else " This is the first thing Brian has ever said to you." + ) + return {"role": "system", "content": line} + + +def _render(messages: list[Message]) -> str: + """Human-readable dump of the exact prompt, for the live-log inspector.""" + return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages) + + +def build_messages(session_id: str, user_msg: str, + mode: modes.Mode | None = None) -> list[Message]: + """Assemble the full, tiered message list for one turn.""" + messages: list[Message] = [{"role": "system", "content": persona.system_prompt()}] + + # Autonomy Core: Lyra's own evolving interiority (mood, self-narrative). Comes + # right after the persona — her sense of self before her model of the world. + messages.append({"role": "system", "content": self_state.render_for_context(self_state.load())}) + + # Her ongoing inner life — threads she's turning over + what she's written for + # herself — so chat reads as a continuous mind, not a fresh boot. + inner = _inner_life_note() + if inner: + messages.append(inner) + + # Mode card: how to behave *right now*. Talk mode has no card (persona is Talk). + if mode and mode.card: + messages.append({"role": "system", "content": mode.card}) + + # Live ritual state (e.g. Alligator Blood ON) — dynamic, rides with the card. + state_note = _mode_state_note(mode) + if state_note: + messages.append({"role": "system", "content": state_note}) + + # When she is: current time + the gap since Brian last spoke (she has no clock). + messages.append(_now_note()) + + # Thought loop: if Brian's been away and a thread has built past the surface bar, + # let her lead with it (once) — her #6, bringing what she thought about *to* him. + surfaced = thoughts.maybe_surface(memory.last_exchange_at()) + if surfaced: + messages.append({"role": "system", "content": surfaced}) + + # Semantic memory: the distilled profile (who Brian is). + profile = memory.get_profile() + if profile: + messages.append({"role": "system", "content": "What you know about Brian:\n" + profile}) + + # Time-aware memory: the current narrative (recent arc, trends, callbacks). + narrative = memory.get_narrative() + if narrative: + messages.append({"role": "system", "content": "What's going on with Brian lately:\n" + narrative}) + + recent = memory.recent(session_id, n=RECENT_N) + recent_ids = {ex.id for ex in recent} + + # Tier 1: compacted gists of *other* sessions. + summaries = memory.recall_summaries(user_msg, k=SUMMARY_K, exclude_session=session_id) + if summaries: + messages.append(_summary_note(summaries)) + + # Tier 2: a few sharp raw details from other sessions (so specifics survive). + recalled = [ + ex for ex in memory.recall(user_msg, k=RECALL_K) + if ex.id not in recent_ids and ex.session_id != session_id + ] + if recalled: + messages.append(_detail_note(recalled)) + + # Tier 3: current session, full fidelity. + for ex in recent: + messages.append({"role": ex.role, "content": ex.content}) + + messages.append({"role": "user", "content": user_msg}) + + logbus.log( + "debug", "context built", + recent=len(recent), summaries=len(summaries), details=len(recalled), + chars=sum(len(m["content"]) for m in messages), detail=_render(messages), + ) + return messages + + +# --- deliberation (a private 'what do I actually think' pass) ------------- + +# Trivial acknowledgements that don't warrant a private thinking pass. +_TRIVIAL = {"ok", "okay", "k", "kk", "lol", "haha", "thanks", "thank you", "ty", "yeah", + "yep", "yes", "no", "nope", "nice", "cool", "sure", "right", "true", "gotcha", "👍"} + + +def _should_deliberate(user_msg: str) -> bool: + m = user_msg.strip().lower().rstrip("!.?") + return len(m) >= 12 and m not in _TRIVIAL + + +_DELIBERATE_SYS = ( + "Before you answer Brian, think privately — he will NOT see this. What do you ACTUALLY " + "think about what he just said? Your real take, the specific substance worth giving, any " + "genuine opinion, disagreement, or doubt. Draw on your own current thoughts/threads and " + "what you actually know if they're relevant. Be concrete; skip pleasantries and generic " + "enthusiasm. 2-5 sentences of honest thinking — no lists, no answer yet, just the thinking." +) + + +def _deliberate(messages: list[Message], backend: Backend, model: str | None) -> str: + """One private 'what do I actually think' pass before replying. Returns her thinking + (empty on any failure — chat must never break because deliberation hiccuped).""" + try: + out = llm.complete(messages + [{"role": "system", "content": _DELIBERATE_SYS}], + backend=backend, model=model) + return (out or "").strip() + except Exception as exc: + logbus.log("error", "deliberation failed", error=str(exc)[:160]) + return "" + + +def _answer_from(thinking: str) -> Message: + """The system note that turns private thinking into a grounded, in-voice reply — placed + last (most influential) to beat gpt-4o's default-assistant boilerplate.""" + return {"role": "system", "content": ( + "Your private thinking just now (Brian can't see it):\n" + thinking + + "\n\nNow reply to Brian FROM that thinking, in your own voice — warm, direct, " + "specific, opinionated. Give the actual substance, not a survey of options. Do NOT " + "default to a numbered list or a how-to outline unless he explicitly asked for steps. " + "No 'would you like to…' / 'let me know' closer — make your point and stop." + )} + + +def _deliberation_note(session_id: str, user_msg: str, backend: Backend, + model: str | None, messages: list[Message]) -> Message | None: + """Run the private thinking pass if warranted; return the answer-from-thinking note.""" + if not config.load().chat_deliberate or not _should_deliberate(user_msg): + return None + thinking = _deliberate(messages, backend, model) + if not thinking: + return None + logbus.log("info", "deliberated", session=session_id, chars=len(thinking), detail=thinking) + return _answer_from(thinking) + + +# --- the pipeline (a society of parts over a shared blackboard) ----------- + +@dataclass +class TurnContext: + """The blackboard for one turn: parts read what they need and annotate it.""" + session_id: str + user_msg: str + backend: Backend + model: str | None = None + mode: modes.Mode | None = None + moment: dict = field(default_factory=dict) # perceive fills this in (P2) + messages: list[Message] = field(default_factory=list) + + +def _perceive(ctx: TurnContext) -> TurnContext: + """Read the moment (sentiment / kind / tilt). Stub for now — P2 fills it in.""" + ctx.moment = {} + return ctx + + +def _route(ctx: TurnContext) -> TurnContext: + """Pick how she shows up. Manual for now: the mode chosen for this session.""" + ctx.mode = modes.get(memory.get_session_mode(ctx.session_id)) + return ctx + + +def _compose(ctx: TurnContext) -> TurnContext: + """Assemble the tiered prompt for the voice model.""" + ctx.messages = build_messages(ctx.session_id, ctx.user_msg, ctx.mode) + return ctx + + +def _deliberate_part(ctx: TurnContext) -> TurnContext: + """Private 'what do I actually think' pass, appended last so it shapes the reply.""" + note = _deliberation_note(ctx.session_id, ctx.user_msg, ctx.backend, ctx.model, ctx.messages) + if note: + ctx.messages.append(note) + return ctx + + +PIPELINE = (_perceive, _route, _compose, _deliberate_part) + + +def assemble(session_id: str, user_msg: str, backend: Backend, + model: str | None = None) -> TurnContext: + """Run the parts over a fresh TurnContext and return it ready for `chat` to speak.""" + ctx = TurnContext(session_id=session_id, user_msg=user_msg, backend=backend, model=model) + for part in PIPELINE: + ctx = part(ctx) + return ctx diff --git a/tests/test_chat.py b/tests/test_chat.py index 1d3307e..e806dae 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -1,4 +1,4 @@ -"""Live chat: the deliberation pass (think privately before answering).""" +"""The mind pipeline: the deliberation pass (think privately before answering).""" from __future__ import annotations import importlib @@ -13,31 +13,31 @@ def lyra(tmp_path, monkeypatch): monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts]) import lyra.memory as memory importlib.reload(memory) - import lyra.chat as chat - importlib.reload(chat) - return memory, chat + import lyra.mind as mind + importlib.reload(mind) + return memory, mind def test_should_deliberate_skips_trivial(lyra): - _, chat = lyra - assert chat._should_deliberate("How would we actually start building this?") - assert chat._should_deliberate("I disagree, that seems risky") + _, mind = lyra + assert mind._should_deliberate("How would we actually start building this?") + assert mind._should_deliberate("I disagree, that seems risky") for trivial in ("ok", "lol", "thanks", "yeah", "nice", "👍", "k"): - assert not chat._should_deliberate(trivial) - assert not chat._should_deliberate("ok!") # punctuation stripped - assert not chat._should_deliberate("hey") # too short + assert not mind._should_deliberate(trivial) + assert not mind._should_deliberate("ok!") # punctuation stripped + assert not mind._should_deliberate("hey") # too short def test_deliberation_note_runs_and_appends(lyra, monkeypatch): - _, chat = lyra + _, mind = lyra calls = [] def fake_complete(messages, backend=None, model=None): calls.append(messages) return "I actually think the first move is the smallest end-to-end slice." - monkeypatch.setattr(chat.llm, "complete", fake_complete) - note = chat._deliberation_note("s1", "How would we start on this?", "cloud", None, []) + monkeypatch.setattr(mind.llm, "complete", fake_complete) + note = mind._deliberation_note("s1", "How would we start on this?", "cloud", None, []) assert note and note["role"] == "system" assert "first move is the smallest" in note["content"] # her thinking carried in assert "numbered list" in note["content"].lower() # voice enforcement attached @@ -45,9 +45,19 @@ def test_deliberation_note_runs_and_appends(lyra, monkeypatch): def test_deliberation_skipped_when_disabled(lyra, monkeypatch): - _, chat = lyra + _, mind = lyra monkeypatch.setenv("CHAT_DELIBERATE", "false") called = [] - monkeypatch.setattr(chat.llm, "complete", lambda *a, **k: called.append(1) or "x") - assert chat._deliberation_note("s1", "a real substantive question here", "cloud", None, []) is None + monkeypatch.setattr(mind.llm, "complete", lambda *a, **k: called.append(1) or "x") + assert mind._deliberation_note("s1", "a real substantive question here", "cloud", None, []) is None assert called == [] # no LLM call when off + + +def test_assemble_runs_the_pipeline(lyra, monkeypatch): + memory, mind = lyra + monkeypatch.setenv("CHAT_DELIBERATE", "false") # keep it offline for the structure test + memory.ensure_session("s1") + turn = mind.assemble("s1", "hey what's up", "cloud", None) + assert turn.mode is not None # route ran + assert turn.messages and turn.messages[-1]["role"] == "user" # compose ran + assert turn.messages[-1]["content"] == "hey what's up" diff --git a/tests/test_time.py b/tests/test_time.py index cd7d5d8..782a7af 100644 --- a/tests/test_time.py +++ b/tests/test_time.py @@ -39,8 +39,8 @@ def lyra(tmp_path, monkeypatch): def test_now_note_first_contact(lyra): - from lyra import chat - note = chat._now_note()["content"] + from lyra import mind + note = mind._now_note()["content"] assert "current date and time is" in note assert "first thing Brian has ever said" in note @@ -48,6 +48,6 @@ def test_now_note_first_contact(lyra): def test_now_note_reports_gap(lyra): memory = lyra memory.remember("s1", "user", "hey") - from lyra import chat - note = chat._now_note()["content"] + from lyra import mind + note = mind._now_note()["content"] assert "since Brian last spoke with you" in note diff --git a/tests/test_tools.py b/tests/test_tools.py index 8868922..fe48e24 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -9,6 +9,7 @@ import pytest @pytest.fixture def lyra(tmp_path, monkeypatch): monkeypatch.setenv("LYRA_DB_PATH", str(tmp_path / "test.db")) + monkeypatch.setenv("CHAT_DELIBERATE", "false") # don't make a real LLM call in respond() from lyra import llm monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts]) import lyra.memory as memory