diff --git a/.env.example b/.env.example index effc370..1f97475 100644 --- a/.env.example +++ b/.env.example @@ -49,3 +49,5 @@ PING_AUTO_SALIENCE=0.8 # a thought this salient auto-pings even without an exp PING_COOLDOWN_MIN=60 # min minutes between AUTO pings (explicit reach-outs bypass) DIGEST_HOUR=18 # local hour to send her daily "what I've been thinking" digest CHAT_DELIBERATE=true # think privately before answering substantive chat turns (false = faster, shallower) +MOUTH_BACKEND= # mind/mouth split: separate character/voice model for the final reply (empty = mind speaks) +MOUTH_MODEL= diff --git a/lyra/chat.py b/lyra/chat.py index 51b0f40..a67d43f 100644 --- a/lyra/chat.py +++ b/lyra/chat.py @@ -1,9 +1,12 @@ """The chat turn: assemble the prompt (lyra.mind) then speak + persist. `mind.assemble()` runs the society of parts (perceive → route → compose → -deliberate) and hands back a ready message list + the active mode; `chat` runs the -tool/generation loop (the "speak" part) and persists the exchange. Keeping speak -here (not in mind) is deliberate — it's tangled with streaming and tool dispatch. +deliberate) and hands back a ready message list + the active mode. Then: + - the MIND (the chat backend/model) runs the tool/generation loop — decide, + reason, run tools — and produces a draft. + - the MOUTH (a separate character model, if configured) re-voices that draft in + her own voice. Default: no mouth configured → the mind's draft IS the reply + (bit-for-bit the old behavior). The mouth slot is where a fine-tuned voice lands. """ from __future__ import annotations @@ -16,6 +19,7 @@ MAX_TOOL_ROUNDS = 5 # cap tool-call iterations per turn # tools when launched with --jinja; until it is, keep tools to cloud so MI50 chat # doesn't 500 on the tools param. Add "mi50" here once that flag is set. TOOL_BACKENDS = {"cloud"} +_TANGLED = "(I got tangled using my tools there — say that again?)" def _resolve_model(backend: Backend, model_override: str | None, cfg) -> str: @@ -29,15 +33,59 @@ def _resolve_model(backend: Backend, model_override: str | None, cfg) -> str: return model +def _mouth_target(cfg, mind_backend: Backend, mind_model: str | None): + """The mouth (backend, model) if configured AND different from the mind; else None + (mouth == mind → no separate voice pass).""" + if not cfg.mouth_backend and not cfg.mouth_model: + return None + backend = cfg.mouth_backend or mind_backend + model = cfg.mouth_model or None + if backend == mind_backend and model == mind_model: + return None + return backend, model + + def _maybe_switch_mode(session_id: str, tool_name: str) -> None: - """Keep the chat framing aligned with the live data: opening a poker session - auto-flips this chat into Poker mode (next turn gets the card + full live tools). - Manual UI switching still overrides anytime.""" + """Opening a poker session auto-flips this chat into Poker mode. Manual UI switching + still overrides anytime.""" if tool_name == "start_session": memory.set_session_mode(session_id, modes.CASH.key) logbus.log("info", "mode auto-switch", session=session_id, mode=modes.CASH.key) +def _mind_loop(messages, backend: Backend, model: str | None, tool_specs, + ctx: dict, session_id: str) -> tuple[str, list[str]]: + """Run the tool/generation loop on the MIND model (non-streaming). Mutates + `messages` with tool calls/results. Returns (draft_reply, tool_names_run).""" + tools_run: list[str] = [] + reply = "" + for _ in range(MAX_TOOL_ROUNDS): + assistant_msg, tool_calls = llm.chat_call( + messages, backend=backend, model=model, tools=tool_specs + ) + if not tool_calls: + reply = assistant_msg.get("content") or "" + break + messages.append(assistant_msg) + for tc in tool_calls: + result = toolkit.dispatch(tc["name"], tc["arguments"], ctx) + logbus.log("info", "tool call", session=session_id, tool=tc["name"], result=result[:80]) + messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result}) + _maybe_switch_mode(session_id, tc["name"]) + tools_run.append(tc["name"]) + return reply, tools_run + + +def _voice_pass(messages, draft: str, backend: Backend, model: str | None) -> str: + """Mouth: re-render the mind's draft in her voice. Falls back to the draft on failure.""" + try: + out = llm.complete(mind.voice_messages(messages, draft), backend=backend, model=model) + return (out or "").strip() or draft + except Exception as exc: + logbus.log("error", "voice pass failed", error=str(exc)[:160]) + return draft + + def respond(session_id: str, user_msg: str, backend: Backend = "cloud", model_override: str | None = None) -> str: """Produce Lyra's reply to a single user message and persist the exchange.""" @@ -48,28 +96,16 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud", turn = mind.assemble(session_id, user_msg, backend, model) messages = turn.messages - - # Tool loop (speak): offer her tools (scoped to the mode); run any she calls and - # feed results back until she returns a text reply. tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None ctx = {"session_id": session_id, "backend": backend} - reply = "" - for _ in range(MAX_TOOL_ROUNDS): - assistant_msg, tool_calls = llm.chat_call( - messages, backend=backend, model=model, tools=tool_specs - ) - if not tool_calls: - reply = assistant_msg.get("content") or "" - break - messages.append(assistant_msg) # her tool-call request - for tc in tool_calls: - result = toolkit.dispatch(tc["name"], tc["arguments"], ctx) - logbus.log("info", "tool call", session=session_id, tool=tc["name"], result=result[:80]) - messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result}) - _maybe_switch_mode(session_id, tc["name"]) + + reply, _ = _mind_loop(messages, backend, model, tool_specs, ctx, session_id) + mouth = _mouth_target(cfg, backend, model) + if mouth and reply: + reply = _voice_pass(messages, reply, *mouth) if not reply: - reply = "(I got tangled using my tools there — say that again?)" - logbus.log("info", "reply", session=session_id, chars=len(reply)) + reply = _TANGLED + logbus.log("info", "reply", session=session_id, chars=len(reply), voiced=bool(mouth)) memory.remember(session_id, "user", user_msg) memory.remember(session_id, "assistant", reply) @@ -79,11 +115,8 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud", def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud", model_override: str | None = None): - """Streaming generator version of `respond`. - - Yields ("delta", text) as content streams in, ("tool", name) when a tool runs, - and a final ("done", reply). Persists the exchange — same side effects as `respond`. - """ + """Streaming generator version of `respond`. Yields ("delta", text), ("tool", name), + and a final ("done", reply). Same side effects as `respond`.""" cfg = config.load() model = _resolve_model(backend, model_override, cfg) logbus.log("info", "chat request (stream)", session=session_id, backend=backend, @@ -93,36 +126,57 @@ def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud", messages = turn.messages tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None ctx = {"session_id": session_id, "backend": backend} - parts: list[str] = [] - for _ in range(MAX_TOOL_ROUNDS): - assistant_msg = None - tool_calls = None - for ev, payload in llm.chat_call_stream( - messages, backend=backend, model=model, tools=tool_specs - ): - if ev == "delta": - parts.append(payload) - yield ("delta", payload) - elif ev == "message": - assistant_msg = payload - elif ev == "tool_calls": - tool_calls = payload - if not tool_calls: - break - messages.append(assistant_msg) # her tool-call request - for tc in tool_calls: - result = toolkit.dispatch(tc["name"], tc["arguments"], ctx) - logbus.log("info", "tool call", session=session_id, tool=tc["name"], result=result[:80]) - messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result}) - _maybe_switch_mode(session_id, tc["name"]) - yield ("tool", tc["name"]) + mouth = _mouth_target(cfg, backend, model) - reply = "".join(parts) - if not reply: - reply = "(I got tangled using my tools there — say that again?)" - yield ("delta", reply) - logbus.log("info", "reply", session=session_id, chars=len(reply)) + if mouth is None: + # No separate voice: stream the mind directly (the original path, unchanged). + parts: list[str] = [] + for _ in range(MAX_TOOL_ROUNDS): + assistant_msg = None + tool_calls = None + for ev, payload in llm.chat_call_stream( + messages, backend=backend, model=model, tools=tool_specs + ): + if ev == "delta": + parts.append(payload) + yield ("delta", payload) + elif ev == "message": + assistant_msg = payload + elif ev == "tool_calls": + tool_calls = payload + if not tool_calls: + break + messages.append(assistant_msg) + for tc in tool_calls: + result = toolkit.dispatch(tc["name"], tc["arguments"], ctx) + logbus.log("info", "tool call", session=session_id, tool=tc["name"], result=result[:80]) + messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result}) + _maybe_switch_mode(session_id, tc["name"]) + yield ("tool", tc["name"]) + reply = "".join(parts) + if not reply: + reply = _TANGLED + yield ("delta", reply) + else: + # Mind decides + runs tools (non-streamed); mouth re-voices, streamed. + draft, tools_run = _mind_loop(messages, backend, model, tool_specs, ctx, session_id) + for name in tools_run: + yield ("tool", name) + parts = [] + try: + for ev, payload in llm.chat_call_stream( + mind.voice_messages(messages, draft), backend=mouth[0], model=mouth[1], tools=None + ): + if ev == "delta": + parts.append(payload) + yield ("delta", payload) + except Exception as exc: + logbus.log("error", "voice stream failed", error=str(exc)[:160]) + reply = "".join(parts).strip() or draft or _TANGLED + if not parts: + yield ("delta", reply) + logbus.log("info", "reply", session=session_id, chars=len(reply), voiced=bool(mouth)) memory.remember(session_id, "user", user_msg) memory.remember(session_id, "assistant", reply) summary.maybe_summarize_async(session_id) diff --git a/lyra/config.py b/lyra/config.py index de97d8a..e9a84ba 100644 --- a/lyra/config.py +++ b/lyra/config.py @@ -38,6 +38,11 @@ class Config: ping_quiet_hours: str # local "start-end" 24h window to stay silent, e.g. "1-9" digest_hour: int # local hour (0-23) to send her daily "what I've been thinking" digest chat_deliberate: bool # think privately before answering substantive chat turns + # Mind/mouth split: the mind (the chat backend/model above) decides, reasons, and + # runs tools; the mouth re-voices the final reply in her character. Empty = mouth + # is the mind (no separate pass) — the slot for an eventual fine-tuned voice. + mouth_backend: str + mouth_model: str | None # External input feed (her #1: react to the world). Comma-separated RSS/Atom URLs. feeds: tuple[str, ...] feed_react_prob: float # chance a would-be new thread reacts to a feed item instead @@ -81,6 +86,8 @@ def load() -> Config: ping_quiet_hours=os.getenv("PING_QUIET_HOURS", "1-9"), digest_hour=int(os.getenv("DIGEST_HOUR", "18")), chat_deliberate=os.getenv("CHAT_DELIBERATE", "true").lower() not in ("0", "false", "no"), + mouth_backend=os.getenv("MOUTH_BACKEND", "").lower(), + mouth_model=os.getenv("MOUTH_MODEL") or None, feeds=_csv("LYRA_FEEDS", "https://hnrss.org/frontpage,https://www.pokernews.com/rss.php"), feed_react_prob=float(os.getenv("FEED_REACT_PROB", "0.5")), ) diff --git a/lyra/mind.py b/lyra/mind.py index 71babb5..31202d7 100644 --- a/lyra/mind.py +++ b/lyra/mind.py @@ -290,6 +290,25 @@ def _deliberate_part(ctx: TurnContext) -> TurnContext: PIPELINE = (_perceive, _route, _compose, _deliberate_part) +# --- mouth (the voice pass: re-render the mind's draft in her character) ----- + +_VOICE_NOTE = ( + "↑ That was you working the answer out — a draft Brian has NOT seen. Now say it to him " + "in your own voice: warm, direct, specific, in character, opinionated. Keep every fact, " + "number, name, and decision exactly as in the draft — change only the wording so it sounds " + "like you, not a generic assistant. No preamble, no meta, no 'here's a friendlier version' " + "— just your actual message to Brian." +) + + +def voice_messages(messages: list[Message], draft: str) -> list[Message]: + """Prompt for the mouth model: the full turn context + the mind's draft to re-voice.""" + return messages + [ + {"role": "assistant", "content": draft}, + {"role": "system", "content": _VOICE_NOTE}, + ] + + def assemble(session_id: str, user_msg: str, backend: Backend, model: str | None = None) -> TurnContext: """Run the parts over a fresh TurnContext and return it ready for `chat` to speak.""" diff --git a/tests/test_chat.py b/tests/test_chat.py index e806dae..f0e8fd9 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -61,3 +61,46 @@ def test_assemble_runs_the_pipeline(lyra, monkeypatch): assert turn.mode is not None # route ran assert turn.messages and turn.messages[-1]["role"] == "user" # compose ran assert turn.messages[-1]["content"] == "hey what's up" + + +# --- mind/mouth split (P3) ---------------------------------------------- + +def test_mouth_target_off_by_default(monkeypatch): + import importlib + from lyra import config + monkeypatch.delenv("MOUTH_BACKEND", raising=False) + monkeypatch.delenv("MOUTH_MODEL", raising=False) + import lyra.chat as chat + importlib.reload(chat) + assert chat._mouth_target(config.load(), "cloud", "gpt-4o") is None # mouth == mind + + +def test_mouth_target_when_configured(monkeypatch): + import importlib + from lyra import config + monkeypatch.setenv("MOUTH_BACKEND", "local") + monkeypatch.setenv("MOUTH_MODEL", "dolphin3:8b") + import lyra.chat as chat + importlib.reload(chat) + assert chat._mouth_target(config.load(), "cloud", "gpt-4o") == ("local", "dolphin3:8b") + + +def test_voice_messages_carries_draft_and_instruction(lyra): + _, mind = lyra + out = mind.voice_messages([{"role": "user", "content": "hi"}], "draft with FACT 42") + assert out[-2] == {"role": "assistant", "content": "draft with FACT 42"} + assert out[-1]["role"] == "system" and "your own voice" in out[-1]["content"].lower() + + +def test_voice_pass_revoices_then_falls_back(lyra, monkeypatch): + _, mind = lyra + import importlib + import lyra.chat as chat + importlib.reload(chat) + monkeypatch.setattr(chat.llm, "complete", lambda msgs, backend=None, model=None: "voiced (FACT 42)") + assert chat._voice_pass([], "draft FACT 42", "local", "dolphin3:8b") == "voiced (FACT 42)" + # on failure it keeps the mind's draft (chat must not break) + def boom(*a, **k): + raise RuntimeError("mouth down") + monkeypatch.setattr(chat.llm, "complete", boom) + assert chat._voice_pass([], "draft FACT 42", "local", "dolphin3:8b") == "draft FACT 42"