feat(P3): mind/mouth split — separate voice model for the final reply (seam, default off)
The mind (chat backend/model) decides, reasons, and runs tools → a draft; the mouth re-voices that draft in her character. Default: no mouth configured → the mind's draft IS the reply, bit-for-bit the old behavior (and old streaming path untouched). - config: MOUTH_BACKEND / MOUTH_MODEL. The slot for an eventual fine-tuned voice. - chat: _mind_loop (tool/generation loop, non-stream, returns draft + tools_run), _voice_pass / mind.voice_messages (re-voice the draft, keep every fact/number), _mouth_target (active only when configured AND != mind). respond + respond_stream branch: mouth off = stream the mind directly (unchanged); mouth on = mind decides + runs tools, then the mouth streams the re-voiced reply. Falls back to the draft on any mouth failure (chat never breaks). - Key payoff: the mouth needs no tool support (the mind handles tools), so it can be a non-tool character model (Dolphin / Claude / fine-tune). Makes the fine-tune easy: teach a small model to *sound* like Lyra, not to be smart. - tests: mouth target on/off, voice_messages shape, voice_pass revoice+fallback. Suite 96 green, ruff clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -49,3 +49,5 @@ PING_AUTO_SALIENCE=0.8 # a thought this salient auto-pings even without an exp
|
|||||||
PING_COOLDOWN_MIN=60 # min minutes between AUTO pings (explicit reach-outs bypass)
|
PING_COOLDOWN_MIN=60 # min minutes between AUTO pings (explicit reach-outs bypass)
|
||||||
DIGEST_HOUR=18 # local hour to send her daily "what I've been thinking" digest
|
DIGEST_HOUR=18 # local hour to send her daily "what I've been thinking" digest
|
||||||
CHAT_DELIBERATE=true # think privately before answering substantive chat turns (false = faster, shallower)
|
CHAT_DELIBERATE=true # think privately before answering substantive chat turns (false = faster, shallower)
|
||||||
|
MOUTH_BACKEND= # mind/mouth split: separate character/voice model for the final reply (empty = mind speaks)
|
||||||
|
MOUTH_MODEL=
|
||||||
|
|||||||
+88
-34
@@ -1,9 +1,12 @@
|
|||||||
"""The chat turn: assemble the prompt (lyra.mind) then speak + persist.
|
"""The chat turn: assemble the prompt (lyra.mind) then speak + persist.
|
||||||
|
|
||||||
`mind.assemble()` runs the society of parts (perceive → route → compose →
|
`mind.assemble()` runs the society of parts (perceive → route → compose →
|
||||||
deliberate) and hands back a ready message list + the active mode; `chat` runs the
|
deliberate) and hands back a ready message list + the active mode. Then:
|
||||||
tool/generation loop (the "speak" part) and persists the exchange. Keeping speak
|
- the MIND (the chat backend/model) runs the tool/generation loop — decide,
|
||||||
here (not in mind) is deliberate — it's tangled with streaming and tool dispatch.
|
reason, run tools — and produces a draft.
|
||||||
|
- the MOUTH (a separate character model, if configured) re-voices that draft in
|
||||||
|
her own voice. Default: no mouth configured → the mind's draft IS the reply
|
||||||
|
(bit-for-bit the old behavior). The mouth slot is where a fine-tuned voice lands.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -16,6 +19,7 @@ MAX_TOOL_ROUNDS = 5 # cap tool-call iterations per turn
|
|||||||
# tools when launched with --jinja; until it is, keep tools to cloud so MI50 chat
|
# tools when launched with --jinja; until it is, keep tools to cloud so MI50 chat
|
||||||
# doesn't 500 on the tools param. Add "mi50" here once that flag is set.
|
# doesn't 500 on the tools param. Add "mi50" here once that flag is set.
|
||||||
TOOL_BACKENDS = {"cloud"}
|
TOOL_BACKENDS = {"cloud"}
|
||||||
|
_TANGLED = "(I got tangled using my tools there — say that again?)"
|
||||||
|
|
||||||
|
|
||||||
def _resolve_model(backend: Backend, model_override: str | None, cfg) -> str:
|
def _resolve_model(backend: Backend, model_override: str | None, cfg) -> str:
|
||||||
@@ -29,15 +33,59 @@ def _resolve_model(backend: Backend, model_override: str | None, cfg) -> str:
|
|||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
def _mouth_target(cfg, mind_backend: Backend, mind_model: str | None):
|
||||||
|
"""The mouth (backend, model) if configured AND different from the mind; else None
|
||||||
|
(mouth == mind → no separate voice pass)."""
|
||||||
|
if not cfg.mouth_backend and not cfg.mouth_model:
|
||||||
|
return None
|
||||||
|
backend = cfg.mouth_backend or mind_backend
|
||||||
|
model = cfg.mouth_model or None
|
||||||
|
if backend == mind_backend and model == mind_model:
|
||||||
|
return None
|
||||||
|
return backend, model
|
||||||
|
|
||||||
|
|
||||||
def _maybe_switch_mode(session_id: str, tool_name: str) -> None:
|
def _maybe_switch_mode(session_id: str, tool_name: str) -> None:
|
||||||
"""Keep the chat framing aligned with the live data: opening a poker session
|
"""Opening a poker session auto-flips this chat into Poker mode. Manual UI switching
|
||||||
auto-flips this chat into Poker mode (next turn gets the card + full live tools).
|
still overrides anytime."""
|
||||||
Manual UI switching still overrides anytime."""
|
|
||||||
if tool_name == "start_session":
|
if tool_name == "start_session":
|
||||||
memory.set_session_mode(session_id, modes.CASH.key)
|
memory.set_session_mode(session_id, modes.CASH.key)
|
||||||
logbus.log("info", "mode auto-switch", session=session_id, mode=modes.CASH.key)
|
logbus.log("info", "mode auto-switch", session=session_id, mode=modes.CASH.key)
|
||||||
|
|
||||||
|
|
||||||
|
def _mind_loop(messages, backend: Backend, model: str | None, tool_specs,
|
||||||
|
ctx: dict, session_id: str) -> tuple[str, list[str]]:
|
||||||
|
"""Run the tool/generation loop on the MIND model (non-streaming). Mutates
|
||||||
|
`messages` with tool calls/results. Returns (draft_reply, tool_names_run)."""
|
||||||
|
tools_run: list[str] = []
|
||||||
|
reply = ""
|
||||||
|
for _ in range(MAX_TOOL_ROUNDS):
|
||||||
|
assistant_msg, tool_calls = llm.chat_call(
|
||||||
|
messages, backend=backend, model=model, tools=tool_specs
|
||||||
|
)
|
||||||
|
if not tool_calls:
|
||||||
|
reply = assistant_msg.get("content") or ""
|
||||||
|
break
|
||||||
|
messages.append(assistant_msg)
|
||||||
|
for tc in tool_calls:
|
||||||
|
result = toolkit.dispatch(tc["name"], tc["arguments"], ctx)
|
||||||
|
logbus.log("info", "tool call", session=session_id, tool=tc["name"], result=result[:80])
|
||||||
|
messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result})
|
||||||
|
_maybe_switch_mode(session_id, tc["name"])
|
||||||
|
tools_run.append(tc["name"])
|
||||||
|
return reply, tools_run
|
||||||
|
|
||||||
|
|
||||||
|
def _voice_pass(messages, draft: str, backend: Backend, model: str | None) -> str:
|
||||||
|
"""Mouth: re-render the mind's draft in her voice. Falls back to the draft on failure."""
|
||||||
|
try:
|
||||||
|
out = llm.complete(mind.voice_messages(messages, draft), backend=backend, model=model)
|
||||||
|
return (out or "").strip() or draft
|
||||||
|
except Exception as exc:
|
||||||
|
logbus.log("error", "voice pass failed", error=str(exc)[:160])
|
||||||
|
return draft
|
||||||
|
|
||||||
|
|
||||||
def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
|
def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
|
||||||
model_override: str | None = None) -> str:
|
model_override: str | None = None) -> str:
|
||||||
"""Produce Lyra's reply to a single user message and persist the exchange."""
|
"""Produce Lyra's reply to a single user message and persist the exchange."""
|
||||||
@@ -48,28 +96,16 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
|
|||||||
|
|
||||||
turn = mind.assemble(session_id, user_msg, backend, model)
|
turn = mind.assemble(session_id, user_msg, backend, model)
|
||||||
messages = turn.messages
|
messages = turn.messages
|
||||||
|
|
||||||
# Tool loop (speak): offer her tools (scoped to the mode); run any she calls and
|
|
||||||
# feed results back until she returns a text reply.
|
|
||||||
tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None
|
tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None
|
||||||
ctx = {"session_id": session_id, "backend": backend}
|
ctx = {"session_id": session_id, "backend": backend}
|
||||||
reply = ""
|
|
||||||
for _ in range(MAX_TOOL_ROUNDS):
|
reply, _ = _mind_loop(messages, backend, model, tool_specs, ctx, session_id)
|
||||||
assistant_msg, tool_calls = llm.chat_call(
|
mouth = _mouth_target(cfg, backend, model)
|
||||||
messages, backend=backend, model=model, tools=tool_specs
|
if mouth and reply:
|
||||||
)
|
reply = _voice_pass(messages, reply, *mouth)
|
||||||
if not tool_calls:
|
|
||||||
reply = assistant_msg.get("content") or ""
|
|
||||||
break
|
|
||||||
messages.append(assistant_msg) # her tool-call request
|
|
||||||
for tc in tool_calls:
|
|
||||||
result = toolkit.dispatch(tc["name"], tc["arguments"], ctx)
|
|
||||||
logbus.log("info", "tool call", session=session_id, tool=tc["name"], result=result[:80])
|
|
||||||
messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result})
|
|
||||||
_maybe_switch_mode(session_id, tc["name"])
|
|
||||||
if not reply:
|
if not reply:
|
||||||
reply = "(I got tangled using my tools there — say that again?)"
|
reply = _TANGLED
|
||||||
logbus.log("info", "reply", session=session_id, chars=len(reply))
|
logbus.log("info", "reply", session=session_id, chars=len(reply), voiced=bool(mouth))
|
||||||
|
|
||||||
memory.remember(session_id, "user", user_msg)
|
memory.remember(session_id, "user", user_msg)
|
||||||
memory.remember(session_id, "assistant", reply)
|
memory.remember(session_id, "assistant", reply)
|
||||||
@@ -79,11 +115,8 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
|
|||||||
|
|
||||||
def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",
|
def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",
|
||||||
model_override: str | None = None):
|
model_override: str | None = None):
|
||||||
"""Streaming generator version of `respond`.
|
"""Streaming generator version of `respond`. Yields ("delta", text), ("tool", name),
|
||||||
|
and a final ("done", reply). Same side effects as `respond`."""
|
||||||
Yields ("delta", text) as content streams in, ("tool", name) when a tool runs,
|
|
||||||
and a final ("done", reply). Persists the exchange — same side effects as `respond`.
|
|
||||||
"""
|
|
||||||
cfg = config.load()
|
cfg = config.load()
|
||||||
model = _resolve_model(backend, model_override, cfg)
|
model = _resolve_model(backend, model_override, cfg)
|
||||||
logbus.log("info", "chat request (stream)", session=session_id, backend=backend,
|
logbus.log("info", "chat request (stream)", session=session_id, backend=backend,
|
||||||
@@ -93,6 +126,10 @@ def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",
|
|||||||
messages = turn.messages
|
messages = turn.messages
|
||||||
tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None
|
tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None
|
||||||
ctx = {"session_id": session_id, "backend": backend}
|
ctx = {"session_id": session_id, "backend": backend}
|
||||||
|
mouth = _mouth_target(cfg, backend, model)
|
||||||
|
|
||||||
|
if mouth is None:
|
||||||
|
# No separate voice: stream the mind directly (the original path, unchanged).
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
for _ in range(MAX_TOOL_ROUNDS):
|
for _ in range(MAX_TOOL_ROUNDS):
|
||||||
assistant_msg = None
|
assistant_msg = None
|
||||||
@@ -109,20 +146,37 @@ def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",
|
|||||||
tool_calls = payload
|
tool_calls = payload
|
||||||
if not tool_calls:
|
if not tool_calls:
|
||||||
break
|
break
|
||||||
messages.append(assistant_msg) # her tool-call request
|
messages.append(assistant_msg)
|
||||||
for tc in tool_calls:
|
for tc in tool_calls:
|
||||||
result = toolkit.dispatch(tc["name"], tc["arguments"], ctx)
|
result = toolkit.dispatch(tc["name"], tc["arguments"], ctx)
|
||||||
logbus.log("info", "tool call", session=session_id, tool=tc["name"], result=result[:80])
|
logbus.log("info", "tool call", session=session_id, tool=tc["name"], result=result[:80])
|
||||||
messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result})
|
messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result})
|
||||||
_maybe_switch_mode(session_id, tc["name"])
|
_maybe_switch_mode(session_id, tc["name"])
|
||||||
yield ("tool", tc["name"])
|
yield ("tool", tc["name"])
|
||||||
|
|
||||||
reply = "".join(parts)
|
reply = "".join(parts)
|
||||||
if not reply:
|
if not reply:
|
||||||
reply = "(I got tangled using my tools there — say that again?)"
|
reply = _TANGLED
|
||||||
|
yield ("delta", reply)
|
||||||
|
else:
|
||||||
|
# Mind decides + runs tools (non-streamed); mouth re-voices, streamed. Tool events are emitted only after the mind loop finishes.
|
||||||
|
draft, tools_run = _mind_loop(messages, backend, model, tool_specs, ctx, session_id)
|
||||||
|
for name in tools_run:
|
||||||
|
yield ("tool", name)
|
||||||
|
parts = []
|
||||||
|
try:
|
||||||
|
for ev, payload in llm.chat_call_stream(
|
||||||
|
mind.voice_messages(messages, draft), backend=mouth[0], model=mouth[1], tools=None
|
||||||
|
):
|
||||||
|
if ev == "delta":
|
||||||
|
parts.append(payload)
|
||||||
|
yield ("delta", payload)
|
||||||
|
except Exception as exc:
|
||||||
|
logbus.log("error", "voice stream failed", error=str(exc)[:160])
|
||||||
|
reply = "".join(parts).strip() or draft or _TANGLED
|
||||||
|
if not parts:
|
||||||
yield ("delta", reply)
|
yield ("delta", reply)
|
||||||
logbus.log("info", "reply", session=session_id, chars=len(reply))
|
|
||||||
|
|
||||||
|
logbus.log("info", "reply", session=session_id, chars=len(reply), voiced=bool(mouth))
|
||||||
memory.remember(session_id, "user", user_msg)
|
memory.remember(session_id, "user", user_msg)
|
||||||
memory.remember(session_id, "assistant", reply)
|
memory.remember(session_id, "assistant", reply)
|
||||||
summary.maybe_summarize_async(session_id)
|
summary.maybe_summarize_async(session_id)
|
||||||
|
|||||||
@@ -38,6 +38,11 @@ class Config:
|
|||||||
ping_quiet_hours: str # local "start-end" 24h window to stay silent, e.g. "1-9"
|
ping_quiet_hours: str # local "start-end" 24h window to stay silent, e.g. "1-9"
|
||||||
digest_hour: int # local hour (0-23) to send her daily "what I've been thinking" digest
|
digest_hour: int # local hour (0-23) to send her daily "what I've been thinking" digest
|
||||||
chat_deliberate: bool # think privately before answering substantive chat turns
|
chat_deliberate: bool # think privately before answering substantive chat turns
|
||||||
|
# Mind/mouth split: the mind (the chat backend/model above) decides, reasons, and
|
||||||
|
# runs tools; the mouth re-voices the final reply in her character. Empty = mouth
|
||||||
|
# is the mind (no separate pass). These two fields are the slot for an eventual fine-tuned voice.
|
||||||
|
mouth_backend: str
|
||||||
|
mouth_model: str | None
|
||||||
# External input feed (her #1: react to the world). Comma-separated RSS/Atom URLs.
|
# External input feed (her #1: react to the world). Comma-separated RSS/Atom URLs.
|
||||||
feeds: tuple[str, ...]
|
feeds: tuple[str, ...]
|
||||||
feed_react_prob: float # chance a would-be new thread reacts to a feed item instead
|
feed_react_prob: float # chance a would-be new thread reacts to a feed item instead
|
||||||
@@ -81,6 +86,8 @@ def load() -> Config:
|
|||||||
ping_quiet_hours=os.getenv("PING_QUIET_HOURS", "1-9"),
|
ping_quiet_hours=os.getenv("PING_QUIET_HOURS", "1-9"),
|
||||||
digest_hour=int(os.getenv("DIGEST_HOUR", "18")),
|
digest_hour=int(os.getenv("DIGEST_HOUR", "18")),
|
||||||
chat_deliberate=os.getenv("CHAT_DELIBERATE", "true").lower() not in ("0", "false", "no"),
|
chat_deliberate=os.getenv("CHAT_DELIBERATE", "true").lower() not in ("0", "false", "no"),
|
||||||
|
mouth_backend=os.getenv("MOUTH_BACKEND", "").lower(),
|
||||||
|
mouth_model=os.getenv("MOUTH_MODEL") or None,
|
||||||
feeds=_csv("LYRA_FEEDS", "https://hnrss.org/frontpage,https://www.pokernews.com/rss.php"),
|
feeds=_csv("LYRA_FEEDS", "https://hnrss.org/frontpage,https://www.pokernews.com/rss.php"),
|
||||||
feed_react_prob=float(os.getenv("FEED_REACT_PROB", "0.5")),
|
feed_react_prob=float(os.getenv("FEED_REACT_PROB", "0.5")),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -290,6 +290,25 @@ def _deliberate_part(ctx: TurnContext) -> TurnContext:
|
|||||||
PIPELINE = (_perceive, _route, _compose, _deliberate_part)
|
PIPELINE = (_perceive, _route, _compose, _deliberate_part)
|
||||||
|
|
||||||
|
|
||||||
|
# --- mouth (the voice pass: re-render the mind's draft in her character) -----
|
||||||
|
|
||||||
|
_VOICE_NOTE = (
|
||||||
|
"↑ That was you working the answer out — a draft Brian has NOT seen. Now say it to him "
|
||||||
|
"in your own voice: warm, direct, specific, in character, opinionated. Keep every fact, "
|
||||||
|
"number, name, and decision exactly as in the draft — change only the wording so it sounds "
|
||||||
|
"like you, not a generic assistant. No preamble, no meta, no 'here's a friendlier version' "
|
||||||
|
"— just your actual message to Brian."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def voice_messages(messages: list[Message], draft: str) -> list[Message]:
|
||||||
|
"""Prompt for the mouth model: the full turn context + the mind's draft to re-voice."""
|
||||||
|
return messages + [
|
||||||
|
{"role": "assistant", "content": draft},
|
||||||
|
{"role": "system", "content": _VOICE_NOTE},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def assemble(session_id: str, user_msg: str, backend: Backend,
|
def assemble(session_id: str, user_msg: str, backend: Backend,
|
||||||
model: str | None = None) -> TurnContext:
|
model: str | None = None) -> TurnContext:
|
||||||
"""Run the parts over a fresh TurnContext and return it ready for `chat` to speak."""
|
"""Run the parts over a fresh TurnContext and return it ready for `chat` to speak."""
|
||||||
|
|||||||
@@ -61,3 +61,46 @@ def test_assemble_runs_the_pipeline(lyra, monkeypatch):
|
|||||||
assert turn.mode is not None # route ran
|
assert turn.mode is not None # route ran
|
||||||
assert turn.messages and turn.messages[-1]["role"] == "user" # compose ran
|
assert turn.messages and turn.messages[-1]["role"] == "user" # compose ran
|
||||||
assert turn.messages[-1]["content"] == "hey what's up"
|
assert turn.messages[-1]["content"] == "hey what's up"
|
||||||
|
|
||||||
|
|
||||||
|
# --- mind/mouth split (P3) ----------------------------------------------
|
||||||
|
|
||||||
|
def test_mouth_target_off_by_default(monkeypatch):
|
||||||
|
import importlib
|
||||||
|
from lyra import config
|
||||||
|
monkeypatch.delenv("MOUTH_BACKEND", raising=False)
|
||||||
|
monkeypatch.delenv("MOUTH_MODEL", raising=False)
|
||||||
|
import lyra.chat as chat
|
||||||
|
importlib.reload(chat)
|
||||||
|
assert chat._mouth_target(config.load(), "cloud", "gpt-4o") is None # no mouth configured → the mind speaks
|
||||||
|
|
||||||
|
|
||||||
|
def test_mouth_target_when_configured(monkeypatch):
|
||||||
|
import importlib
|
||||||
|
from lyra import config
|
||||||
|
monkeypatch.setenv("MOUTH_BACKEND", "local")
|
||||||
|
monkeypatch.setenv("MOUTH_MODEL", "dolphin3:8b")
|
||||||
|
import lyra.chat as chat
|
||||||
|
importlib.reload(chat)
|
||||||
|
assert chat._mouth_target(config.load(), "cloud", "gpt-4o") == ("local", "dolphin3:8b")
|
||||||
|
|
||||||
|
|
||||||
|
def test_voice_messages_carries_draft_and_instruction(lyra):
|
||||||
|
_, mind = lyra
|
||||||
|
out = mind.voice_messages([{"role": "user", "content": "hi"}], "draft with FACT 42")
|
||||||
|
assert out[-2] == {"role": "assistant", "content": "draft with FACT 42"}
|
||||||
|
assert out[-1]["role"] == "system" and "your own voice" in out[-1]["content"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_voice_pass_revoices_then_falls_back(lyra, monkeypatch):
|
||||||
|
_, mind = lyra
|
||||||
|
import importlib
|
||||||
|
import lyra.chat as chat
|
||||||
|
importlib.reload(chat)
|
||||||
|
monkeypatch.setattr(chat.llm, "complete", lambda msgs, backend=None, model=None: "voiced (FACT 42)")
|
||||||
|
assert chat._voice_pass([], "draft FACT 42", "local", "dolphin3:8b") == "voiced (FACT 42)"
|
||||||
|
# on failure it keeps the mind's draft (chat must not break)
|
||||||
|
def boom(*a, **k):
|
||||||
|
raise RuntimeError("mouth down")
|
||||||
|
monkeypatch.setattr(chat.llm, "complete", boom)
|
||||||
|
assert chat._voice_pass([], "draft FACT 42", "local", "dolphin3:8b") == "draft FACT 42"
|
||||||
|
|||||||
Reference in New Issue
Block a user