904eda3388
First step of the cognition control plane (docs/COGNITION.md). The chat turn is now an explicit society of parts over a shared TurnContext blackboard: perceive (stub) -> route (session mode) -> compose (tiered prompt) -> deliberate. - lyra/mind.py (new): TurnContext + the pipeline + assemble(); moved build_messages and the deliberation helpers here (the assembly belongs in the control plane). - lyra/chat.py: slimmed to "speak + persist" — calls mind.assemble(), runs the tool/generation loop, persists. No behavior change (same prompt, same output). - tests: point test_time/test_chat at mind; add an assemble() structure test; make test_chat/test_tools hermetic (CHAT_DELIBERATE off so respond() doesn't make a real LLM call). Suite 86 green in ~5s, ruff clean, no import cycle. This is the frame; perceive/route/learn get filled in next phases — each opt-in. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
130 lines
5.8 KiB
Python
130 lines
5.8 KiB
Python
"""The chat turn: assemble the prompt (lyra.mind) then speak + persist.
|
|
|
|
`mind.assemble()` runs the society of parts (perceive → route → compose →
deliberate) and hands back a ready message list + the active mode; `chat` runs the
tool/generation loop (the "speak" part) and persists the exchange. Keeping speak
here (not in mind) is deliberate — it's tangled with streaming and tool dispatch.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from lyra import config, llm, logbus, memory, mind, modes, summary
|
|
from lyra import tools as toolkit
|
|
from lyra.llm import Backend
|
|
|
|
# Cap on tool-call iterations per turn: the speak loops ask the model at most
# this many times before giving up and using the fallback reply.
MAX_TOOL_ROUNDS = 5

# Backends that support function-calling. The MI50's llama.cpp server only does
# tools when launched with --jinja; until it is, keep tools to cloud so MI50 chat
# doesn't 500 on the tools param. Add "mi50" here once that flag is set.
TOOL_BACKENDS = {"cloud"}
|
|
|
|
|
|
def _resolve_model(backend: Backend, model_override: str | None, cfg) -> str:
|
|
"""Live chat uses the stronger chat_model on cloud; local/mi50 use their own.
|
|
The UI's cloud-model picker only applies on the cloud backend."""
|
|
model = {"local": cfg.local_model, "cloud": cfg.chat_model, "mi50": cfg.mi50_model}.get(
|
|
backend, backend
|
|
)
|
|
if model_override and backend == "cloud":
|
|
model = model_override
|
|
return model
|
|
|
|
|
|
def _maybe_switch_mode(session_id: str, tool_name: str) -> None:
|
|
"""Keep the chat framing aligned with the live data: opening a poker session
|
|
auto-flips this chat into Poker mode (next turn gets the card + full live tools).
|
|
Manual UI switching still overrides anytime."""
|
|
if tool_name == "start_session":
|
|
memory.set_session_mode(session_id, modes.CASH.key)
|
|
logbus.log("info", "mode auto-switch", session=session_id, mode=modes.CASH.key)
|
|
|
|
|
|
def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
            model_override: str | None = None) -> str:
    """Produce Lyra's reply to a single user message and persist the exchange.

    Steps: resolve the model for ``backend``, let ``mind.assemble()`` build the
    message list and active mode, run the tool loop, then store both the user
    message and the reply via ``memory.remember`` and kick off
    ``summary.maybe_summarize_async``.

    Tools are only offered when ``backend`` is in ``TOOL_BACKENDS``. The loop
    runs at most ``MAX_TOOL_ROUNDS`` model calls; if none of them yields a
    non-empty text reply, a fixed fallback sentence is returned (and persisted)
    instead.

    Returns the reply text.
    """
    cfg = config.load()
    model = _resolve_model(backend, model_override, cfg)
    logbus.log("info", "chat request", session=session_id, backend=backend,
               model=model, embed=cfg.embed_backend)

    turn = mind.assemble(session_id, user_msg, backend, model)
    messages = turn.messages

    # Tool loop (speak): offer her tools (scoped to the mode); run any she calls
    # and feed results back until she returns a text reply.
    tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None
    ctx = {"session_id": session_id, "backend": backend}
    reply = ""
    for _ in range(MAX_TOOL_ROUNDS):
        assistant_msg, tool_calls = llm.chat_call(
            messages, backend=backend, model=model, tools=tool_specs
        )
        if not tool_calls:
            # No tool requests: this round's content is the final answer.
            reply = assistant_msg.get("content") or ""
            break

        messages.append(assistant_msg)  # her tool-call request
        for tc in tool_calls:
            result = toolkit.dispatch(tc["name"], tc["arguments"], ctx)
            # Only the first 80 characters of the result go to the log.
            logbus.log("info", "tool call", session=session_id, tool=tc["name"],
                       result=result[:80])
            messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result})
            _maybe_switch_mode(session_id, tc["name"])

    if not reply:
        # Either the rounds ran out while she was still calling tools, or the
        # final message had no content.
        reply = "(I got tangled using my tools there — say that again?)"
    logbus.log("info", "reply", session=session_id, chars=len(reply))

    memory.remember(session_id, "user", user_msg)
    memory.remember(session_id, "assistant", reply)
    summary.maybe_summarize_async(session_id)  # compact once enough new turns pile up
    return reply
|
|
|
|
|
|
def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",
                   model_override: str | None = None):
    """Streaming generator version of `respond`.

    Yields ("delta", text) as content streams in, ("tool", name) when a tool runs,
    and a final ("done", reply). Persists the exchange — same side effects as `respond`.

    The reply that is persisted and sent with "done" is the concatenation of
    every delta streamed during the turn, across all tool rounds. If nothing
    was streamed at all, the fixed fallback sentence is emitted as a single
    "delta" and used as the reply.
    """
    cfg = config.load()
    model = _resolve_model(backend, model_override, cfg)
    logbus.log("info", "chat request (stream)", session=session_id, backend=backend,
               model=model, embed=cfg.embed_backend)

    turn = mind.assemble(session_id, user_msg, backend, model)
    messages = turn.messages
    # Tools are only offered on backends known to support function-calling.
    tool_specs = toolkit.specs(turn.mode.tools) if backend in TOOL_BACKENDS else None
    ctx = {"session_id": session_id, "backend": backend}
    parts: list[str] = []  # every streamed text chunk, in order

    for _ in range(MAX_TOOL_ROUNDS):
        assistant_msg = None
        tool_calls = None
        for ev, payload in llm.chat_call_stream(
            messages, backend=backend, model=model, tools=tool_specs
        ):
            if ev == "delta":
                parts.append(payload)
                yield ("delta", payload)
            elif ev == "message":
                assistant_msg = payload
            elif ev == "tool_calls":
                tool_calls = payload

        if not tool_calls:
            # No tool requests this round: the streamed text is the answer.
            break

        # NOTE(review): assumes the stream always emits a "message" event
        # whenever it emits "tool_calls"; otherwise None is appended here —
        # confirm against llm.chat_call_stream.
        messages.append(assistant_msg)  # her tool-call request
        for tc in tool_calls:
            result = toolkit.dispatch(tc["name"], tc["arguments"], ctx)
            # Only the first 80 characters of the result go to the log.
            logbus.log("info", "tool call", session=session_id, tool=tc["name"],
                       result=result[:80])
            messages.append({"role": "tool", "tool_call_id": tc["id"], "content": result})
            _maybe_switch_mode(session_id, tc["name"])
            yield ("tool", tc["name"])

    reply = "".join(parts)
    if not reply:
        reply = "(I got tangled using my tools there — say that again?)"
        # Nothing was streamed, so send the fallback as a delta too.
        yield ("delta", reply)
    logbus.log("info", "reply", session=session_id, chars=len(reply))

    memory.remember(session_id, "user", user_msg)
    memory.remember(session_id, "assistant", reply)
    summary.maybe_summarize_async(session_id)
    yield ("done", reply)
|