a5477ae15c
She can now *do* things mid-conversation, not just reply. Adds a tool-calling loop to the chat path and her first two tools; the same mechanism will carry the poker tools (start_session, log_result, get_stats, solver) next. - tools.py: registry of OpenAI-style tool specs + handlers + safe dispatch; journal_write (knowing journaling) and note (tagged notepad, e.g. poker reads) - llm.chat_call(): OpenAI-style call that returns tool_calls (cloud/mi50); local has no tool support and returns plain content - chat.respond(): tool loop — offer tools, run any calls, feed results back, repeat until a text reply (capped at MAX_TOOL_ROUNDS); persists final reply - tests: dispatch + full chat loop (tool call -> result -> reply) Verified live: she invoked `note`, tagged it 'poker', stored a villain read. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
105 lines
4.0 KiB
Python
105 lines
4.0 KiB
Python
"""LLM router: local (Ollama) chat, cloud (OpenAI) chat + embeddings."""
|
|
from __future__ import annotations
|
|
|
|
from typing import Literal, TypedDict
|
|
|
|
import httpx
|
|
from openai import OpenAI
|
|
|
|
from lyra.config import load
|
|
|
|
|
|
class Message(TypedDict):
    """One chat turn in the role/content shape passed to the chat backends."""

    # Speaker of this turn; restricted to the three plain chat roles.
    role: Literal["system", "user", "assistant"]
    # Text of the turn.
    content: str
|
|
|
|
|
|
# Names of the chat routes this module knows: "local" is the Ollama server,
# "cloud" is OpenAI, and "mi50" is the OpenAI-compatible llama.cpp box.
Backend = Literal["local", "cloud", "mi50"]
|
|
|
|
|
|
def complete(messages: list[Message], backend: Backend = "local", model: str | None = None) -> str:
    """Run one chat completion on the chosen backend and return the reply text.

    `backend` picks the route: "cloud" talks to OpenAI, "mi50" talks to the
    OpenAI-compatible server at the configured MI50 base URL, and any other
    value is sent to the local Ollama `/api/chat` endpoint. `model` replaces
    the backend's configured default model (used so live chat can run a
    stronger cloud model than bulk consolidation).

    Raises RuntimeError when the cloud backend is requested without an OpenAI
    API key. HTTP error statuses from the local server are raised by
    `raise_for_status()`.
    """
    cfg = load()

    if backend not in ("cloud", "mi50"):
        # Local Ollama: a single non-streaming request; the reply text sits
        # under the "message" object of the JSON response.
        reply = httpx.post(
            f"{cfg.local_base_url}/api/chat",
            json={"model": model or cfg.local_model, "messages": messages, "stream": False},
            timeout=120,
        )
        reply.raise_for_status()
        return reply.json()["message"]["content"]

    if backend == "cloud":
        if not cfg.openai_api_key:
            raise RuntimeError("OPENAI_API_KEY is not set")
        client = OpenAI(api_key=cfg.openai_api_key)
        chosen_model = model or cfg.cloud_model
    else:
        # MI50 box runs an OpenAI-compatible llama.cpp server; key is unused.
        client = OpenAI(api_key="not-needed", base_url=cfg.mi50_base_url)
        chosen_model = model or cfg.mi50_model

    completion = client.chat.completions.create(model=chosen_model, messages=messages)
    # Content can come back empty/None; callers always receive a string.
    return completion.choices[0].message.content or ""
|
|
|
|
|
|
def chat_call(
    messages: list,
    backend: Backend = "cloud",
    model: str | None = None,
    tools: list | None = None,
) -> tuple[dict, list | None]:
    """Run a single chat turn in which the model may ask for tool calls.

    Only the OpenAI-style backends ("cloud", "mi50") are offered `tools`.
    Any other backend (local Ollama) has no tool support here and simply
    produces plain assistant content via `complete()`.

    Returns a pair `(assistant_message, tool_calls)`:

    * `assistant_message` is the raw message dict, meant to be appended back
      to `messages` before any tool results;
    * `tool_calls` is a list of `{id, name, arguments}` dicts, or None when
      the model requested no tools.

    Raises RuntimeError when the cloud backend is requested without an OpenAI
    API key.
    """
    cfg = load()

    if backend not in ("cloud", "mi50"):
        # local (Ollama): no tool-calling here — return plain content.
        text = complete(messages, backend=backend, model=model)
        return {"role": "assistant", "content": text}, None

    if backend == "mi50":
        # The key is a placeholder; only the base URL matters for this server.
        client = OpenAI(api_key="not-needed", base_url=cfg.mi50_base_url)
        chosen_model = model or cfg.mi50_model
    else:
        if not cfg.openai_api_key:
            raise RuntimeError("OPENAI_API_KEY is not set")
        client = OpenAI(api_key=cfg.openai_api_key)
        chosen_model = model or cfg.cloud_model

    request: dict = {"model": chosen_model, "messages": messages}
    if tools:
        # Only attach the key when there is something to offer.
        request["tools"] = tools

    reply = client.chat.completions.create(**request).choices[0].message

    # Flatten the SDK's tool-call objects into plain dicts for the caller.
    raw_calls = getattr(reply, "tool_calls", None)
    requested = (
        [
            {"id": call.id, "name": call.function.name, "arguments": call.function.arguments}
            for call in raw_calls
        ]
        if raw_calls
        else None
    )
    return reply.model_dump(), requested
|
|
|
|
|
|
def embed(texts: list[str]) -> list[list[float]]:
    """Embed texts using the configured backend (EMBED_BACKEND: "cloud" or "local").

    Returns the embedding vectors the backend produced for `texts`. An empty
    `texts` returns `[]` immediately, without loading config or contacting
    any backend.

    Note: OpenAI and Ollama embeddings live in different vector spaces (and
    dimensions). A given database is tied to whichever backend created it — don't
    switch EMBED_BACKEND against an existing DB or cosine recall will break.

    Raises RuntimeError when the cloud backend is used without an OpenAI API
    key. HTTP error statuses from the local server are raised by
    `raise_for_status()`.
    """
    if not texts:
        # Nothing to embed: skip the round-trip rather than send an empty
        # batch, which is not a meaningful embeddings request.
        return []

    cfg = load()

    if cfg.embed_backend == "local":
        # Ollama's embed endpoint takes the whole batch in one request.
        resp = httpx.post(
            f"{cfg.local_base_url}/api/embed",
            json={"model": cfg.local_embed_model, "input": texts},
            timeout=120,
        )
        resp.raise_for_status()
        return resp.json()["embeddings"]

    if not cfg.openai_api_key:
        raise RuntimeError("OPENAI_API_KEY is not set")
    client = OpenAI(api_key=cfg.openai_api_key)
    resp = client.embeddings.create(model=cfg.embed_model, input=texts)
    return [item.embedding for item in resp.data]
|