Files
project-lyra/lyra/llm.py
T
serversdown a5477ae15c feat: tool use — Lyra's first real actions (journal_write, note)
She can now *do* things mid-conversation, not just reply. Adds a tool-calling
loop to the chat path and her first two tools; the same mechanism will carry the
poker tools (start_session, log_result, get_stats, solver) next.

- tools.py: registry of OpenAI-style tool specs + handlers + safe dispatch;
  journal_write (knowing journaling) and note (tagged notepad, e.g. poker reads)
- llm.chat_call(): OpenAI-style call that returns tool_calls (cloud/mi50);
  local has no tool support and returns plain content
- chat.respond(): tool loop — offer tools, run any calls, feed results back,
  repeat until a text reply (capped at MAX_TOOL_ROUNDS); persists final reply
- tests: dispatch + full chat loop (tool call -> result -> reply)

Verified live: she invoked `note`, tagged it 'poker', stored a villain read.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-17 19:04:34 +00:00

105 lines
4.0 KiB
Python

"""LLM router: local (Ollama) chat, cloud (OpenAI) chat + embeddings."""
from __future__ import annotations
from typing import Literal, TypedDict
import httpx
from openai import OpenAI
from lyra.config import load
# Speaker roles allowed on a plain chat message.
_Role = Literal["system", "user", "assistant"]


class Message(TypedDict):
    """One chat message: the speaker's `role` and the text `content`."""

    role: _Role
    content: str
# Chat backend selector: "local" is the Ollama server, "cloud" is OpenAI, and
# "mi50" is an OpenAI-compatible llama.cpp server reached via its own base URL.
Backend = Literal["local", "cloud", "mi50"]
def complete(messages: list[Message], backend: Backend = "local", model: str | None = None) -> str:
    """Run one chat completion on the chosen backend and return the reply text.

    An explicit `model` takes precedence over the backend's configured default
    (so live chat can run a stronger cloud model than bulk consolidation).
    Any `backend` other than "cloud" or "mi50" is served by the local Ollama
    server, whose HTTP error statuses propagate via `raise_for_status()`.

    Raises:
        RuntimeError: if `backend` is "cloud" and no OpenAI API key is configured.
    """
    settings = load()

    if backend not in ("cloud", "mi50"):
        # Local Ollama: a single non-streaming POST to its chat endpoint.
        payload = {
            "model": model or settings.local_model,
            "messages": messages,
            "stream": False,
        }
        reply = httpx.post(f"{settings.local_base_url}/api/chat", json=payload, timeout=120)
        reply.raise_for_status()
        return reply.json()["message"]["content"]

    if backend == "cloud":
        if not settings.openai_api_key:
            raise RuntimeError("OPENAI_API_KEY is not set")
        api = OpenAI(api_key=settings.openai_api_key)
        chosen_model = model or settings.cloud_model
    else:
        # MI50 box runs an OpenAI-compatible llama.cpp server; key is unused.
        api = OpenAI(api_key="not-needed", base_url=settings.mi50_base_url)
        chosen_model = model or settings.mi50_model

    completion = api.chat.completions.create(model=chosen_model, messages=messages)
    # A missing/None content is normalised to an empty string.
    return completion.choices[0].message.content or ""
def chat_call(
    messages: list, backend: Backend = "cloud", model: str | None = None,
    tools: list | None = None,
) -> tuple[dict, list | None]:
    """Run a single chat turn that is allowed to ask for tool calls.

    Tool calling is only wired up for the OpenAI-style backends ("cloud" and
    "mi50"). Any other backend (local Ollama) ignores `tools` and yields a
    plain assistant message built from `complete()`.

    Returns:
        A pair `(assistant_message, tool_calls)`. `assistant_message` is the
        raw message dict, meant to be appended to `messages` ahead of any tool
        results. `tool_calls` is a list of `{id, name, arguments}` dicts, or
        None when the model requested no tools.

    Raises:
        RuntimeError: if `backend` is "cloud" and no OpenAI API key is configured.
    """
    settings = load()

    if backend not in ("cloud", "mi50"):
        # local (Ollama): no tool-calling here — return plain content.
        text = complete(messages, backend=backend, model=model)
        return {"role": "assistant", "content": text}, None

    if backend == "mi50":
        api = OpenAI(api_key="not-needed", base_url=settings.mi50_base_url)
        chosen_model = model or settings.mi50_model
    else:
        if not settings.openai_api_key:
            raise RuntimeError("OPENAI_API_KEY is not set")
        api = OpenAI(api_key=settings.openai_api_key)
        chosen_model = model or settings.cloud_model

    request: dict = {"model": chosen_model, "messages": messages}
    if tools:
        # Only forward `tools` when there is at least one spec to offer.
        request["tools"] = tools
    reply = api.chat.completions.create(**request).choices[0].message

    requested = getattr(reply, "tool_calls", None)
    if not requested:
        return reply.model_dump(), None

    # Flatten each SDK tool-call object into a plain {id, name, arguments} dict.
    calls = [
        {"id": call.id, "name": call.function.name, "arguments": call.function.arguments}
        for call in requested
    ]
    return reply.model_dump(), calls
def embed(texts: list[str]) -> list[list[float]]:
    """Embed texts using the configured backend (EMBED_BACKEND: "cloud" or "local").

    An empty `texts` returns `[]` immediately, without loading config or
    contacting any backend. Otherwise the whole batch is sent in one request
    and the embedding vectors from the response are returned.

    Note: OpenAI and Ollama embeddings live in different vector spaces (and
    dimensions). A given database is tied to whichever backend created it — don't
    switch EMBED_BACKEND against an existing DB or cosine recall will break.

    Raises:
        RuntimeError: if the cloud backend is selected and no OpenAI API key is set.
    """
    if not texts:
        # Nothing to embed: skip the network round-trip. OpenAI's embeddings
        # endpoint requires a non-empty input, so sending this would only fail.
        return []

    cfg = load()
    if cfg.embed_backend == "local":
        # Ollama embed endpoint; HTTP error statuses propagate to the caller.
        resp = httpx.post(
            f"{cfg.local_base_url}/api/embed",
            json={"model": cfg.local_embed_model, "input": texts},
            timeout=120,
        )
        resp.raise_for_status()
        return resp.json()["embeddings"]

    # Any other embed_backend value is treated as cloud (OpenAI).
    if not cfg.openai_api_key:
        raise RuntimeError("OPENAI_API_KEY is not set")
    client = OpenAI(api_key=cfg.openai_api_key)
    resp = client.embeddings.create(model=cfg.embed_model, input=texts)
    return [d.embedding for d in resp.data]