# project-lyra/lyra/llm.py

"""LLM router: local (Ollama) chat, cloud (OpenAI) chat + embeddings."""
from __future__ import annotations

from typing import Literal, TypedDict

import httpx
from openai import OpenAI

from lyra.config import load


class Message(TypedDict):
    """One chat message, as a plain dict with ``role`` and ``content`` keys.

    Lists of these are passed as the ``messages`` payload to both chat
    backends in :func:`complete`.
    """

    # Author of the message; restricted to these three literal values.
    role: Literal["system", "user", "assistant"]
    # Text body of the message.
    content: str


# Chat backend selector: "local" is the Ollama server, "cloud" is OpenAI
# (see the module docstring).
Backend = Literal["local", "cloud"]


def complete(messages: list[Message], backend: Backend = "local") -> str:
    """Run one non-streaming chat completion and return the reply text.

    Args:
        messages: Chat messages, forwarded unchanged to the selected backend.
        backend: ``"cloud"`` uses the OpenAI chat completions API with the
            configured cloud model. Any other value is routed to the local
            server's ``/api/chat`` endpoint with the configured local model.

    Returns:
        The reply content. On the cloud path an empty or missing content is
        returned as ``""``.

    Raises:
        RuntimeError: ``backend`` is ``"cloud"`` and no OpenAI API key is
            configured.
        httpx.HTTPStatusError: The local server answered with an error
            status (raised by ``raise_for_status``).
    """
    cfg = load()
    if backend == "cloud":
        if not cfg.openai_api_key:
            raise RuntimeError("OPENAI_API_KEY is not set")
        client = OpenAI(api_key=cfg.openai_api_key)
        completion = client.chat.completions.create(
            model=cfg.cloud_model,
            messages=messages,
        )
        return completion.choices[0].message.content or ""

    # A base URL configured with a trailing slash would otherwise produce
    # "//api/chat", which the server may not route to the chat endpoint.
    base_url = str(cfg.local_base_url).rstrip("/")
    reply = httpx.post(
        f"{base_url}/api/chat",
        json={"model": cfg.local_model, "messages": messages, "stream": False},
        timeout=120,
    )
    reply.raise_for_status()
    return reply.json()["message"]["content"]


def embed(texts: list[str]) -> list[list[float]]:
    """Embed texts using the configured backend (EMBED_BACKEND: "cloud" or "local").

    Note: OpenAI and Ollama embeddings live in different vector spaces (and
    dimensions). A given database is tied to whichever backend created it — don't
    switch EMBED_BACKEND against an existing DB or cosine recall will break.

    Args:
        texts: Strings to embed in a single request.

    Returns:
        One embedding vector per input text, as returned by the backend.
        An empty ``texts`` yields ``[]`` without loading config or making
        a request.

    Raises:
        RuntimeError: The embed backend is not ``"local"`` and no OpenAI API
            key is configured.
        httpx.HTTPStatusError: The local server answered with an error
            status (raised by ``raise_for_status``).
    """
    if not texts:
        # Nothing to embed: skip the round trip (and the API-key check)
        # instead of sending an empty batch the backend may reject.
        return []

    cfg = load()
    if cfg.embed_backend == "local":
        # A base URL configured with a trailing slash would otherwise produce
        # "//api/embed", which the server may not route to the embed endpoint.
        base_url = str(cfg.local_base_url).rstrip("/")
        reply = httpx.post(
            f"{base_url}/api/embed",
            json={"model": cfg.local_embed_model, "input": texts},
            timeout=120,
        )
        reply.raise_for_status()
        return reply.json()["embeddings"]

    if not cfg.openai_api_key:
        raise RuntimeError("OPENAI_API_KEY is not set")
    client = OpenAI(api_key=cfg.openai_api_key)
    result = client.embeddings.create(model=cfg.embed_model, input=texts)
    return [item.embedding for item in result.data]