# llm_router.py
import json
import logging
import os
from typing import Any, Callable, Dict, List, Optional

import httpx

logger = logging.getLogger(__name__)

# ------------------------------------------------------------
# Backend Configuration
# ------------------------------------------------------------
# Each backend is described by environment variables read once at import
# time. Only the OPENAI entry carries an "api_key"; the others have none.
BACKENDS = {
    "PRIMARY": {
        "provider": os.getenv("LLM_PRIMARY_PROVIDER", "").lower(),
        "url": os.getenv("LLM_PRIMARY_URL", ""),
        "model": os.getenv("LLM_PRIMARY_MODEL", ""),
    },
    "SECONDARY": {
        "provider": os.getenv("LLM_SECONDARY_PROVIDER", "").lower(),
        "url": os.getenv("LLM_SECONDARY_URL", ""),
        "model": os.getenv("LLM_SECONDARY_MODEL", ""),
    },
    "OPENAI": {
        "provider": os.getenv("LLM_OPENAI_PROVIDER", "").lower(),
        "url": os.getenv("LLM_OPENAI_URL", ""),
        "model": os.getenv("LLM_OPENAI_MODEL", ""),
        "api_key": os.getenv("OPENAI_API_KEY", ""),
    },
    "FALLBACK": {
        "provider": os.getenv("LLM_FALLBACK_PROVIDER", "").lower(),
        "url": os.getenv("LLM_FALLBACK_URL", ""),
        "model": os.getenv("LLM_FALLBACK_MODEL", ""),
    },
}

DEFAULT_BACKEND = "PRIMARY"

# Shared async client, created at import time with a 120 second timeout.
http_client = httpx.AsyncClient(timeout=120.0)


# ------------------------------------------------------------
# Internal helpers
# ------------------------------------------------------------
def _messages_to_prompt(messages: List[Dict]) -> str:
    """Flatten chat messages into a "Role: content" transcript.

    The transcript ends with an open "Assistant:" turn for the model to
    complete. Missing roles default to "user", missing content to "".
    """
    turns = [
        f"{msg.get('role', 'user').capitalize()}: {msg.get('content', '')}"
        for msg in messages
    ]
    return "\n".join(turns) + "\nAssistant:"


async def _post_and_extract(
    label: str,
    endpoint: str,
    payload: Dict[str, Any],
    extract: Callable[[Any], str],
    headers: Optional[Dict[str, str]] = None,
) -> str:
    """POST *payload* as JSON to *endpoint* and pull the text out of the reply.

    Any failure (transport error, non-2xx status, invalid JSON, or an
    unexpected response shape raised by *extract*) is logged and re-raised
    as ``RuntimeError("<label> API error: ...")`` with the original
    exception attached as the cause.
    """
    request_kwargs: Dict[str, Any] = {"json": payload}
    if headers:
        request_kwargs["headers"] = headers

    try:
        response = await http_client.post(endpoint, **request_kwargs)
        response.raise_for_status()
        # Extraction stays inside the try so a malformed response body is
        # reported the same way as a transport failure.
        return extract(response.json())
    except Exception as e:
        logger.error("%s error: %s", label, e)
        raise RuntimeError(f"{label} API error: {e}") from e


# ------------------------------------------------------------
# Public LLM Call
# ------------------------------------------------------------
async def call_llm(
    prompt: Optional[str] = None,
    messages: Optional[List[Dict]] = None,
    backend: Optional[str] = None,
    temperature: float = 0.7,
    max_tokens: int = 512,
) -> str:
    """Send a prompt or chat history to a configured backend and return its text.

    Supports the providers: ollama, mi50 (llama.cpp server), openai.

    Args:
        prompt: Plain-text prompt. Used only when *messages* is empty or
            omitted, in which case it becomes a single user message
            (``None`` is treated as an empty string).
        messages: Chat messages as dicts with "role" and "content" keys.
            Takes precedence over *prompt* when non-empty.
        backend: Key into ``BACKENDS`` (case-insensitive). Defaults to
            ``DEFAULT_BACKEND``.
        temperature: Sampling temperature forwarded to the provider.
        max_tokens: Maximum number of tokens to generate, forwarded under
            the provider-specific field name.

    Returns:
        The response text extracted from the provider's JSON reply.

    Raises:
        RuntimeError: If the backend is unknown, lacks a url/model, uses a
            provider that is not implemented, or the HTTP call or response
            parsing fails.
    """
    backend = (backend or DEFAULT_BACKEND).upper()
    if backend not in BACKENDS:
        raise RuntimeError(f"Unknown backend '{backend}'")

    cfg = BACKENDS[backend]
    provider = cfg["provider"]
    url = cfg["url"]
    model = cfg["model"]

    if not url or not model:
        raise RuntimeError(f"Backend '{backend}' missing url/model in env")

    # A base URL configured with a trailing slash would otherwise produce
    # paths such as "//api/chat".
    base_url = url.rstrip("/")

    # Convert prompt → messages if needed
    if not messages:
        messages = [{"role": "user", "content": prompt or ""}]

    # ------------------------------------------------------------
    # OLLAMA
    # ------------------------------------------------------------
    if provider == "ollama":
        payload = {
            "model": model,
            "messages": messages,
            "stream": False,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
            },
        }
        return await _post_and_extract(
            "Ollama",
            f"{base_url}/api/chat",
            payload,
            lambda data: data["message"]["content"],
        )

    # ------------------------------------------------------------
    # MI50 (llama.cpp server)
    # ------------------------------------------------------------
    if provider == "mi50":
        # This endpoint takes a plain prompt rather than chat messages.
        payload = {
            "prompt": _messages_to_prompt(messages),
            "n_predict": max_tokens,
            "temperature": temperature,
            "stop": ["User:", "\nUser:", "Assistant:", "\n\n\n"],
        }
        return await _post_and_extract(
            "MI50",
            f"{base_url}/completion",
            payload,
            lambda data: data.get("content", ""),
        )

    # ------------------------------------------------------------
    # OPENAI
    # ------------------------------------------------------------
    if provider == "openai":
        headers = {"Content-Type": "application/json"}
        # Only backends that define a non-empty "api_key" send credentials;
        # this avoids emitting a literal "Bearer None" header for backends
        # configured without one.
        api_key = cfg.get("api_key")
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        return await _post_and_extract(
            "OpenAI",
            f"{base_url}/chat/completions",
            payload,
            lambda data: data["choices"][0]["message"]["content"],
            headers=headers,
        )

    # ------------------------------------------------------------
    # Unknown Provider
    # ------------------------------------------------------------
    raise RuntimeError(f"Provider '{provider}' not implemented.")