import os

import httpx


# ============================================================
# Backend config lookup
# ============================================================
def get_backend_config(name: str):
    """
    Reads provider/URL/model for a backend.

    Example env:
        LLM_PRIMARY_PROVIDER=vllm
        LLM_PRIMARY_URL=http://10.0.0.43:8000
        LLM_PRIMARY_MODEL=/model
    """
    key = name.upper()
    provider = os.getenv(f"LLM_{key}_PROVIDER", "vllm").lower()
    base_url = os.getenv(f"LLM_{key}_URL", "").rstrip("/")
    model = os.getenv(f"LLM_{key}_MODEL", "/model")

    if not base_url:
        raise RuntimeError(f"Backend {name} has no URL configured.")

    return provider, base_url, model


# ============================================================
# Build the final API URL
# ============================================================
def build_url(provider: str, base_url: str):
    """
    Provider → correct endpoint.
    """
    if provider == "vllm":
        return f"{base_url}/v1/completions"
    if provider == "openai_completions":
        return f"{base_url}/v1/completions"
    if provider == "openai_chat":
        return f"{base_url}/v1/chat/completions"
    if provider == "ollama":
        return f"{base_url}/api/generate"
    raise RuntimeError(f"Unknown provider: {provider}")


# ============================================================
# Build the payload depending on provider
# ============================================================
def build_payload(provider: str, model: str, prompt: str, temperature: float):
    if provider == "vllm":
        return {
            "model": model,
            "prompt": prompt,
            "max_tokens": 512,
            "temperature": temperature,
        }
    if provider == "openai_completions":
        return {
            "model": model,
            "prompt": prompt,
            "max_tokens": 512,
            "temperature": temperature,
        }
    if provider == "openai_chat":
        return {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature,
        }
    if provider == "ollama":
        return {
            "model": model,
            "prompt": prompt,
            "stream": False,
        }
    raise RuntimeError(f"Unknown provider: {provider}")


# ============================================================
# Unified LLM call
# ============================================================
async def call_llm(prompt: str, backend: str = "primary", temperature: float = 0.7):
    provider, base_url, model = get_backend_config(backend)
    url = build_url(provider, base_url)
    payload = build_payload(provider, model, prompt, temperature)
    headers = {"Content-Type": "application/json"}

    # Cloud auth (OpenAI)
    if provider.startswith("openai"):
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY missing")
        headers["Authorization"] = f"Bearer {api_key}"

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(url, json=payload, headers=headers, timeout=45)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            return f"[LLM-Error] {e}"

    # =======================================================
    # Unified output extraction
    # =======================================================

    # vLLM + OpenAI completions
    if provider in ["vllm", "openai_completions"]:
        return (
            data["choices"][0].get("text")
            or data["choices"][0].get("message", {}).get("content", "")
        ).strip()

    # OpenAI chat
    if provider == "openai_chat":
        return data["choices"][0]["message"]["content"].strip()

    # Ollama
    if provider == "ollama":
        # Ollama returns: {"model": "...", "created_at": ..., "response": "..."}
        return data.get("response", "").strip()

    return str(data).strip()
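

# ------------------------------------------------------------
# Usage sketch (illustrative, not part of the module above).
# Assumes the "primary" backend is configured via env vars, e.g.
#   LLM_PRIMARY_PROVIDER=vllm
#   LLM_PRIMARY_URL=http://10.0.0.43:8000
#   LLM_PRIMARY_MODEL=/model
# ------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _demo():
        # call_llm returns either the extracted completion text
        # or an "[LLM-Error] ..." string on request failure.
        answer = await call_llm("Say hello in one short sentence.", backend="primary")
        print(answer)

    asyncio.run(_demo())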