Initial clean commit - unified Lyra stack

commit 94fb091e59 by serversdwn
2025-11-16 03:17:32 -05:00
270 changed files with 74200 additions and 0 deletions

cortex/llm_router.py Normal file

@@ -0,0 +1,137 @@
import os
import httpx


# ============================================================
# Backend config lookup
# ============================================================
def get_backend_config(name: str):
    """
    Reads provider/URL/model for a backend.

    Example env:
        LLM_PRIMARY_PROVIDER=vllm
        LLM_PRIMARY_URL=http://10.0.0.43:8000
        LLM_PRIMARY_MODEL=/model
    """
    key = name.upper()
    provider = os.getenv(f"LLM_{key}_PROVIDER", "vllm").lower()
    base_url = os.getenv(f"LLM_{key}_URL", "").rstrip("/")
    model = os.getenv(f"LLM_{key}_MODEL", "/model")

    if not base_url:
        raise RuntimeError(f"Backend {name} has no URL configured.")

    return provider, base_url, model
# ============================================================
# Build the final API URL
# ============================================================
def build_url(provider: str, base_url: str):
    """
    Provider → correct endpoint.
    """
    if provider == "vllm":
        return f"{base_url}/v1/completions"
    if provider == "openai_completions":
        return f"{base_url}/v1/completions"
    if provider == "openai_chat":
        return f"{base_url}/v1/chat/completions"
    if provider == "ollama":
        return f"{base_url}/api/generate"
    raise RuntimeError(f"Unknown provider: {provider}")
# ============================================================
# Build the payload depending on provider
# ============================================================
def build_payload(provider: str, model: str, prompt: str, temperature: float):
    if provider == "vllm":
        return {
            "model": model,
            "prompt": prompt,
            "max_tokens": 512,
            "temperature": temperature
        }
    if provider == "openai_completions":
        return {
            "model": model,
            "prompt": prompt,
            "max_tokens": 512,
            "temperature": temperature
        }
    if provider == "openai_chat":
        return {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature
        }
    if provider == "ollama":
        return {
            "model": model,
            "prompt": prompt,
            "stream": False
        }
    raise RuntimeError(f"Unknown provider: {provider}")
# ============================================================
# Unified LLM call
# ============================================================
async def call_llm(prompt: str,
                   backend: str = "primary",
                   temperature: float = 0.7):
    provider, base_url, model = get_backend_config(backend)
    url = build_url(provider, base_url)
    payload = build_payload(provider, model, prompt, temperature)

    headers = {"Content-Type": "application/json"}

    # Cloud auth (OpenAI)
    if provider.startswith("openai"):
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY missing")
        headers["Authorization"] = f"Bearer {api_key}"

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(url, json=payload, headers=headers, timeout=45)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            # Transport/HTTP/JSON failures are returned in-band as a tagged
            # string rather than raised, so callers always get text back.
            return f"[LLM-Error] {e}"

    # =======================================================
    # Unified output extraction
    # =======================================================

    # vLLM + OpenAI completions
    if provider in ["vllm", "openai_completions"]:
        return (
            data["choices"][0].get("text") or
            data["choices"][0].get("message", {}).get("content", "")
        ).strip()

    # OpenAI chat
    if provider == "openai_chat":
        return data["choices"][0]["message"]["content"].strip()

    # Ollama
    if provider == "ollama":
        # Ollama returns: {"model": "...", "created_at": ..., "response": "..."}
        return data.get("response", "").strip()

    # Unknown response shape: fall back to the raw JSON rendered as a string
    return str(data).strip()
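
For context, a minimal usage sketch (not part of the commit), assuming the LLM_PRIMARY_* variables from the get_backend_config docstring are set and that cortex is importable as a package:

import asyncio
import os

from cortex.llm_router import call_llm

# Hypothetical local vLLM backend; values mirror the docstring example.
os.environ.setdefault("LLM_PRIMARY_PROVIDER", "vllm")
os.environ.setdefault("LLM_PRIMARY_URL", "http://10.0.0.43:8000")
os.environ.setdefault("LLM_PRIMARY_MODEL", "/model")

async def main():
    # Routes to the "primary" backend; failures come back as "[LLM-Error] ..." strings.
    reply = await call_llm("Say hello in one sentence.", backend="primary", temperature=0.2)
    print(reply)

asyncio.run(main())

One behavioral note: build_payload does not forward temperature or a token limit for the ollama provider, so Ollama requests run with the model's defaults unless an "options" object is added to that payload later.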