Major rewire, all modules connected. Intake still wonky

serversdwn
2025-11-28 15:14:47 -05:00
parent 734999e8bb
commit a83405beb1
19 changed files with 10109 additions and 4072 deletions
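The rewired llm_router.py below drops the per-call env lookup and instead loads a static backend registry from the root .env at import time. For reference, a sketch of the variable layout that registry reads; only the variable names come from the diff below, and every value (hosts, ports, models, which slot uses which provider) is a placeholder:

# .env (illustrative values only)
LLM_PRIMARY_PROVIDER=vllm
LLM_PRIMARY_URL=http://<mi50-host>:<port>/v1/completions    # vllm branch posts to this URL as-is
LLM_PRIMARY_MODEL=<model-name>
LLM_SECONDARY_PROVIDER=ollama
LLM_SECONDARY_URL=http://<3090-host>:<port>                 # router appends /api/chat
LLM_SECONDARY_MODEL=<model-name>
LLM_OPENAI_PROVIDER=openai
LLM_OPENAI_URL=https://api.openai.com/v1                    # router appends /chat/completions
LLM_OPENAI_MODEL=<model-name>
OPENAI_API_KEY=<key>
LLM_FALLBACK_PROVIDER=ollama
LLM_FALLBACK_URL=http://<3090-host>:<port>
LLM_FALLBACK_MODEL=<model-name>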

llm_router.py

@@ -1,102 +1,114 @@
 # llm_router.py
 import os
 import requests
 import json
-# ---------------------------------------------
-# Load backend definition from .env
-# ---------------------------------------------
+# ------------------------------------------------------------
+# Load backend registry from root .env
+# ------------------------------------------------------------
-def load_backend_config(name: str):
-    """
-    Given a backend name like 'PRIMARY' or 'OPENAI',
-    load the matching provider / url / model from env.
-    """
-    prefix = f"LLM_{name.upper()}"
-    provider = os.getenv(f"{prefix}_PROVIDER")
-    url = os.getenv(f"{prefix}_URL")
-    model = os.getenv(f"{prefix}_MODEL")
-    if not provider or not url or not model:
-        raise RuntimeError(
-            f"Backend '{name}' is missing configuration. "
-            f"Expected {prefix}_PROVIDER / URL / MODEL in .env"
-        )
-    return provider, url.rstrip("/"), model
+BACKENDS = {
+    "PRIMARY": {
+        "provider": os.getenv("LLM_PRIMARY_PROVIDER", "").lower(),
+        "url": os.getenv("LLM_PRIMARY_URL", ""),
+        "model": os.getenv("LLM_PRIMARY_MODEL", "")
+    },
+    "SECONDARY": {
+        "provider": os.getenv("LLM_SECONDARY_PROVIDER", "").lower(),
+        "url": os.getenv("LLM_SECONDARY_URL", ""),
+        "model": os.getenv("LLM_SECONDARY_MODEL", "")
+    },
+    "OPENAI": {
+        "provider": os.getenv("LLM_OPENAI_PROVIDER", "").lower(),
+        "url": os.getenv("LLM_OPENAI_URL", ""),
+        "model": os.getenv("LLM_OPENAI_MODEL", ""),
+        "api_key": os.getenv("OPENAI_API_KEY", "")
+    },
+    "FALLBACK": {
+        "provider": os.getenv("LLM_FALLBACK_PROVIDER", "").lower(),
+        "url": os.getenv("LLM_FALLBACK_URL", ""),
+        "model": os.getenv("LLM_FALLBACK_MODEL", "")
+    },
+}
+DEFAULT_BACKEND = "PRIMARY"
-# ---------------------------------------------
-# Core call_llm() — fail hard, no fallback
-# ---------------------------------------------
+# ------------------------------------------------------------
+# Public call
+# ------------------------------------------------------------
-def call_llm(prompt: str, backend_env_var: str):
-    """
-    Example:
-        call_llm(prompt, backend_env_var="CORTEX_LLM")
-    backend_env_var should contain one of:
-        PRIMARY, SECONDARY, OPENAI, FALLBACK, etc
-    """
-    backend_name = os.getenv(backend_env_var)
-    if not backend_name:
-        raise RuntimeError(f"{backend_env_var} is not set in .env")
-    provider, base_url, model = load_backend_config(backend_name)
-    # ---------------------------------------------
-    # Provider-specific behavior
-    # ---------------------------------------------
+async def call_llm(
+    prompt: str,
+    backend: str | None = None,
+    temperature: float = 0.7,
+    max_tokens: int = 512,
+):
+    backend = (backend or DEFAULT_BACKEND).upper()
+    if backend not in BACKENDS:
+        raise RuntimeError(f"Unknown backend '{backend}'")
+    cfg = BACKENDS[backend]
+    provider = cfg["provider"]
+    url = cfg["url"]
+    model = cfg["model"]
+    if not url or not model:
+        raise RuntimeError(f"Backend '{backend}' missing url/model in env")
+    # -------------------------------
+    # Provider: VLLM (your MI50)
+    # -------------------------------
     if provider == "vllm":
-        # vLLM OpenAI-compatible API
-        response = requests.post(
-            f"{base_url}/v1/completions",
-            json={
-                "model": model,
-                "prompt": prompt,
-                "max_tokens": 1024,
-                "temperature": float(os.getenv("LLM_TEMPERATURE", "0.7"))
-            },
-            timeout=30
-        )
-        response.raise_for_status()
-        data = response.json()
+        payload = {
+            "model": model,
+            "prompt": prompt,
+            "max_tokens": max_tokens,
+            "temperature": temperature
+        }
+        r = requests.post(url, json=payload, timeout=120)
+        data = r.json()
         return data["choices"][0]["text"]
-    elif provider == "ollama":
-        response = requests.post(
-            f"{base_url}/api/chat",
-            json={
-                "model": model,
-                "messages": [{"role": "user", "content": prompt}],
-                "stream": False
-            },
-            timeout=30
-        )
-        response.raise_for_status()
-        data = response.json()
+    # -------------------------------
+    # Provider: OLLAMA (your 3090)
+    # -------------------------------
+    if provider == "ollama":
+        payload = {
+            "model": model,
+            "messages": [
+                {"role": "user", "content": prompt}
+            ],
+            "stream": False  # <-- critical fix
+        }
+        r = requests.post(f"{url}/api/chat", json=payload, timeout=120)
+        data = r.json()
         return data["message"]["content"]
-    elif provider == "openai":
-        api_key = os.getenv("OPENAI_API_KEY")
-        if not api_key:
-            raise RuntimeError("OPENAI_API_KEY missing but provider=openai was selected")
-        response = requests.post(
-            f"{base_url}/chat/completions",
-            headers={"Authorization": f"Bearer {api_key}"},
-            json={
-                "model": model,
-                "messages": [{"role": "user", "content": prompt}],
-                "temperature": float(os.getenv("LLM_TEMPERATURE", "0.7"))
-            },
-            timeout=30
-        )
-        response.raise_for_status()
-        data = response.json()
+    # -------------------------------
+    # Provider: OPENAI
+    # -------------------------------
+    if provider == "openai":
+        headers = {
+            "Authorization": f"Bearer {cfg['api_key']}",
+            "Content-Type": "application/json"
+        }
+        payload = {
+            "model": model,
+            "messages": [
+                {"role": "user", "content": prompt}
+            ],
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+        }
+        r = requests.post(f"{url}/chat/completions", json=payload, headers=headers, timeout=120)
+        data = r.json()
        return data["choices"][0]["message"]["content"]
-    else:
-        raise RuntimeError(f"Unknown LLM provider: {provider}")
+    # -------------------------------
+    # Unknown provider
+    # -------------------------------
+    raise RuntimeError(f"Provider '{provider}' not implemented.")