Files
project-lyra/cortex/llm/llm_router.py
T
serversdown 5f53fb32a4 feat: Refactor LLM router and integrate health check endpoint
- Simplified LLM call logic in llm_router.py, removing tool adapter complexity and enhancing error handling.
- Added health check endpoint to main.py for system status verification.
- Cleaned up router.py by removing unused imports and commented-out code, streamlining the structure.
- Updated docker-compose.yml to unify services under a single Lyra container, enhancing deployment simplicity.
- Created Dockerfile for unified container setup, including both Relay and Cortex services.
- Added QUICKSTART.md for improved onboarding and usage instructions.
- Implemented start.sh script to manage service startup and health checks.
2026-05-29 18:20:56 -04:00

165 lines
5.1 KiB
Python

# llm_router.py
import os
import httpx
import json
import logging
from typing import Optional, List, Dict
logger = logging.getLogger(__name__)
# ------------------------------------------------------------
# Backend Configuration
# ------------------------------------------------------------
def _backend_from_env(slot):
    """Read the provider/url/model settings for one backend slot from LLM_<slot>_* env vars."""
    prefix = f"LLM_{slot}_"
    return {
        # Provider names are normalized to lowercase for comparison.
        "provider": os.getenv(prefix + "PROVIDER", "").lower(),
        "url": os.getenv(prefix + "URL", ""),
        "model": os.getenv(prefix + "MODEL", ""),
    }


# One entry per configurable backend slot; unset variables become empty strings.
BACKENDS = {
    slot: _backend_from_env(slot)
    for slot in ("PRIMARY", "SECONDARY", "OPENAI", "FALLBACK")
}
# Only the OPENAI slot carries an API key.
BACKENDS["OPENAI"]["api_key"] = os.getenv("OPENAI_API_KEY", "")

# Backend slot used when a caller does not name one.
DEFAULT_BACKEND = "PRIMARY"
# Module-level async HTTP client, created once at import time and reused by
# call_llm for every backend request. The timeout value is in seconds.
http_client = httpx.AsyncClient(timeout=120.0)
# ------------------------------------------------------------
# Public LLM Call
# ------------------------------------------------------------
async def call_llm(
    prompt: Optional[str] = None,
    messages: Optional[List[Dict]] = None,
    backend: Optional[str] = None,
    temperature: float = 0.7,
    max_tokens: int = 512,
):
    """
    Send one chat request to a configured LLM backend and return its text.

    Supports the providers "ollama", "mi50" (llama.cpp server) and "openai".

    Args:
        prompt: Plain-text prompt. Used as a single user message when
            ``messages`` is empty or omitted.
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
            Takes precedence over ``prompt`` when non-empty.
        backend: Key into ``BACKENDS`` (case-insensitive). Defaults to
            ``DEFAULT_BACKEND``.
        temperature: Sampling temperature forwarded to the provider.
        max_tokens: Generation limit, sent as ``num_predict`` (ollama),
            ``n_predict`` (mi50) or ``max_tokens`` (openai).

    Returns:
        The response text extracted from the provider's JSON reply.

    Raises:
        RuntimeError: If the backend is unknown, its url/model is not
            configured, its provider is not implemented, or the request or
            response parsing fails (the underlying exception is chained as
            ``__cause__``).
    """
    backend = (backend or DEFAULT_BACKEND).upper()
    if backend not in BACKENDS:
        raise RuntimeError(f"Unknown backend '{backend}'")

    cfg = BACKENDS[backend]
    provider = cfg["provider"]
    url = cfg["url"]
    model = cfg["model"]
    if not url or not model:
        raise RuntimeError(f"Backend '{backend}' missing url/model in env")

    # A base URL configured with a trailing slash would otherwise produce
    # paths such as "//api/chat".
    base_url = url.rstrip("/")

    # Convert prompt -> messages if needed
    if not messages:
        messages = [{"role": "user", "content": prompt or ""}]

    # ------------------------------------------------------------
    # OLLAMA
    # ------------------------------------------------------------
    if provider == "ollama":
        payload = {
            "model": model,
            "messages": messages,
            "stream": False,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
            },
        }
        return await _post_for_text(
            "Ollama",
            f"{base_url}/api/chat",
            payload,
            lambda data: data["message"]["content"],
        )

    # ------------------------------------------------------------
    # MI50 (llama.cpp server)
    # ------------------------------------------------------------
    if provider == "mi50":
        payload = {
            "prompt": _messages_to_prompt(messages),
            "n_predict": max_tokens,
            "temperature": temperature,
            "stop": ["User:", "\nUser:", "Assistant:", "\n\n\n"],
        }
        return await _post_for_text(
            "MI50",
            f"{base_url}/completion",
            payload,
            lambda data: data.get("content", ""),
        )

    # ------------------------------------------------------------
    # OPENAI
    # ------------------------------------------------------------
    if provider == "openai":
        headers = {"Content-Type": "application/json"}
        api_key = cfg.get("api_key")
        # Only authenticate when a key is configured; never send the literal
        # "Bearer None" for backends that have no api_key entry.
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        return await _post_for_text(
            "OpenAI",
            f"{base_url}/chat/completions",
            payload,
            lambda data: data["choices"][0]["message"]["content"],
            headers=headers,
        )

    # ------------------------------------------------------------
    # Unknown Provider
    # ------------------------------------------------------------
    raise RuntimeError(f"Provider '{provider}' not implemented.")


def _messages_to_prompt(messages):
    """Flatten chat messages into a "Role: content" transcript ending with an "Assistant:" cue."""
    lines = []
    for msg in messages:
        # Fall back to defaults for missing or None fields so a malformed
        # message cannot raise AttributeError or render as the text "None".
        role = str(msg.get("role") or "user")
        content = msg.get("content", "")
        if content is None:
            content = ""
        lines.append(f"{role.capitalize()}: {content}")
    return "\n".join(lines) + "\nAssistant:"


async def _post_for_text(label, endpoint, payload, extract, headers=None):
    """POST ``payload`` as JSON to ``endpoint`` and return ``extract(response_json)``.

    Any failure (transport error, non-2xx status, unexpected response shape)
    is logged and re-raised as RuntimeError with the original exception chained.
    """
    request_kwargs = {"json": payload}
    if headers:
        request_kwargs["headers"] = headers
    try:
        response = await http_client.post(endpoint, **request_kwargs)
        response.raise_for_status()
        return extract(response.json())
    except Exception as e:
        logger.error("%s error: %s", label, e)
        raise RuntimeError(f"{label} API error: {e}") from e