5f53fb32a4
- Simplified LLM call logic in llm_router.py, removing tool adapter complexity and enhancing error handling. - Added health check endpoint to main.py for system status verification. - Cleaned up router.py by removing unused imports and commented-out code, streamlining the structure. - Updated docker-compose.yml to unify services under a single Lyra container, enhancing deployment simplicity. - Created Dockerfile for unified container setup, including both Relay and Cortex services. - Added QUICKSTART.md for improved onboarding and usage instructions. - Implemented start.sh script to manage service startup and health checks.
165 lines
5.1 KiB
Python
165 lines
5.1 KiB
Python
# llm_router.py
|
|
|
|
import os
|
|
import httpx
|
|
import json
|
|
import logging
|
|
from typing import Optional, List, Dict
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ------------------------------------------------------------
|
|
# Backend Configuration
|
|
# ------------------------------------------------------------
|
|
|
|
# Every backend reads the same three environment variables, named
# LLM_<NAME>_PROVIDER / LLM_<NAME>_URL / LLM_<NAME>_MODEL. Unset variables
# become empty strings; the provider is lower-cased.
BACKENDS = {
    name: {
        "provider": os.getenv(f"LLM_{name}_PROVIDER", "").lower(),
        "url": os.getenv(f"LLM_{name}_URL", ""),
        "model": os.getenv(f"LLM_{name}_MODEL", ""),
    }
    for name in ("PRIMARY", "SECONDARY", "OPENAI", "FALLBACK")
}

# Only the OPENAI entry carries an API key, read from OPENAI_API_KEY.
BACKENDS["OPENAI"]["api_key"] = os.getenv("OPENAI_API_KEY", "")

# Backend used when a caller does not name one explicitly.
DEFAULT_BACKEND = "PRIMARY"
|
|
|
|
# Single module-level async HTTP client that call_llm uses for its Ollama,
# MI50 and OpenAI requests. timeout=120.0 is the timeout handed to httpx
# (httpx timeouts are expressed in seconds).
http_client = httpx.AsyncClient(timeout=120.0)
|
|
|
|
# ------------------------------------------------------------
|
|
# Public LLM Call
|
|
# ------------------------------------------------------------
|
|
|
|
def _messages_to_prompt(messages: List[Dict]) -> str:
    """Flatten chat messages into a "Role: content" transcript.

    Each message becomes one line, and the transcript ends with an
    "Assistant:" cue for the completion endpoint. A missing or None role
    falls back to "user"; a missing or None content becomes an empty string
    rather than the literal text "None".
    """
    lines = []
    for msg in messages:
        role = msg.get("role") or "user"
        content = msg.get("content")
        if content is None:
            content = ""
        lines.append(f"{role.capitalize()}: {content}")
    return "\n".join(lines) + "\nAssistant:"


async def call_llm(
    prompt: Optional[str] = None,
    messages: Optional[List[Dict]] = None,
    backend: Optional[str] = None,
    temperature: float = 0.7,
    max_tokens: int = 512,
) -> str:
    """Send one chat/completion request to a configured backend.

    Supports the providers "ollama", "mi50" (llama.cpp server) and "openai".

    Args:
        prompt: Text used as a single user message when ``messages`` is
            empty or not given.
        messages: Chat messages as dicts with "role" and "content" keys.
            Takes precedence over ``prompt`` when non-empty.
        backend: Key into ``BACKENDS`` (case-insensitive). Defaults to
            ``DEFAULT_BACKEND``.
        temperature: Sampling temperature forwarded to the provider.
        max_tokens: Generation limit, sent as ``num_predict`` (ollama),
            ``n_predict`` (mi50) or ``max_tokens`` (openai).

    Returns:
        The response text extracted from the provider's JSON reply.

    Raises:
        RuntimeError: If the backend is unknown, its url/model is not
            configured, the provider is not implemented, or the HTTP
            request / response parsing fails (the original exception is
            chained as ``__cause__``).
    """
    backend = (backend or DEFAULT_BACKEND).upper()
    if backend not in BACKENDS:
        raise RuntimeError(f"Unknown backend '{backend}'")

    cfg = BACKENDS[backend]
    provider = cfg["provider"]
    url = cfg["url"]
    model = cfg["model"]

    if not url or not model:
        raise RuntimeError(f"Backend '{backend}' missing url/model in env")

    # A trailing slash in the configured URL would otherwise yield paths
    # such as "//api/chat".
    base_url = url.rstrip("/")

    # Convert prompt -> messages if needed
    if not messages:
        messages = [{"role": "user", "content": prompt or ""}]

    request_kwargs = {}
    if provider == "ollama":
        label = "Ollama"
        endpoint = f"{base_url}/api/chat"
        payload = {
            "model": model,
            "messages": messages,
            "stream": False,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
            },
        }
    elif provider == "mi50":
        label = "MI50"
        endpoint = f"{base_url}/completion"
        payload = {
            "prompt": _messages_to_prompt(messages),
            "n_predict": max_tokens,
            "temperature": temperature,
            "stop": ["User:", "\nUser:", "Assistant:", "\n\n\n"],
        }
    elif provider == "openai":
        label = "OpenAI"
        endpoint = f"{base_url}/chat/completions"
        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        headers = {"Content-Type": "application/json"}
        # Only send credentials when a key is configured; otherwise the
        # header would read "Bearer None" or "Bearer ".
        api_key = cfg.get("api_key")
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        request_kwargs["headers"] = headers
    else:
        raise RuntimeError(f"Provider '{provider}' not implemented.")

    # The broad except is deliberate: transport errors, HTTP error statuses,
    # invalid JSON and unexpected response shapes are all reported to the
    # caller as RuntimeError.
    try:
        r = await http_client.post(endpoint, json=payload, **request_kwargs)
        r.raise_for_status()
        data = r.json()
        if provider == "ollama":
            return data["message"]["content"]
        if provider == "mi50":
            return data.get("content", "")
        return data["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error("%s error: %s", label, e)
        raise RuntimeError(f"{label} API error: {e}") from e