v0.6.1 - reinstated UI, relay → cortex pipeline working

serversdwn
2025-12-11 16:28:25 -05:00
parent 30f6c1a3da
commit 6a20d3981f
9 changed files with 1143 additions and 456 deletions


@@ -57,18 +57,17 @@ async def call_llm(
         raise RuntimeError(f"Backend '{backend}' missing url/model in env")

     # -------------------------------
-    # Provider: VLLM (your MI50)
+    # Provider: MI50 (llama.cpp server)
     # -------------------------------
-    if provider == "vllm":
+    if provider == "mi50":
         payload = {
             "model": model,
             "prompt": prompt,
-            "max_tokens": max_tokens,
+            "n_predict": max_tokens,
             "temperature": temperature
         }
-        r = requests.post(url, json=payload, timeout=120)
+        r = requests.post(f"{url}/completion", json=payload, timeout=120)
         data = r.json()
-        return data["choices"][0]["text"]
+        return data["content"]

     # -------------------------------
     # Provider: OLLAMA (your 3090)
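For context on the new request path: llama.cpp's built-in HTTP server exposes a POST /completion endpoint that takes n_predict (its name for the max-new-tokens limit) and returns the generated text in the top-level "content" field of the JSON response, which is why the vLLM-style data["choices"][0]["text"] parsing no longer applies. A minimal standalone sketch of that call, assuming a llama.cpp server is listening on a placeholder URL (in the actual code the URL, model, and sampling settings come from env):

import requests

LLAMA_URL = "http://localhost:8080"  # placeholder; real value comes from env

payload = {
    "prompt": "Say hello.",
    "n_predict": 64,        # llama.cpp's equivalent of max_tokens
    "temperature": 0.7,
}

# llama.cpp's server returns JSON like {"content": "...", ...};
# the generated text is in the top-level "content" field.
r = requests.post(f"{LLAMA_URL}/completion", json=payload, timeout=120)
r.raise_for_status()
print(r.json()["content"])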