v0.6.1 - reinstated UI, relay > cortex pipeline working
@@ -57,18 +57,17 @@ async def call_llm(
         raise RuntimeError(f"Backend '{backend}' missing url/model in env")
 
     # -------------------------------
-    # Provider: VLLM (your MI50)
+    # Provider: MI50 (llama.cpp server)
     # -------------------------------
-    if provider == "vllm":
+    if provider == "mi50":
         payload = {
             "model": model,
             "prompt": prompt,
-            "max_tokens": max_tokens,
+            "n_predict": max_tokens,
             "temperature": temperature
         }
-        r = requests.post(url, json=payload, timeout=120)
+        r = requests.post(f"{url}/completion", json=payload, timeout=120)
         data = r.json()
-        return data["choices"][0]["text"]
+        return data["content"]
 
     # -------------------------------
     # Provider: OLLAMA (your 3090)
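For context, here is a minimal standalone sketch of what the new MI50 path does against a llama.cpp server. The /completion endpoint, the n_predict parameter, and the "content" response field come from the diff above; the server address, function name, and default values are assumptions for illustration only, not part of the commit.

# Sketch of the MI50 (llama.cpp server) request path, outside of call_llm().
# ASSUMPTION: a llama.cpp server is listening at http://localhost:8080.
import requests

def call_mi50(prompt: str, max_tokens: int = 256, temperature: float = 0.7) -> str:
    url = "http://localhost:8080"  # base URL of the llama.cpp server (assumed)
    payload = {
        "prompt": prompt,
        "n_predict": max_tokens,   # llama.cpp expects n_predict rather than max_tokens
        "temperature": temperature,
    }
    r = requests.post(f"{url}/completion", json=payload, timeout=120)
    r.raise_for_status()
    return r.json()["content"]     # llama.cpp returns the generated text under "content"

if __name__ == "__main__":
    print(call_mi50("Say hello in one word."))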