v0.6.1 - reinstated UI, relay > cortex pipeline working
@@ -57,18 +57,17 @@ async def call_llm(
         raise RuntimeError(f"Backend '{backend}' missing url/model in env")
 
     # -------------------------------
-    # Provider: VLLM (your MI50)
+    # Provider: MI50 (llama.cpp server)
     # -------------------------------
-    if provider == "vllm":
+    if provider == "mi50":
         payload = {
             "model": model,
             "prompt": prompt,
-            "max_tokens": max_tokens,
+            "n_predict": max_tokens,
             "temperature": temperature
         }
-        r = requests.post(url, json=payload, timeout=120)
+        r = requests.post(f"{url}/completion", json=payload, timeout=120)
         data = r.json()
-        return data["choices"][0]["text"]
+        return data["content"]
 
     # -------------------------------
     # Provider: OLLAMA (your 3090)
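For context, here is a minimal standalone sketch of what the new MI50 path does against a llama.cpp server. The /completion endpoint, the n_predict parameter, and the "content" response field come from the diff above; the server address, function name, and default values are assumptions for illustration only, not part of the commit.

# Sketch of the MI50 (llama.cpp server) request path, outside of call_llm().
# ASSUMPTION: a llama.cpp server is listening at http://localhost:8080.
import requests

def call_mi50(prompt: str, max_tokens: int = 256, temperature: float = 0.7) -> str:
    url = "http://localhost:8080"  # base URL of the llama.cpp server (assumed)
    payload = {
        "prompt": prompt,
        "n_predict": max_tokens,   # llama.cpp expects n_predict rather than max_tokens
        "temperature": temperature,
    }
    r = requests.post(f"{url}/completion", json=payload, timeout=120)
    r.raise_for_status()
    return r.json()["content"]     # llama.cpp returns the generated text under "content"

if __name__ == "__main__":
    print(call_mi50("Say hello in one word."))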