v0.5.2 - fixed: LLM router async handling, relay-UI mismatch, intake summarization failure, among others.
Memory relevance threshold increased.
@@ -282,11 +282,17 @@ JSON only. No text outside JSON.
     try:
         llm_response = await call_llm(
             prompt,
             backend=INTAKE_LLM,
             temperature=0.2
         )
 
+        print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}")
+        print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}")
+
         # LLM should return JSON, parse it
+        if not llm_response or not llm_response.strip():
+            raise ValueError("Empty response from LLM")
+
         summary = json.loads(llm_response)
 
         return {
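The intake fix above guards against an empty response before json.loads, but models also commonly wrap JSON in markdown fences, which would still fail to parse. A minimal sketch of a more defensive parse step; the helper name and fence-stripping behavior are illustrative assumptions, not part of this commit:

    import json
    import re

    def parse_llm_json(llm_response: str) -> dict:
        # Hypothetical helper: tolerate a ```json ... ``` fence around the payload
        if not llm_response or not llm_response.strip():
            raise ValueError("Empty response from LLM")
        text = llm_response.strip()
        match = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL)
        if match:
            text = match.group(1)
        return json.loads(text)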
@@ -1,7 +1,10 @@
 # llm_router.py
 import os
 import requests
+import httpx
 import json
+import logging
+
+logger = logging.getLogger(__name__)
 
 # ------------------------------------------------------------
 # Load backend registry from root .env
@@ -33,6 +36,9 @@ BACKENDS = {
 
 DEFAULT_BACKEND = "PRIMARY"
 
+# Reusable async HTTP client
+http_client = httpx.AsyncClient(timeout=120.0)
+
 
 # ------------------------------------------------------------
 # Public call
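A module-level httpx.AsyncClient keeps connections pooled across calls, but it should be closed when the host app shuts down. A minimal sketch, assuming a FastAPI host; the relay framework is not shown in this diff, so the lifespan hook is an assumption:

    # Sketch: close the router's shared client at shutdown (host framework assumed)
    from contextlib import asynccontextmanager
    from fastapi import FastAPI

    import llm_router

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        yield
        # Release pooled connections held by the shared AsyncClient
        await llm_router.http_client.aclose()

    app = FastAPI(lifespan=lifespan)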
@@ -65,9 +71,20 @@ async def call_llm(
         "n_predict": max_tokens,
         "temperature": temperature
     }
-    r = requests.post(f"{url}/completion", json=payload, timeout=120)
-    data = r.json()
-    return data["content"]
+    try:
+        r = await http_client.post(f"{url}/completion", json=payload)
+        r.raise_for_status()
+        data = r.json()
+        return data.get("content", "")
+    except httpx.HTTPError as e:
+        logger.error(f"HTTP error calling mi50: {type(e).__name__}: {str(e)}")
+        raise RuntimeError(f"LLM API error (mi50): {type(e).__name__}: {str(e)}")
+    except (KeyError, json.JSONDecodeError) as e:
+        logger.error(f"Response parsing error from mi50: {e}")
+        raise RuntimeError(f"Invalid response format (mi50): {e}")
+    except Exception as e:
+        logger.error(f"Unexpected error calling mi50: {type(e).__name__}: {str(e)}")
+        raise RuntimeError(f"Unexpected error (mi50): {type(e).__name__}: {str(e)}")
 
 # -------------------------------
 # Provider: OLLAMA (your 3090)
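For reference, a usage sketch of the new async entry point. The signature (prompt, backend, temperature) is inferred from the hunks above, and the RuntimeError is what the new except blocks raise on failure:

    import asyncio
    from llm_router import call_llm

    async def main():
        try:
            text = await call_llm("Say hello.", backend="PRIMARY", temperature=0.2)
            print(text)
        except RuntimeError as e:
            # Raised by the router's new error handling on HTTP or parse failures
            print(f"LLM call failed: {e}")

    asyncio.run(main())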
@@ -78,13 +95,22 @@ async def call_llm(
         "messages": [
             {"role": "user", "content": prompt}
         ],
-        "stream": False  # <-- critical fix
+        "stream": False
     }
 
-    r = requests.post(f"{url}/api/chat", json=payload, timeout=120)
-    data = r.json()
-
-    return data["message"]["content"]
+    try:
+        r = await http_client.post(f"{url}/api/chat", json=payload)
+        r.raise_for_status()
+        data = r.json()
+        return data["message"]["content"]
+    except httpx.HTTPError as e:
+        logger.error(f"HTTP error calling ollama: {type(e).__name__}: {str(e)}")
+        raise RuntimeError(f"LLM API error (ollama): {type(e).__name__}: {str(e)}")
+    except (KeyError, json.JSONDecodeError) as e:
+        logger.error(f"Response parsing error from ollama: {e}")
+        raise RuntimeError(f"Invalid response format (ollama): {e}")
+    except Exception as e:
+        logger.error(f"Unexpected error calling ollama: {type(e).__name__}: {str(e)}")
+        raise RuntimeError(f"Unexpected error (ollama): {type(e).__name__}: {str(e)}")
 
 
 # -------------------------------
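"stream": False matters here because Ollama's /api/chat streams newline-delimited JSON chunks by default, and a single r.json() on a streamed body fails to parse; that is why the flag was originally marked a critical fix. If streaming were ever wanted, the body would have to be consumed line by line, roughly like this sketch (illustrative only, not part of this commit):

    # Sketch: consuming Ollama's default streaming NDJSON; payload would need "stream": True
    import json
    from llm_router import http_client

    async def read_ollama_stream(url: str, payload: dict) -> str:
        parts = []
        async with http_client.stream("POST", f"{url}/api/chat", json=payload) as r:
            async for line in r.aiter_lines():
                if not line.strip():
                    continue
                chunk = json.loads(line)  # Ollama emits one JSON object per line
                parts.append(chunk.get("message", {}).get("content", ""))
        return "".join(parts)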
@@ -103,9 +129,20 @@ async def call_llm(
         "temperature": temperature,
         "max_tokens": max_tokens,
     }
-    r = requests.post(f"{url}/chat/completions", json=payload, headers=headers, timeout=120)
-    data = r.json()
-    return data["choices"][0]["message"]["content"]
+    try:
+        r = await http_client.post(f"{url}/chat/completions", json=payload, headers=headers)
+        r.raise_for_status()
+        data = r.json()
+        return data["choices"][0]["message"]["content"]
+    except httpx.HTTPError as e:
+        logger.error(f"HTTP error calling openai: {type(e).__name__}: {str(e)}")
+        raise RuntimeError(f"LLM API error (openai): {type(e).__name__}: {str(e)}")
+    except (KeyError, json.JSONDecodeError) as e:
+        logger.error(f"Response parsing error from openai: {e}")
+        raise RuntimeError(f"Invalid response format (openai): {e}")
+    except Exception as e:
+        logger.error(f"Unexpected error calling openai: {type(e).__name__}: {str(e)}")
+        raise RuntimeError(f"Unexpected error (openai): {type(e).__name__}: {str(e)}")
 
 # -------------------------------
 # Unknown provider
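The three provider branches now carry identical try/except scaffolding. A possible follow-up (a sketch only; the helper name is hypothetical and not in this commit) is to factor the shared POST and error handling into one coroutine, leaving only the payload shape and response extraction per provider:

    # Sketch: shared POST + error handling for all providers (hypothetical refactor)
    async def _post_json(name: str, url: str, payload: dict, headers: dict | None = None) -> dict:
        try:
            r = await http_client.post(url, json=payload, headers=headers)
            r.raise_for_status()
            return r.json()
        except httpx.HTTPError as e:
            logger.error(f"HTTP error calling {name}: {type(e).__name__}: {e}")
            raise RuntimeError(f"LLM API error ({name}): {type(e).__name__}: {e}")
        except json.JSONDecodeError as e:
            logger.error(f"Response parsing error from {name}: {e}")
            raise RuntimeError(f"Invalid response format ({name}): {e}")

    # e.g. the openai-style branch would reduce to:
    #     data = await _post_json("openai", f"{url}/chat/completions", payload, headers)
    #     return data["choices"][0]["message"]["content"]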