v0.5.2 - fixed: LLM router async calls, relay-UI mismatch, and intake summarization failure, among others.

Memory relevance threshold increased.
serversdwn
2025-12-12 02:58:23 -05:00
parent 6a20d3981f
commit fe86759cfd
4 changed files with 106 additions and 14 deletions
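The headline change below is in llm_router.py: the blocking requests calls are replaced with a single shared httpx.AsyncClient, so call_llm is now a true coroutine. A minimal usage sketch, assuming call_llm accepts prompt, max_tokens, and temperature as keyword arguments (any other parameters, such as backend selection, are not visible in these hunks):

import asyncio
from llm_router import call_llm  # assumes llm_router.py is importable from the project root

async def main():
    # call_llm must now be awaited from inside a running event loop
    reply = await call_llm(prompt="Say hello", max_tokens=64, temperature=0.7)
    print(reply)

asyncio.run(main())

Reusing one module-level AsyncClient keeps connections pooled across requests instead of opening a new session per call.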

llm_router.py

@@ -1,7 +1,10 @@
 # llm_router.py
 import os
-import requests
+import httpx
 import json
+import logging
+logger = logging.getLogger(__name__)
 # ------------------------------------------------------------
 # Load backend registry from root .env
@@ -33,6 +36,9 @@ BACKENDS = {
 DEFAULT_BACKEND = "PRIMARY"
+# Reusable async HTTP client
+http_client = httpx.AsyncClient(timeout=120.0)
 # ------------------------------------------------------------
 # Public call
@@ -65,9 +71,20 @@ async def call_llm(
"n_predict": max_tokens,
"temperature": temperature
}
r = requests.post(f"{url}/completion", json=payload, timeout=120)
data = r.json()
return data["content"]
try:
r = await http_client.post(f"{url}/completion", json=payload)
r.raise_for_status()
data = r.json()
return data.get("content", "")
except httpx.HTTPError as e:
logger.error(f"HTTP error calling mi50: {type(e).__name__}: {str(e)}")
raise RuntimeError(f"LLM API error (mi50): {type(e).__name__}: {str(e)}")
except (KeyError, json.JSONDecodeError) as e:
logger.error(f"Response parsing error from mi50: {e}")
raise RuntimeError(f"Invalid response format (mi50): {e}")
except Exception as e:
logger.error(f"Unexpected error calling mi50: {type(e).__name__}: {str(e)}")
raise RuntimeError(f"Unexpected error (mi50): {type(e).__name__}: {str(e)}")
# -------------------------------
# Provider: OLLAMA (your 3090)
@@ -78,13 +95,22 @@ async def call_llm(
"messages": [
{"role": "user", "content": prompt}
],
"stream": False # <-- critical fix
"stream": False
}
r = requests.post(f"{url}/api/chat", json=payload, timeout=120)
data = r.json()
return data["message"]["content"]
try:
r = await http_client.post(f"{url}/api/chat", json=payload)
r.raise_for_status()
data = r.json()
return data["message"]["content"]
except httpx.HTTPError as e:
logger.error(f"HTTP error calling ollama: {type(e).__name__}: {str(e)}")
raise RuntimeError(f"LLM API error (ollama): {type(e).__name__}: {str(e)}")
except (KeyError, json.JSONDecodeError) as e:
logger.error(f"Response parsing error from ollama: {e}")
raise RuntimeError(f"Invalid response format (ollama): {e}")
except Exception as e:
logger.error(f"Unexpected error calling ollama: {type(e).__name__}: {str(e)}")
raise RuntimeError(f"Unexpected error (ollama): {type(e).__name__}: {str(e)}")
# -------------------------------
@@ -103,9 +129,20 @@ async def call_llm(
"temperature": temperature,
"max_tokens": max_tokens,
}
r = requests.post(f"{url}/chat/completions", json=payload, headers=headers, timeout=120)
data = r.json()
return data["choices"][0]["message"]["content"]
try:
r = await http_client.post(f"{url}/chat/completions", json=payload, headers=headers)
r.raise_for_status()
data = r.json()
return data["choices"][0]["message"]["content"]
except httpx.HTTPError as e:
logger.error(f"HTTP error calling openai: {type(e).__name__}: {str(e)}")
raise RuntimeError(f"LLM API error (openai): {type(e).__name__}: {str(e)}")
except (KeyError, json.JSONDecodeError) as e:
logger.error(f"Response parsing error from openai: {e}")
raise RuntimeError(f"Invalid response format (openai): {e}")
except Exception as e:
logger.error(f"Unexpected error calling openai: {type(e).__name__}: {str(e)}")
raise RuntimeError(f"Unexpected error (openai): {type(e).__name__}: {str(e)}")
# -------------------------------
# Unknown provider
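Since failures are now surfaced as RuntimeError (after being logged by the router) rather than as raw requests exceptions, downstream callers such as the relay or intake code, which are not shown in this diff, would wrap the call roughly like this sketch (user_text is a hypothetical variable holding the incoming message):

try:
    reply = await call_llm(prompt=user_text, max_tokens=256, temperature=0.2)
except RuntimeError as e:
    # the router has already logged the underlying httpx or parsing error
    reply = f"[LLM backend unavailable: {e}]"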