simple context added to standard mode

serversdwn
2025-12-21 13:01:00 -05:00
parent d09425c37b
commit ceb60119fb
3 changed files with 79 additions and 26 deletions


@@ -44,11 +44,22 @@ http_client = httpx.AsyncClient(timeout=120.0)
 # Public call
 # ------------------------------------------------------------
 async def call_llm(
-    prompt: str,
+    prompt: str = None,
+    messages: list = None,
     backend: str | None = None,
     temperature: float = 0.7,
     max_tokens: int = 512,
 ):
+    """
+    Call an LLM backend.
+
+    Args:
+        prompt: String prompt (for completion-style APIs like mi50)
+        messages: List of message dicts (for chat-style APIs like Ollama/OpenAI)
+        backend: Which backend to use (PRIMARY, SECONDARY, OPENAI, etc.)
+        temperature: Sampling temperature
+        max_tokens: Maximum tokens to generate
+    """
 
     backend = (backend or DEFAULT_BACKEND).upper()
     if backend not in BACKENDS:
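
For reference, a minimal caller-side sketch of the new signature (not part of this commit). The import path llm_client and the backend name "PRIMARY" are placeholders; call_llm now accepts either a plain prompt string or a chat-style messages list, as described in the docstring above.

import asyncio

from llm_client import call_llm  # hypothetical module path; adjust to wherever call_llm lives

async def main():
    # Completion-style: a bare prompt string (routed to prompt-based backends like mi50)
    a = await call_llm(prompt="Summarize today's logs", backend="PRIMARY")

    # Chat-style: a messages list (routed to chat backends like Ollama/OpenAI)
    b = await call_llm(
        messages=[
            {"role": "system", "content": "You are a concise assistant."},
            {"role": "user", "content": "Summarize today's logs"},
        ],
        backend="PRIMARY",
        temperature=0.5,
        max_tokens=256,
    )
    print(a, b)

asyncio.run(main())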
@@ -69,7 +80,8 @@ async def call_llm(
         payload = {
             "prompt": prompt,
             "n_predict": max_tokens,
-            "temperature": temperature
+            "temperature": temperature,
+            "stop": ["User:", "\nUser:", "Assistant:", "\n\n\n"]
         }
         try:
             r = await http_client.post(f"{url}/completion", json=payload)
@@ -90,12 +102,20 @@ async def call_llm(
     # Provider: OLLAMA (your 3090)
     # -------------------------------
     if provider == "ollama":
+        # Use messages array if provided, otherwise convert prompt to single user message
+        if messages:
+            chat_messages = messages
+        else:
+            chat_messages = [{"role": "user", "content": prompt}]
+
         payload = {
             "model": model,
-            "messages": [
-                {"role": "user", "content": prompt}
-            ],
-            "stream": False
+            "messages": chat_messages,
+            "stream": False,
+            "options": {
+                "temperature": temperature,
+                "num_predict": max_tokens
+            }
         }
         try:
            r = await http_client.post(f"{url}/api/chat", json=payload)