feat: Implement Trillium notes executor for searching and creating notes via ETAPI

- Added `trillium.py` for searching and creating notes with Trillium's ETAPI.
- Implemented `search_notes` and `create_note` functions with appropriate error handling and validation.

feat: Add web search functionality using DuckDuckGo
- Introduced `web_search.py` for performing web searches without API keys.
- Implemented `search_web` function with result handling and validation.

feat: Create provider-agnostic function caller for iterative tool calling
- Developed `function_caller.py` to manage LLM interactions with tools.
- Implemented iterative calling logic with error handling and tool execution.

feat: Establish a tool registry for managing available tools
- Created `registry.py` to define and manage tool availability and execution.
- Integrated feature flags for enabling/disabling tools based on environment variables.

feat: Implement event streaming for tool calling processes
- Added `stream_events.py` to manage Server-Sent Events (SSE) for tool calling.
- Enabled real-time updates during tool execution for enhanced user experience.

test: Add tests for tool calling system components
- Created `test_tools.py` to validate functionality of code execution, web search, and tool registry.
- Implemented asynchronous tests to ensure proper execution and result handling.

chore: Add Dockerfile for sandbox environment setup
- Created `Dockerfile` to set up a Python environment with necessary dependencies for code execution.

chore: Add debug regex script for testing XML parsing
- Introduced `debug_regex.py` to validate regex patterns against XML tool calls.

chore: Add HTML template for displaying thinking stream events
- Created `test_thinking_stream.html` for visualizing tool calling events in a user-friendly format.

test: Add tests for OllamaAdapter XML parsing
- Developed `test_ollama_parser.py` to validate XML parsing with various test cases, including malformed XML.
2025-12-26 03:49:20 -05:00
parent f1471cde84
commit 64429b19e6
37 changed files with 3238 additions and 23 deletions
--- a/cortex/llm/llm_router.py
+++ b/cortex/llm/llm_router.py
@@ -3,6 +3,8 @@ import os
 import httpx
 import json
 import logging
+from typing import Optional, List, Dict
+from autonomy.tools.adapters import OpenAIAdapter, OllamaAdapter, LlamaCppAdapter

 logger = logging.getLogger(__name__)

@@ -39,6 +41,16 @@ DEFAULT_BACKEND = "PRIMARY"
 # Reusable async HTTP client
 http_client = httpx.AsyncClient(timeout=120.0)

+# Tool adapters for each backend
+TOOL_ADAPTERS = {
+    "OPENAI": OpenAIAdapter(),
+    "OLLAMA": OllamaAdapter(),
+    "MI50": LlamaCppAdapter(),  # MI50 uses llama.cpp
+    "PRIMARY": None,  # Determined at runtime
+    "SECONDARY": None,  # Determined at runtime
+    "FALLBACK": None,  # Determined at runtime
+}
+

 # ------------------------------------------------------------
 # Public call
@@ -49,9 +61,12 @@ async def call_llm(
    backend: str | None = None,
    temperature: float = 0.7,
    max_tokens: int = 512,
+    tools: Optional[List[Dict]] = None,
+    tool_choice: Optional[str] = None,
+    return_adapter_response: bool = False,
 ):
    """
-    Call an LLM backend.
+    Call an LLM backend with optional tool calling support.

    Args:
        prompt: String prompt (for completion-style APIs like mi50)
@@ -59,6 +74,13 @@ async def call_llm(
        backend: Which backend to use (PRIMARY, SECONDARY, OPENAI, etc.)
        temperature: Sampling temperature
        max_tokens: Maximum tokens to generate
+        tools: List of Lyra tool definitions (provider-agnostic)
+        tool_choice: How to use tools ("auto", "required", "none")
+        return_adapter_response: If True, return dict with content and tool_calls
+
+    Returns:
+        str (default) or dict (if return_adapter_response=True):
+            {"content": str, "tool_calls": [...] or None}
    """
    backend = (backend or DEFAULT_BACKEND).upper()

@@ -73,10 +95,54 @@ async def call_llm(
    if not url or not model:
        raise RuntimeError(f"Backend '{backend}' missing url/model in env")

+    # If tools are requested, use adapter to prepare request
+    if tools:
+        # Get adapter for this backend
+        adapter = TOOL_ADAPTERS.get(backend)
+
+        # For PRIMARY/SECONDARY/FALLBACK, determine adapter based on provider
+        if adapter is None and backend in ["PRIMARY", "SECONDARY", "FALLBACK"]:
+            if provider == "openai":
+                adapter = TOOL_ADAPTERS["OPENAI"]
+            elif provider == "ollama":
+                adapter = TOOL_ADAPTERS["OLLAMA"]
+            elif provider == "mi50":
+                adapter = TOOL_ADAPTERS["MI50"]
+
+        if adapter:
+            # Use messages array if provided, otherwise convert prompt to messages
+            if not messages:
+                messages = [{"role": "user", "content": prompt}]
+
+            # Prepare request through adapter
+            adapted_request = await adapter.prepare_request(messages, tools, tool_choice)
+            messages = adapted_request["messages"]
+
+            # Extract tools in provider format if present
+            provider_tools = adapted_request.get("tools")
+            provider_tool_choice = adapted_request.get("tool_choice")
+        else:
+            logger.warning(f"No adapter available for backend {backend}, ignoring tools")
+            provider_tools = None
+            provider_tool_choice = None
+    else:
+        provider_tools = None
+        provider_tool_choice = None
+
    # -------------------------------
    # Provider: MI50 (llama.cpp server)
    # -------------------------------
    if provider == "mi50":
+        # If tools requested, convert messages to prompt with tool instructions
+        if messages and tools:
+            # Combine messages into a prompt
+            prompt_parts = []
+            for msg in messages:
+                role = msg.get("role", "user")
+                content = msg.get("content", "")
+                prompt_parts.append(f"{role.capitalize()}: {content}")
+            prompt = "\n".join(prompt_parts) + "\nAssistant:"
+
        payload = {
            "prompt": prompt,
            "n_predict": max_tokens,
@@ -87,7 +153,15 @@ async def call_llm(
            r = await http_client.post(f"{url}/completion", json=payload)
            r.raise_for_status()
            data = r.json()
-            return data.get("content", "")
+            response_content = data.get("content", "")
+
+            # If caller wants adapter response with tool calls, parse and return
+            if return_adapter_response and tools:
+                adapter = TOOL_ADAPTERS.get(backend) or TOOL_ADAPTERS["MI50"]
+                return await adapter.parse_response(response_content)
+            else:
+                return response_content
+
        except httpx.HTTPError as e:
            logger.error(f"HTTP error calling mi50: {type(e).__name__}: {str(e)}")
            raise RuntimeError(f"LLM API error (mi50): {type(e).__name__}: {str(e)}")
@@ -101,7 +175,9 @@ async def call_llm(
    # -------------------------------
    # Provider: OLLAMA (your 3090)
    # -------------------------------
+    logger.info(f"🔍 LLM Router: provider={provider}, checking if ollama...")
    if provider == "ollama":
+        logger.info(f"🔍 LLM Router: Matched ollama provider, tools={bool(tools)}, return_adapter_response={return_adapter_response}")
        # Use messages array if provided, otherwise convert prompt to single user message
        if messages:
            chat_messages = messages
@@ -121,7 +197,19 @@ async def call_llm(
            r = await http_client.post(f"{url}/api/chat", json=payload)
            r.raise_for_status()
            data = r.json()
-            return data["message"]["content"]
+            response_content = data["message"]["content"]
+
+            # If caller wants adapter response with tool calls, parse and return
+            if return_adapter_response and tools:
+                logger.info(f"🔍 Ollama: return_adapter_response=True, calling adapter.parse_response")
+                adapter = TOOL_ADAPTERS.get(backend) or TOOL_ADAPTERS["OLLAMA"]
+                logger.info(f"🔍 Ollama: Using adapter {adapter.__class__.__name__}")
+                result = await adapter.parse_response(response_content)
+                logger.info(f"🔍 Ollama: Adapter returned {result}")
+                return result
+            else:
+                return response_content
+
        except httpx.HTTPError as e:
            logger.error(f"HTTP error calling ollama: {type(e).__name__}: {str(e)}")
            raise RuntimeError(f"LLM API error (ollama): {type(e).__name__}: {str(e)}")
@@ -154,11 +242,49 @@ async def call_llm(
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
+
+        # Add tools if available (OpenAI native function calling)
+        if provider_tools:
+            payload["tools"] = provider_tools
+            if provider_tool_choice:
+                payload["tool_choice"] = provider_tool_choice
+
        try:
            r = await http_client.post(f"{url}/chat/completions", json=payload, headers=headers)
            r.raise_for_status()
            data = r.json()
-            return data["choices"][0]["message"]["content"]
+
+            # If caller wants adapter response with tool calls, parse and return
+            if return_adapter_response and tools:
+                # Create mock response object for adapter
+                class MockChoice:
+                    def __init__(self, message_data):
+                        self.message = type('obj', (object,), {})()
+                        self.message.content = message_data.get("content")
+                        # Convert tool_calls dicts to objects
+                        raw_tool_calls = message_data.get("tool_calls")
+                        if raw_tool_calls:
+                            self.message.tool_calls = []
+                            for tc in raw_tool_calls:
+                                tool_call_obj = type('obj', (object,), {})()
+                                tool_call_obj.id = tc.get("id")
+                                tool_call_obj.function = type('obj', (object,), {})()
+                                tool_call_obj.function.name = tc.get("function", {}).get("name")
+                                tool_call_obj.function.arguments = tc.get("function", {}).get("arguments")
+                                self.message.tool_calls.append(tool_call_obj)
+                        else:
+                            self.message.tool_calls = None
+
+                class MockResponse:
+                    def __init__(self, data):
+                        self.choices = [MockChoice(data["choices"][0]["message"])]
+
+                mock_resp = MockResponse(data)
+                adapter = TOOL_ADAPTERS.get(backend) or TOOL_ADAPTERS["OPENAI"]
+                return await adapter.parse_response(mock_resp)
+            else:
+                return data["choices"][0]["message"]["content"]
+
        except httpx.HTTPError as e:
            logger.error(f"HTTP error calling openai: {type(e).__name__}: {str(e)}")
            raise RuntimeError(f"LLM API error (openai): {type(e).__name__}: {str(e)}")