feat: Implement Trillium notes executor for searching and creating notes via ETAPI

- Added `trillium.py` for searching and creating notes with Trillium's ETAPI.
- Implemented `search_notes` and `create_note` functions with appropriate error handling and validation.

feat: Add web search functionality using DuckDuckGo
- Introduced `web_search.py` for performing web searches without API keys.
- Implemented `search_web` function with result handling and validation.

feat: Create provider-agnostic function caller for iterative tool calling
- Developed `function_caller.py` to manage LLM interactions with tools.
- Implemented iterative calling logic with error handling and tool execution.

feat: Establish a tool registry for managing available tools
- Created `registry.py` to define and manage tool availability and execution.
- Integrated feature flags for enabling/disabling tools based on environment variables.

feat: Implement event streaming for tool calling processes
- Added `stream_events.py` to manage Server-Sent Events (SSE) for tool calling.
- Enabled real-time updates during tool execution for enhanced user experience.

test: Add tests for tool calling system components
- Created `test_tools.py` to validate functionality of code execution, web search, and tool registry.
- Implemented asynchronous tests to ensure proper execution and result handling.

chore: Add Dockerfile for sandbox environment setup
- Created `Dockerfile` to set up a Python environment with necessary dependencies for code execution.

chore: Add debug regex script for testing XML parsing
- Introduced `debug_regex.py` to validate regex patterns against XML tool calls.

chore: Add HTML template for displaying thinking stream events
- Created `test_thinking_stream.html` for visualizing tool calling events in a user-friendly format.

test: Add tests for OllamaAdapter XML parsing
- Developed `test_ollama_parser.py` to validate XML parsing with various test cases, including malformed XML.
2025-12-26 03:49:20 -05:00
parent f1471cde84
commit 64429b19e6
37 changed files with 3238 additions and 23 deletions
--- a/cortex/autonomy/tools/registry.py
+++ b/cortex/autonomy/tools/registry.py
@@ -0,0 +1,196 @@
+"""
+Provider-agnostic Tool Registry for Lyra.
+
+This module provides a central registry for all available tools with
+Lyra-native definitions (not provider-specific).
+"""
+
+import os
+from typing import Dict, List, Optional
+from .executors import execute_code, search_web, search_notes, create_note
+
+
+class ToolRegistry:
+    """Registry of the tools Lyra exposes to an LLM, plus their executors.
+
+    Tools are defined in Lyra's own format (provider-agnostic), and
+    adapters convert them to provider-specific formats (OpenAI function
+    calling, Ollama XML prompts, etc.).
+
+    Attributes:
+        tools: Maps a tool name to its definition dict, which carries the
+            keys ``name``, ``description``, ``parameters`` and ``required``.
+        executors: Maps a tool name to the callable that runs the tool.
+        code_execution_enabled: Parsed ``ENABLE_CODE_EXECUTION`` flag.
+        web_search_enabled: Parsed ``ENABLE_WEB_SEARCH`` flag.
+        trillium_enabled: Parsed ``ENABLE_TRILLIUM`` flag.
+    """
+
+    def __init__(self):
+        """Read the feature flags and register the enabled tools.
+
+        A flag counts as enabled only when its environment variable equals
+        ``"true"`` (case-insensitive). Code execution and web search default
+        to enabled when the variable is unset; Trillium defaults to disabled.
+        """
+        self.tools = {}
+        self.executors = {}
+
+        # Feature flags from environment
+        self.code_execution_enabled = os.getenv("ENABLE_CODE_EXECUTION", "true").lower() == "true"
+        self.web_search_enabled = os.getenv("ENABLE_WEB_SEARCH", "true").lower() == "true"
+        self.trillium_enabled = os.getenv("ENABLE_TRILLIUM", "false").lower() == "true"
+
+        self._register_tools()
+        self._register_executors()
+
+    def _register_executors(self):
+        """Bind each enabled tool name to its executor function."""
+        if self.code_execution_enabled:
+            self.executors["execute_code"] = execute_code
+
+        if self.web_search_enabled:
+            self.executors["search_web"] = search_web
+
+        if self.trillium_enabled:
+            # One flag controls both Trillium tools.
+            self.executors["search_notes"] = search_notes
+            self.executors["create_note"] = create_note
+
+    def _register_tools(self):
+        """Populate ``self.tools`` with the definitions of the enabled tools."""
+
+        if self.code_execution_enabled:
+            self.tools["execute_code"] = {
+                "name": "execute_code",
+                "description": "Execute Python or bash code in a secure sandbox environment. Use this to perform calculations, data processing, file operations, or any programmatic tasks. The sandbox is persistent across calls within a session and has common Python packages (numpy, pandas, requests, matplotlib, scipy) pre-installed.",
+                "parameters": {
+                    "language": {
+                        "type": "string",
+                        "enum": ["python", "bash"],
+                        "description": "The programming language to execute (python or bash)"
+                    },
+                    "code": {
+                        "type": "string",
+                        "description": "The code to execute. For multi-line code, use proper indentation. For Python, use standard Python 3.11 syntax."
+                    },
+                    "reason": {
+                        "type": "string",
+                        "description": "Brief explanation of why you're executing this code and what you expect to achieve"
+                    }
+                },
+                "required": ["language", "code", "reason"]
+            }
+
+        if self.web_search_enabled:
+            self.tools["search_web"] = {
+                "name": "search_web",
+                "description": "Search the internet using DuckDuckGo to find current information, facts, news, or answers to questions. Returns a list of search results with titles, snippets, and URLs. Use this when you need up-to-date information or facts not in your training data.",
+                "parameters": {
+                    "query": {
+                        "type": "string",
+                        "description": "The search query to look up on the internet"
+                    },
+                    "max_results": {
+                        "type": "integer",
+                        "description": "Maximum number of results to return (default: 5, max: 10)"
+                    }
+                },
+                "required": ["query"]
+            }
+
+        if self.trillium_enabled:
+            self.tools["search_notes"] = {
+                "name": "search_notes",
+                "description": "Search through Trillium notes to find relevant information. Use this to retrieve knowledge, context, or information previously stored in the user's notes.",
+                "parameters": {
+                    "query": {
+                        "type": "string",
+                        "description": "The search query to find matching notes"
+                    },
+                    "limit": {
+                        "type": "integer",
+                        "description": "Maximum number of notes to return (default: 5, max: 20)"
+                    }
+                },
+                "required": ["query"]
+            }
+
+            self.tools["create_note"] = {
+                "name": "create_note",
+                "description": "Create a new note in Trillium. Use this to store important information, insights, or knowledge for future reference. Notes are stored in the user's Trillium knowledge base.",
+                "parameters": {
+                    "title": {
+                        "type": "string",
+                        "description": "The title of the note"
+                    },
+                    "content": {
+                        "type": "string",
+                        "description": "The content of the note in markdown or HTML format"
+                    },
+                    "parent_note_id": {
+                        "type": "string",
+                        "description": "Optional ID of the parent note to nest this note under"
+                    }
+                },
+                "required": ["title", "content"]
+            }
+
+    def get_tool_definitions(self) -> Optional[List[Dict]]:
+        """Get list of all enabled tool definitions in Lyra format.
+
+        Returns:
+            list: List of tool definition dicts, or None if no tools enabled
+        """
+        if not self.tools:
+            return None
+        return list(self.tools.values())
+
+    def get_tool_names(self) -> List[str]:
+        """Get list of all enabled tool names.
+
+        Returns:
+            list: List of tool name strings (empty when no tool is enabled)
+        """
+        return list(self.tools.keys())
+
+    def is_tool_enabled(self, tool_name: str) -> bool:
+        """Check if a specific tool is enabled.
+
+        Only tool definitions are consulted; an executor added through
+        ``register_executor`` without a matching definition does not count.
+
+        Args:
+            tool_name: Name of the tool to check
+
+        Returns:
+            bool: True if tool is enabled, False otherwise
+        """
+        return tool_name in self.tools
+
+    def register_executor(self, tool_name: str, executor_func):
+        """Register an executor function for a tool.
+
+        Any executor already stored under ``tool_name`` is replaced. No tool
+        definition is created or modified.
+
+        Args:
+            tool_name: Name of the tool
+            executor_func: Callable that takes the arguments dict and returns
+                the result dict. It may be an async function or a plain
+                synchronous one.
+        """
+        self.executors[tool_name] = executor_func
+
+    async def execute_tool(self, name: str, arguments: dict) -> dict:
+        """Execute a tool by name.
+
+        Args:
+            name: Tool name
+            arguments: Tool arguments dict; None is treated as an empty dict
+
+        Returns:
+            dict: Tool execution result, or a dict with a single ``error``
+                key when the tool is unknown or its executor raises
+        """
+        # Imported here so the method does not depend on a file-level import.
+        import inspect
+
+        if name not in self.executors:
+            return {"error": f"Unknown tool: {name}"}
+
+        executor = self.executors[name]
+        try:
+            # A tool call without arguments may arrive as None; executors
+            # expect a dict.
+            result = executor({} if arguments is None else arguments)
+            # Await only real awaitables, so that a synchronous executor's
+            # successful result is not misreported as a failure.
+            if inspect.isawaitable(result):
+                result = await result
+            return result
+        except Exception as e:
+            return {"error": f"Tool execution failed: {e}"}
+
+
+# Module-level registry shared by all callers; created on first use
+_registry: Optional[ToolRegistry] = None
+
+
+def get_registry() -> ToolRegistry:
+    """Return the shared ToolRegistry, building it on the first call.
+
+    Returns:
+        ToolRegistry: The module-level instance; later calls return the
+            same object without constructing a new one
+    """
+    global _registry
+    if _registry is not None:
+        return _registry
+    _registry = ToolRegistry()
+    return _registry