feat: Implement Trilium Notes executor for searching and creating notes via ETAPI

- Added `trillium.py` for searching and creating notes with Trilium's ETAPI.
- Implemented `search_notes` and `create_note` functions with appropriate error handling and validation.

feat: Add web search functionality using DuckDuckGo

- Introduced `web_search.py` for performing web searches without API keys.
- Implemented `search_web` function with result handling and validation.

feat: Create provider-agnostic function caller for iterative tool calling

- Developed `function_caller.py` to manage LLM interactions with tools.
- Implemented iterative calling logic with error handling and tool execution.

feat: Establish a tool registry for managing available tools

- Created `registry.py` to define and manage tool availability and execution.
- Integrated feature flags for enabling/disabling tools based on environment variables.

feat: Implement event streaming for tool calling processes

- Added `stream_events.py` to manage Server-Sent Events (SSE) for tool calling.
- Enabled real-time updates during tool execution for enhanced user experience.

test: Add tests for tool calling system components

- Created `test_tools.py` to validate functionality of code execution, web search, and tool registry.
- Implemented asynchronous tests to ensure proper execution and result handling.

chore: Add Dockerfile for sandbox environment setup

- Created `Dockerfile` to set up a Python environment with necessary dependencies for code execution.

chore: Add debug regex script for testing XML parsing

- Introduced `debug_regex.py` to validate regex patterns against XML tool calls.

chore: Add HTML template for displaying thinking stream events

- Created `test_thinking_stream.html` for visualizing tool calling events in a user-friendly format.

test: Add tests for OllamaAdapter XML parsing

- Developed `test_ollama_parser.py` to validate XML parsing with various test cases, including malformed XML.
This commit is contained in:
serversdwn
2025-12-26 03:49:20 -05:00
parent f1471cde84
commit 64429b19e6
37 changed files with 3238 additions and 23 deletions

View File

@@ -0,0 +1,12 @@
"""Tool executors for Lyra."""
from .code_executor import execute_code
from .web_search import search_web
from .trillium import search_notes, create_note
# Names exported by `from <package> import *`: the four tool executor
# coroutines imported above from the sibling modules.
__all__ = [
"execute_code",
"search_web",
"search_notes",
"create_note",
]

View File

@@ -0,0 +1,162 @@
"""
Code executor for running Python and bash code in a sandbox container.
This module provides secure code execution with timeout protection,
output limits, and forbidden pattern detection.
"""
import asyncio
import os
import tempfile
import re
from typing import Dict
# Forbidden patterns that pose security risks.
# Each entry is a regular-expression source string (not a compiled pattern);
# execute_code() runs re.search() with re.IGNORECASE over the submitted code
# for every entry, regardless of the selected language.
# NOTE(review): this is a best-effort deny-list, not a security boundary --
# e.g. `rm\s+-rf` does not match `rm -fr` or `rm -r -f`.
FORBIDDEN_PATTERNS = [
r'rm\s+-rf', # Destructive file removal
r':\(\)\{\s*:\|:&\s*\};:', # Fork bomb
r'mkfs', # Filesystem formatting
r'/dev/sd[a-z]', # Direct device access
r'dd\s+if=', # Low-level disk operations
r'>\s*/dev/sd', # Writing to devices
r'curl.*\|.*sh', # Pipe to shell (common attack vector)
r'wget.*\|.*sh', # Pipe to shell
]
def _coerce_timeout(value, default=30):
    """Normalise a caller-supplied timeout to a number of seconds.

    Returns ``default`` when ``value`` is None, the value itself for real
    numbers, the parsed integer for numeric strings, and None for anything
    that cannot be used as a timeout (NaN, non-numeric strings, other types).
    """
    if value is None:
        return default
    if isinstance(value, (int, float)):
        # NaN is the only float that is not equal to itself.
        return value if value == value else None
    if isinstance(value, str):
        try:
            return int(value.strip())
        except ValueError:
            return None
    return None


async def _run_quiet(*cmd):
    """Run ``cmd`` to completion, discard its output and return its exit code."""
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    await proc.communicate()
    return proc.returncode


def _truncate_output(raw, limit):
    """Decode at most ``limit`` bytes of ``raw`` and flag any truncation."""
    # errors='replace' also covers a multi-byte character cut by the slice.
    text = raw[:limit].decode('utf-8', errors='replace')
    if len(raw) > limit:
        text += "\n... (output truncated)"
    return text


async def _cleanup_execution(container, exec_path, temp_file):
    """Best-effort removal of the script from the host and the container."""
    # Cleanup runs from a ``finally`` clause, so it must never raise and
    # mask the execution result; failures here are deliberately ignored.
    try:
        os.unlink(temp_file)
    except OSError:
        pass
    try:
        await _run_quiet(
            "docker", "exec", "-u", "root", container, "rm", "-f", exec_path
        )
    except OSError:
        pass


async def execute_code(args: Dict) -> Dict:
    """Execute code in sandbox container.

    The code is written to a host temp file, copied into the container with
    ``docker cp``, chown-ed to the ``sandbox`` user and run with
    ``docker exec -u sandbox``. The script is removed from both the host and
    the container afterwards.

    Args:
        args: Dictionary containing:
            - language (str): "python" or "bash"
            - code (str): The code to execute
            - reason (str): Why this code is being executed (accepted for
              the tool schema; not used by this function)
            - timeout (int, optional): Execution timeout in seconds
              (default 30). Numeric strings are accepted. The value is
              clamped to the range 1..CODE_SANDBOX_MAX_TIMEOUT (default 120).

    Returns:
        dict: Execution result containing:
            - stdout (str): Standard output (first 10KB)
            - stderr (str): Standard error (first 10KB)
            - exit_code (int): Process exit code
            - execution_time (float): Time taken in seconds
        OR
            - error (str): Error message if validation or execution failed
    """
    language = args.get("language")
    code = args.get("code")

    # Validation: every rejected input yields an error dict, never a raise.
    if language not in ("python", "bash"):
        return {"error": "Invalid language. Must be 'python' or 'bash'"}
    if not code:
        return {"error": "No code provided"}
    if not isinstance(code, str):
        return {"error": "Code must be a string"}

    timeout = _coerce_timeout(args.get("timeout"))
    if timeout is None:
        return {"error": "Invalid timeout. Must be a number of seconds"}

    # Security: check for forbidden patterns
    if any(re.search(pattern, code, re.IGNORECASE) for pattern in FORBIDDEN_PATTERNS):
        return {"error": "Forbidden pattern detected for security reasons"}

    # Clamp the timeout to the configured ceiling.
    max_timeout = int(os.getenv("CODE_SANDBOX_MAX_TIMEOUT", "120"))
    timeout = min(max(timeout, 1), max_timeout)
    container = os.getenv("CODE_SANDBOX_CONTAINER", "lyra-code-sandbox")

    # Write code to temporary file
    suffix = ".py" if language == "python" else ".sh"
    temp_file = None
    try:
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix=suffix,
            delete=False,
            encoding='utf-8',
        ) as f:
            # Record the name before writing so a failed write can be cleaned up.
            temp_file = f.name
            f.write(code)
    except (OSError, UnicodeError) as e:
        if temp_file is not None:
            try:
                os.unlink(temp_file)
            except OSError:
                pass
        return {"error": f"Failed to create temp file: {str(e)}"}

    exec_path = f"/executions/{os.path.basename(temp_file)}"
    try:
        # Copy file to container
        cp_status = await _run_quiet(
            "docker", "cp", temp_file, f"{container}:{exec_path}"
        )
        if cp_status != 0:
            return {"error": "Failed to copy code to sandbox container"}

        # Fix permissions so sandbox user can read the file (run as root)
        chown_status = await _run_quiet(
            "docker", "exec", "-u", "root", container,
            "chown", "sandbox:sandbox", exec_path,
        )
        if chown_status != 0:
            return {"error": "Failed to set permissions on code in sandbox container"}

        # Execute in container as sandbox user
        interpreter = "python3" if language == "python" else "bash"
        loop = asyncio.get_running_loop()
        start_time = loop.time()
        proc = await asyncio.create_subprocess_exec(
            "docker", "exec", "-u", "sandbox", container, interpreter, exec_path,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(
                proc.communicate(),
                timeout=timeout,
            )
        except asyncio.TimeoutError:
            # This kills the local `docker exec` client process.
            # NOTE(review): the process inside the container may keep
            # running after the client dies -- confirm, and consider an
            # in-container `timeout` wrapper if so.
            try:
                proc.kill()
                await proc.wait()
            except OSError:
                # Includes ProcessLookupError: the process already exited.
                pass
            return {"error": f"Execution timeout after {timeout}s"}

        execution_time = loop.time() - start_time

        # communicate() has already buffered the complete output, so this
        # bounds the size of the returned result rather than peak memory.
        max_output = 10 * 1024  # 10KB
        return {
            "stdout": _truncate_output(stdout, max_output),
            "stderr": _truncate_output(stderr, max_output),
            "exit_code": proc.returncode,
            "execution_time": round(execution_time, 2),
        }
    except Exception as e:
        # Top-level boundary: tool executors report failures as error dicts.
        return {"error": f"Execution failed: {str(e)}"}
    finally:
        await _cleanup_execution(container, exec_path, temp_file)

View File

@@ -0,0 +1,134 @@
"""
Trillium notes executor for searching and creating notes via ETAPI.
This module provides integration with Trillium notes through the ETAPI HTTP API.
"""
import os
import aiohttp
from typing import Dict
# Connection settings, read from the environment once when this module is
# imported; later changes to the environment are not picked up.
# Base URL of the server; "/etapi/..." paths are appended to it below.
TRILLIUM_URL = os.getenv("TRILLIUM_URL", "http://localhost:8080")
# ETAPI token sent verbatim as the Authorization header. Defaults to the
# empty string, which the executors below treat as "not configured".
TRILLIUM_TOKEN = os.getenv("TRILLIUM_ETAPI_TOKEN", "")
async def search_notes(args: Dict) -> Dict:
    """Search Trilium Notes via ETAPI.

    Issues ``GET {TRILLIUM_URL}/etapi/notes`` with the query in the
    ``search`` parameter and unwraps the ``results`` array of the response.

    Args:
        args: Dictionary containing:
            - query (str): Search query
            - limit (int, optional): Maximum notes to return
              (default: 5, clamped to 1..20). Numeric strings are accepted.

    Returns:
        dict: Search results containing:
            - notes (list): Note objects exactly as returned by ETAPI
              (presumably noteId, title, type and other metadata -- verify
              against the ETAPI schema; note content is not fetched here)
            - count (int): Number of notes returned
        OR
            - error (str): Error message if validation or the search failed
    """
    query = args.get("query")
    if not query:
        return {"error": "No query provided"}

    # Reject unusable limits with an error dict instead of letting max()
    # raise TypeError on a string or None.
    raw_limit = args.get("limit")
    try:
        limit = 5 if raw_limit is None else int(raw_limit)
    except (TypeError, ValueError, OverflowError):
        return {"error": "Invalid limit. Must be an integer"}
    limit = min(max(limit, 1), 20)

    if not TRILLIUM_TOKEN:
        return {"error": "TRILLIUM_ETAPI_TOKEN not configured in environment"}

    try:
        async with aiohttp.ClientSession() as session:
            # ETAPI exposes search as GET /notes?search=...
            async with session.get(
                f"{TRILLIUM_URL}/etapi/notes",
                params={"search": query, "limit": limit},
                headers={"Authorization": TRILLIUM_TOKEN},
            ) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    # The search response is an object wrapping the hits in
                    # "results"; tolerate a bare list as well.
                    if isinstance(data, dict):
                        notes = data.get("results") or []
                    else:
                        notes = data
                    return {
                        "notes": notes,
                        "count": len(notes),
                    }
                if resp.status == 401:
                    return {"error": "Authentication failed. Check TRILLIUM_ETAPI_TOKEN"}
                error_text = await resp.text()
                return {"error": f"HTTP {resp.status}: {error_text}"}
    except aiohttp.ClientConnectorError:
        return {"error": f"Cannot connect to Trillium at {TRILLIUM_URL}"}
    except Exception as e:
        # Top-level boundary: tool executors report failures as error dicts.
        return {"error": f"Search failed: {str(e)}"}
async def create_note(args: Dict) -> Dict:
    """Create a note in Trilium Notes via ETAPI.

    Issues ``POST {TRILLIUM_URL}/etapi/create-note`` for a ``text`` note
    with mime ``text/html``.

    Args:
        args: Dictionary containing:
            - title (str): Note title
            - content (str): Note content. It is submitted as text/html;
              NOTE(review): markdown is sent as-is and is presumably not
              rendered -- confirm.
            - parent_note_id (str, optional): Parent note ID to nest under.
              Defaults to "root" because ETAPI requires a parent.

    Returns:
        dict: Creation result containing:
            - noteId (str): ID of created note
            - title (str): Title of created note
            - success (bool): True if created successfully
        OR
            - error (str): Error message if validation or creation failed
    """
    title = args.get("title")
    content = args.get("content")
    parent_note_id = args.get("parent_note_id")

    # Validation
    if not title:
        return {"error": "No title provided"}
    if not content:
        return {"error": "No content provided"}
    if not TRILLIUM_TOKEN:
        return {"error": "TRILLIUM_ETAPI_TOKEN not configured in environment"}

    payload = {
        # parentNoteId is mandatory for create-note; "root" is the tree root.
        "parentNoteId": parent_note_id or "root",
        "title": title,
        "content": content,
        "type": "text",
        "mime": "text/html",
    }

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{TRILLIUM_URL}/etapi/create-note",
                json=payload,
                headers={"Authorization": TRILLIUM_TOKEN},
            ) as resp:
                if resp.status in (200, 201):
                    data = await resp.json()
                    # The response nests the created note under "note"
                    # (next to "branch"); fall back to a flat "noteId".
                    note = data.get("note") if isinstance(data, dict) else None
                    if isinstance(note, dict):
                        note_id = note.get("noteId")
                    elif isinstance(data, dict):
                        note_id = data.get("noteId")
                    else:
                        note_id = None
                    return {
                        "noteId": note_id,
                        "title": title,
                        "success": True,
                    }
                if resp.status == 401:
                    return {"error": "Authentication failed. Check TRILLIUM_ETAPI_TOKEN"}
                error_text = await resp.text()
                return {"error": f"HTTP {resp.status}: {error_text}"}
    except aiohttp.ClientConnectorError:
        return {"error": f"Cannot connect to Trillium at {TRILLIUM_URL}"}
    except Exception as e:
        # Top-level boundary: tool executors report failures as error dicts.
        return {"error": f"Note creation failed: {str(e)}"}

View File

@@ -0,0 +1,55 @@
"""
Web search executor using DuckDuckGo.
This module provides web search capabilities without requiring API keys.
"""
from typing import Dict
from duckduckgo_search import DDGS
async def search_web(args: Dict) -> Dict:
    """Search the web using DuckDuckGo.

    The DDGS client is synchronous, so the search runs in the event loop's
    default executor to avoid blocking other coroutines while it waits on
    the network.

    Args:
        args: Dictionary containing:
            - query (str): The search query
            - max_results (int, optional): Maximum results to return
              (default: 5, clamped to 1..10). Numeric strings are accepted.

    Returns:
        dict: Search results containing:
            - results (list): List of search results with title, url, snippet
            - count (int): Number of results returned
        OR
            - error (str): Error message if validation or the search failed
    """
    # Imported here because this module does not import asyncio at the top.
    import asyncio

    query = args.get("query")
    if not query:
        return {"error": "No query provided"}

    # Reject unusable values with an error dict instead of letting max()
    # raise TypeError on a string or None.
    raw_max = args.get("max_results")
    try:
        max_results = 5 if raw_max is None else int(raw_max)
    except (TypeError, ValueError, OverflowError):
        return {"error": "Invalid max_results. Must be an integer"}
    max_results = min(max(max_results, 1), 10)

    def _run_search():
        # Runs in a worker thread; builds the normalised result list.
        with DDGS() as ddgs:
            return [
                {
                    "title": result.get("title", ""),
                    "url": result.get("href", ""),
                    "snippet": result.get("body", ""),
                }
                for result in ddgs.text(query, max_results=max_results)
            ]

    try:
        loop = asyncio.get_running_loop()
        results = await loop.run_in_executor(None, _run_search)
        return {
            "results": results,
            "count": len(results),
        }
    except Exception as e:
        # Top-level boundary: tool executors report failures as error dicts.
        return {"error": f"Search failed: {str(e)}"}