feat: Implement Trillium notes executor for searching and creating notes via ETAPI
- Added `trillium.py` for searching and creating notes with Trillium's ETAPI.
- Implemented `search_notes` and `create_note` functions with appropriate error handling and validation.

feat: Add web search functionality using DuckDuckGo

- Introduced `web_search.py` for performing web searches without API keys.
- Implemented `search_web` function with result handling and validation.

feat: Create provider-agnostic function caller for iterative tool calling

- Developed `function_caller.py` to manage LLM interactions with tools.
- Implemented iterative calling logic with error handling and tool execution.

feat: Establish a tool registry for managing available tools

- Created `registry.py` to define and manage tool availability and execution.
- Integrated feature flags for enabling/disabling tools based on environment variables (a sketch of this pattern follows below).

feat: Implement event streaming for tool calling processes

- Added `stream_events.py` to manage Server-Sent Events (SSE) for tool calling.
- Enabled real-time updates during tool execution for an enhanced user experience.

test: Add tests for tool calling system components

- Created `test_tools.py` to validate functionality of code execution, web search, and the tool registry.
- Implemented asynchronous tests to ensure proper execution and result handling.

chore: Add Dockerfile for sandbox environment setup

- Created `Dockerfile` to set up a Python environment with the necessary dependencies for code execution.

chore: Add debug regex script for testing XML parsing

- Introduced `debug_regex.py` to validate regex patterns against XML tool calls.

chore: Add HTML template for displaying thinking stream events

- Created `test_thinking_stream.html` for visualizing tool calling events in a user-friendly format.

test: Add tests for OllamaAdapter XML parsing

- Developed `test_ollama_parser.py` to validate XML parsing with various test cases, including malformed XML.
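The registry and feature-flag gating described above are not visible in the router diff below, so here is a minimal sketch of that pattern, assuming an async-handler registry keyed by environment flags; the class, method, and flag names are illustrative and may not match the actual `registry.py` API:

```python
# Illustrative sketch of an env-flag-gated tool registry; the real
# autonomy/tools/registry.py may use different names and structure.
import os
from typing import Any, Awaitable, Callable, Dict


class ToolRegistry:
    """Maps tool names to async handlers, each gated by an env feature flag."""

    def __init__(self) -> None:
        self._handlers: Dict[str, Callable[..., Awaitable[Any]]] = {}
        self._flags: Dict[str, str] = {}

    def register(self, name: str, handler: Callable[..., Awaitable[Any]], flag: str) -> None:
        # The tool is only exposed when the named env var is set to "true".
        self._handlers[name] = handler
        self._flags[name] = flag

    def available(self) -> Dict[str, Callable[..., Awaitable[Any]]]:
        # Filter down to tools whose feature flag is currently enabled.
        return {
            name: handler
            for name, handler in self._handlers.items()
            if os.getenv(self._flags[name], "false").lower() == "true"
        }

    async def execute(self, name: str, **kwargs: Any) -> Any:
        handler = self.available().get(name)
        if handler is None:
            raise ValueError(f"Tool not available or disabled: {name}")
        return await handler(**kwargs)
```

Keeping the flag check inside the registry means the function caller never sees tools that are switched off, mirroring the STANDARD_MODE_ENABLE_TOOLS switch used in the router changes below.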
cortex/router.py | 103
@@ -2,7 +2,9 @@

import os
import logging
import asyncio
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from reasoning.reasoning import reason_check
@@ -15,6 +17,7 @@ from intake.intake import add_exchange_internal

from autonomy.monologue.monologue import InnerMonologue
from autonomy.self.state import load_self_state
from autonomy.tools.stream_events import get_stream_manager


# -------------------------------------------------------------------
@@ -343,6 +346,7 @@ async def run_simple(req: ReasonRequest):
    """
    from datetime import datetime
    from llm.llm_router import call_llm
    from autonomy.tools.function_caller import FunctionCaller

    start_time = datetime.now()

@@ -396,17 +400,39 @@ async def run_simple(req: ReasonRequest):

    temperature = req.temperature if req.temperature is not None else 0.7

    # Direct LLM call with messages (works for Ollama/OpenAI chat APIs)
    try:
        raw_response = await call_llm(
            messages=messages,
            backend=backend,
            temperature=temperature,
            max_tokens=2048
        )
    # Check if tools are enabled
    enable_tools = os.getenv("STANDARD_MODE_ENABLE_TOOLS", "false").lower() == "true"

        # Clean response - just strip whitespace
        response = raw_response.strip()
    # Call LLM with or without tools
    try:
        if enable_tools:
            # Use FunctionCaller for tool-enabled conversation
            logger.info(f"🛠️ Tool calling enabled for Standard Mode")
            logger.info(f"🔍 Creating FunctionCaller with backend={backend}, temp={temperature}")
            function_caller = FunctionCaller(backend, temperature)
            logger.info(f"🔍 FunctionCaller created, calling call_with_tools...")
            result = await function_caller.call_with_tools(
                messages=messages,
                max_tokens=2048,
                session_id=req.session_id  # Pass session_id for streaming
            )
            logger.info(f"🔍 call_with_tools returned: iterations={result.get('iterations')}, tool_calls={len(result.get('tool_calls', []))}")

            # Log tool usage
            if result.get("tool_calls"):
                tool_names = [tc["name"] for tc in result["tool_calls"]]
                logger.info(f"🔧 Tools used: {', '.join(tool_names)} ({result['iterations']} iterations)")

            response = result["content"].strip()
        else:
            # Direct LLM call without tools (original behavior)
            raw_response = await call_llm(
                messages=messages,
                backend=backend,
                temperature=temperature,
                max_tokens=2048
            )
            response = raw_response.strip()

    except Exception as e:
        logger.error(f"❌ LLM call failed: {e}")
@@ -448,6 +474,63 @@ async def run_simple(req: ReasonRequest):
    }


# -------------------------------------------------------------------
# /stream/thinking endpoint - SSE stream for "show your work"
# -------------------------------------------------------------------
@cortex_router.get("/stream/thinking/{session_id}")
async def stream_thinking(session_id: str):
    """
    Server-Sent Events stream for tool calling "show your work" feature.

    Streams real-time updates about:
    - Thinking/planning steps
    - Tool calls being made
    - Tool execution results
    - Final completion
    """
    stream_manager = get_stream_manager()
    queue = stream_manager.subscribe(session_id)

    async def event_generator():
        try:
            # Send initial connection message
            import json
            connected_event = json.dumps({"type": "connected", "session_id": session_id})
            yield f"data: {connected_event}\n\n"

            while True:
                # Wait for events with timeout to send keepalive
                try:
                    event = await asyncio.wait_for(queue.get(), timeout=30.0)

                    # Format as SSE
                    event_data = json.dumps(event)
                    yield f"data: {event_data}\n\n"

                    # If it's a "done" event, close the stream
                    if event.get("type") == "done":
                        break

                except asyncio.TimeoutError:
                    # Send keepalive comment
                    yield ": keepalive\n\n"

        except asyncio.CancelledError:
            logger.info(f"Stream cancelled for session {session_id}")
        finally:
            stream_manager.unsubscribe(session_id, queue)

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"  # Disable nginx buffering
        }
    )


# -------------------------------------------------------------------
# /ingest endpoint (internal)
# -------------------------------------------------------------------
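For reference, a minimal client-side sketch of consuming the new /stream/thinking/{session_id} stream, assuming the service is reachable at http://localhost:8000 and the router is mounted without an extra prefix; the URL, port, and function name are illustrative:

```python
# Hedged sketch: follow the thinking SSE stream with httpx.
# Base URL, port, and any router prefix are assumptions for illustration.
import asyncio
import json

import httpx


async def follow_thinking(session_id: str, base_url: str = "http://localhost:8000") -> None:
    url = f"{base_url}/stream/thinking/{session_id}"
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", url) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue  # skip ": keepalive" comments and blank separators
                event = json.loads(line[len("data: "):])
                print(event.get("type"), event)
                if event.get("type") == "done":
                    break  # server ends the stream after the final event


if __name__ == "__main__":
    asyncio.run(follow_thinking("example-session"))
```

The `test_thinking_stream.html` template added in this commit plays the same role in the browser.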