feat: Implement Trillium notes executor for searching and creating notes via ETAPI
- Added `trillium.py` for searching and creating notes with Trillium's ETAPI.
- Implemented `search_notes` and `create_note` functions with appropriate error handling and validation.

feat: Add web search functionality using DuckDuckGo

- Introduced `web_search.py` for performing web searches without API keys.
- Implemented `search_web` function with result handling and validation.

feat: Create provider-agnostic function caller for iterative tool calling

- Developed `function_caller.py` to manage LLM interactions with tools.
- Implemented iterative calling logic with error handling and tool execution.

feat: Establish a tool registry for managing available tools

- Created `registry.py` to define and manage tool availability and execution.
- Integrated feature flags for enabling/disabling tools based on environment variables (a sketch of this pattern follows below).

feat: Implement event streaming for tool calling processes

- Added `stream_events.py` to manage Server-Sent Events (SSE) for tool calling.
- Enabled real-time updates during tool execution for an enhanced user experience.

test: Add tests for tool calling system components

- Created `test_tools.py` to validate functionality of code execution, web search, and the tool registry.
- Implemented asynchronous tests to ensure proper execution and result handling.

chore: Add Dockerfile for sandbox environment setup

- Created `Dockerfile` to set up a Python environment with the necessary dependencies for code execution.

chore: Add debug regex script for testing XML parsing

- Introduced `debug_regex.py` to validate regex patterns against XML tool calls.

chore: Add HTML template for displaying thinking stream events

- Created `test_thinking_stream.html` for visualizing tool calling events in a user-friendly format.

test: Add tests for OllamaAdapter XML parsing

- Developed `test_ollama_parser.py` to validate XML parsing with various test cases, including malformed XML.
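The registry and feature-flag gating described above are not visible in the router diff below, so here is a minimal sketch of that pattern, assuming an async-handler registry keyed by environment flags; the class, method, and flag names are illustrative and may not match the actual `registry.py` API:

```python
# Illustrative sketch of an env-flag-gated tool registry; the real
# autonomy/tools/registry.py may use different names and structure.
import os
from typing import Any, Awaitable, Callable, Dict


class ToolRegistry:
    """Maps tool names to async handlers, each gated by an env feature flag."""

    def __init__(self) -> None:
        self._handlers: Dict[str, Callable[..., Awaitable[Any]]] = {}
        self._flags: Dict[str, str] = {}

    def register(self, name: str, handler: Callable[..., Awaitable[Any]], flag: str) -> None:
        # The tool is only exposed when the named env var is set to "true".
        self._handlers[name] = handler
        self._flags[name] = flag

    def available(self) -> Dict[str, Callable[..., Awaitable[Any]]]:
        # Filter down to tools whose feature flag is currently enabled.
        return {
            name: handler
            for name, handler in self._handlers.items()
            if os.getenv(self._flags[name], "false").lower() == "true"
        }

    async def execute(self, name: str, **kwargs: Any) -> Any:
        handler = self.available().get(name)
        if handler is None:
            raise ValueError(f"Tool not available or disabled: {name}")
        return await handler(**kwargs)
```

Keeping the flag check inside the registry means the function caller never sees tools that are switched off, mirroring the STANDARD_MODE_ENABLE_TOOLS switch used in the router changes below.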
cortex/router.py | 103
@@ -2,7 +2,9 @@

import os
import logging
import asyncio
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from reasoning.reasoning import reason_check
@@ -15,6 +17,7 @@ from intake.intake import add_exchange_internal

from autonomy.monologue.monologue import InnerMonologue
from autonomy.self.state import load_self_state
from autonomy.tools.stream_events import get_stream_manager


# -------------------------------------------------------------------
@@ -343,6 +346,7 @@ async def run_simple(req: ReasonRequest):
    """
    from datetime import datetime
    from llm.llm_router import call_llm
    from autonomy.tools.function_caller import FunctionCaller

    start_time = datetime.now()

@@ -396,17 +400,39 @@ async def run_simple(req: ReasonRequest):

    temperature = req.temperature if req.temperature is not None else 0.7

    # Direct LLM call with messages (works for Ollama/OpenAI chat APIs)
    try:
        raw_response = await call_llm(
            messages=messages,
            backend=backend,
            temperature=temperature,
            max_tokens=2048
        )
    # Check if tools are enabled
    enable_tools = os.getenv("STANDARD_MODE_ENABLE_TOOLS", "false").lower() == "true"

        # Clean response - just strip whitespace
        response = raw_response.strip()
    # Call LLM with or without tools
    try:
        if enable_tools:
            # Use FunctionCaller for tool-enabled conversation
            logger.info(f"🛠️ Tool calling enabled for Standard Mode")
            logger.info(f"🔍 Creating FunctionCaller with backend={backend}, temp={temperature}")
            function_caller = FunctionCaller(backend, temperature)
            logger.info(f"🔍 FunctionCaller created, calling call_with_tools...")
            result = await function_caller.call_with_tools(
                messages=messages,
                max_tokens=2048,
                session_id=req.session_id  # Pass session_id for streaming
            )
            logger.info(f"🔍 call_with_tools returned: iterations={result.get('iterations')}, tool_calls={len(result.get('tool_calls', []))}")

            # Log tool usage
            if result.get("tool_calls"):
                tool_names = [tc["name"] for tc in result["tool_calls"]]
                logger.info(f"🔧 Tools used: {', '.join(tool_names)} ({result['iterations']} iterations)")

            response = result["content"].strip()
        else:
            # Direct LLM call without tools (original behavior)
            raw_response = await call_llm(
                messages=messages,
                backend=backend,
                temperature=temperature,
                max_tokens=2048
            )
            response = raw_response.strip()

    except Exception as e:
        logger.error(f"❌ LLM call failed: {e}")
@@ -448,6 +474,63 @@ async def run_simple(req: ReasonRequest):
    }


# -------------------------------------------------------------------
# /stream/thinking endpoint - SSE stream for "show your work"
# -------------------------------------------------------------------
@cortex_router.get("/stream/thinking/{session_id}")
async def stream_thinking(session_id: str):
    """
    Server-Sent Events stream for tool calling "show your work" feature.

    Streams real-time updates about:
    - Thinking/planning steps
    - Tool calls being made
    - Tool execution results
    - Final completion
    """
    stream_manager = get_stream_manager()
    queue = stream_manager.subscribe(session_id)

    async def event_generator():
        try:
            # Send initial connection message
            import json
            connected_event = json.dumps({"type": "connected", "session_id": session_id})
            yield f"data: {connected_event}\n\n"

            while True:
                # Wait for events with timeout to send keepalive
                try:
                    event = await asyncio.wait_for(queue.get(), timeout=30.0)

                    # Format as SSE
                    event_data = json.dumps(event)
                    yield f"data: {event_data}\n\n"

                    # If it's a "done" event, close the stream
                    if event.get("type") == "done":
                        break

                except asyncio.TimeoutError:
                    # Send keepalive comment
                    yield ": keepalive\n\n"

        except asyncio.CancelledError:
            logger.info(f"Stream cancelled for session {session_id}")
        finally:
            stream_manager.unsubscribe(session_id, queue)

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"  # Disable nginx buffering
        }
    )


# -------------------------------------------------------------------
# /ingest endpoint (internal)
# -------------------------------------------------------------------
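For reference, a minimal client-side sketch of consuming the new /stream/thinking/{session_id} stream, assuming the service is reachable at http://localhost:8000 and the router is mounted without an extra prefix; the URL, port, and function name are illustrative:

```python
# Hedged sketch: follow the thinking SSE stream with httpx.
# Base URL, port, and any router prefix are assumptions for illustration.
import asyncio
import json

import httpx


async def follow_thinking(session_id: str, base_url: str = "http://localhost:8000") -> None:
    url = f"{base_url}/stream/thinking/{session_id}"
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", url) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue  # skip ": keepalive" comments and blank separators
                event = json.loads(line[len("data: "):])
                print(event.get("type"), event)
                if event.get("type") == "done":
                    break  # server ends the stream after the final event


if __name__ == "__main__":
    asyncio.run(follow_thinking("example-session"))
```

The `test_thinking_stream.html` template added in this commit plays the same role in the browser.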