# project-lyra/cortex/autonomy/tools/orchestrator.py

"""
Tool Orchestrator - executes autonomous tool invocations asynchronously.
"""

import asyncio
import logging
from typing import Dict, List, Any, Optional
import os

logger = logging.getLogger(__name__)


class ToolOrchestrator:
    """Orchestrates async tool execution and result aggregation."""

    def __init__(self, tool_timeout: int = 30):
        """
        Initialize orchestrator.

        Args:
            tool_timeout: Max seconds per tool call (default 30)
        """
        self.tool_timeout = tool_timeout
        self.available_tools = self._discover_tools()

    def _discover_tools(self) -> Dict[str, Any]:
        """Discover available tool modules."""
        tools = {}

        # Import tool modules as they become available
        try:
            from memory.neomem_client import search_neomem
            tools["RAG"] = search_neomem
            logger.debug("[ORCHESTRATOR] RAG tool available")
        except ImportError:
            logger.debug("[ORCHESTRATOR] RAG tool not available")

        try:
            from integrations.web_search import web_search
            tools["WEB"] = web_search
            logger.debug("[ORCHESTRATOR] WEB tool available")
        except ImportError:
            logger.debug("[ORCHESTRATOR] WEB tool not available")

        try:
            from integrations.weather import get_weather
            tools["WEATHER"] = get_weather
            logger.debug("[ORCHESTRATOR] WEATHER tool available")
        except ImportError:
            logger.debug("[ORCHESTRATOR] WEATHER tool not available")

        try:
            from integrations.codebrain import query_codebrain
            tools["CODEBRAIN"] = query_codebrain
            logger.debug("[ORCHESTRATOR] CODEBRAIN tool available")
        except ImportError:
            logger.debug("[ORCHESTRATOR] CODEBRAIN tool not available")

        return tools

    async def execute_tools(
        self,
        tools_to_invoke: List[Dict[str, Any]],
        context_state: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute multiple tools asynchronously.

        Args:
            tools_to_invoke: List of tool specs from decision engine
                [{"tool": "RAG", "query": "...", "reason": "...", "priority": 0.9}, ...]
            context_state: Full context for tool execution

        Returns:
            {
                "results": {
                    "RAG": {...},
                    "WEB": {...},
                    ...
                },
                "execution_summary": {
                    "tools_invoked": ["RAG", "WEB"],
                    "successful": ["RAG"],
                    "failed": ["WEB"],
                    "total_time_ms": 1234
                }
            }
        """
        import time
        start_time = time.time()

        logger.info(f"[ORCHESTRATOR] Executing {len(tools_to_invoke)} tools asynchronously")

        # Create tasks for each tool
        tasks = []
        tool_names = []

        for tool_spec in tools_to_invoke:
            tool_name = tool_spec["tool"]
            query = tool_spec["query"]

            if tool_name in self.available_tools:
                task = self._execute_single_tool(tool_name, query, context_state)
                tasks.append(task)
                tool_names.append(tool_name)
                logger.debug(f"[ORCHESTRATOR] Queued {tool_name}: {query[:50]}...")
            else:
                logger.warning(f"[ORCHESTRATOR] Tool {tool_name} not available, skipping")

        # Execute all tools concurrently with timeout
        results = {}
        successful = []
        failed = []

        if tasks:
            try:
                # Wait for all tasks with global timeout
                completed = await asyncio.wait_for(
                    asyncio.gather(*tasks, return_exceptions=True),
                    timeout=self.tool_timeout
                )

                # Process results
                for tool_name, result in zip(tool_names, completed):
                    if isinstance(result, Exception):
                        logger.error(f"[ORCHESTRATOR] {tool_name} failed: {result}")
                        results[tool_name] = {"error": str(result), "success": False}
                        failed.append(tool_name)
                    else:
                        logger.info(f"[ORCHESTRATOR] {tool_name} completed successfully")
                        results[tool_name] = result
                        successful.append(tool_name)

            except asyncio.TimeoutError:
                logger.error(f"[ORCHESTRATOR] Global timeout ({self.tool_timeout}s) exceeded")
                for tool_name in tool_names:
                    if tool_name not in results:
                        results[tool_name] = {"error": "timeout", "success": False}
                        failed.append(tool_name)

        end_time = time.time()
        total_time_ms = int((end_time - start_time) * 1000)

        execution_summary = {
            "tools_invoked": tool_names,
            "successful": successful,
            "failed": failed,
            "total_time_ms": total_time_ms
        }

        logger.info(f"[ORCHESTRATOR] Execution complete: {len(successful)}/{len(tool_names)} successful in {total_time_ms}ms")

        return {
            "results": results,
            "execution_summary": execution_summary
        }

    async def _execute_single_tool(
        self,
        tool_name: str,
        query: str,
        context_state: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute a single tool with error handling.

        Args:
            tool_name: Name of tool (RAG, WEB, etc.)
            query: Query string for the tool
            context_state: Context for tool execution

        Returns:
            Tool-specific result dict
        """
        tool_func = self.available_tools.get(tool_name)
        if not tool_func:
            raise ValueError(f"Tool {tool_name} not available")

        try:
            logger.debug(f"[ORCHESTRATOR] Invoking {tool_name}...")

            # Different tools have different signatures - adapt as needed
            if tool_name == "RAG":
                result = await self._invoke_rag(tool_func, query, context_state)
            elif tool_name == "WEB":
                result = await self._invoke_web(tool_func, query)
            elif tool_name == "WEATHER":
                result = await self._invoke_weather(tool_func, query)
            elif tool_name == "CODEBRAIN":
                result = await self._invoke_codebrain(tool_func, query, context_state)
            else:
                # Generic invocation
                result = await tool_func(query)

            return {
                "success": True,
                "tool": tool_name,
                "query": query,
                "data": result
            }

        except Exception as e:
            logger.error(f"[ORCHESTRATOR] {tool_name} execution failed: {e}")
            raise

    async def _invoke_rag(self, func, query: str, context: Dict[str, Any]) -> Any:
        """Invoke RAG tool (NeoMem search)."""
        session_id = context.get("session_id", "unknown")
        # RAG searches memory for relevant past interactions
        try:
            results = await func(query, limit=5, session_id=session_id)
            return results
        except Exception as e:
            logger.warning(f"[ORCHESTRATOR] RAG invocation failed, returning empty: {e}")
            return []

    async def _invoke_web(self, func, query: str) -> Any:
        """Invoke web search tool."""
        try:
            results = await func(query, max_results=5)
            return results
        except Exception as e:
            logger.warning(f"[ORCHESTRATOR] WEB invocation failed: {e}")
            return {"error": str(e), "results": []}

    async def _invoke_weather(self, func, query: str) -> Any:
        """Invoke weather tool."""
        # Extract location from query (simple heuristic)
        # In future: use LLM to extract location
        try:
            location = self._extract_location(query)
            results = await func(location)
            return results
        except Exception as e:
            logger.warning(f"[ORCHESTRATOR] WEATHER invocation failed: {e}")
            return {"error": str(e)}

    async def _invoke_codebrain(self, func, query: str, context: Dict[str, Any]) -> Any:
        """Invoke codebrain tool."""
        try:
            results = await func(query, context=context)
            return results
        except Exception as e:
            logger.warning(f"[ORCHESTRATOR] CODEBRAIN invocation failed: {e}")
            return {"error": str(e)}

    def _extract_location(self, query: str) -> str:
        """
        Extract a location from a weather query.

        Simple heuristic - in future use LLM. The query is lower-cased and
        split into words; the word following the first whole-word "in",
        "at" or "for" (tried in that priority order) is returned with
        surrounding punctuation removed.

        Args:
            query: Free-text weather question

        Returns:
            The lower-cased single-word location, or "current location" when
            no indicator is followed by a usable word.
        """
        # Match indicators as whole words: plain substring search would fire
        # inside other words ("rain in", "what is") and return the wrong token.
        words = query.lower().split()

        for indicator in ("in", "at", "for"):
            # words[:-1]: an indicator in last position has nothing after it.
            for position, word in enumerate(words[:-1]):
                if word != indicator:
                    continue
                candidate = words[position + 1].strip(".,;:!?\"'()")
                if candidate:
                    return candidate

        # Default fallback
        return "current location"

    def format_results_for_context(self, orchestrator_result: Dict[str, Any]) -> str:
        """
        Render tool outcomes as a text block for the prompt/context.

        Successful tools get a "[NAME]" header followed by their formatted
        payload; failed tools get a single "[NAME] - Failed: ..." line. The
        block ends with the total execution time and a rule of "=" signs.

        Args:
            orchestrator_result: Output from execute_tools()

        Returns:
            Formatted string for prompt injection, or "" when there are no
            results at all
        """
        results = orchestrator_result.get("results", {})
        summary = orchestrator_result.get("execution_summary", {})

        if not results:
            return ""

        # Tool-specific renderers; any other tool is rendered via str().
        renderers = {
            "RAG": self._format_rag_results,
            "WEB": self._format_web_results,
            "WEATHER": self._format_weather_results,
            "CODEBRAIN": self._format_codebrain_results,
        }

        sections = ["\n=== AUTONOMOUS TOOL RESULTS ===\n"]

        for tool_name, tool_result in results.items():
            if not tool_result.get("success", False):
                sections.append(
                    f"\n[{tool_name}] - Failed: {tool_result.get('error', 'unknown')}\n"
                )
                continue

            sections.append(f"\n[{tool_name}]\n")
            data = tool_result.get("data", {})
            render = renderers.get(tool_name)
            if render is None:
                sections.append(f"{data}\n")
            else:
                sections.append(render(data))

        sections.append(f"\n(Tools executed in {summary.get('total_time_ms', 0)}ms)\n")
        sections.append("=" * 40 + "\n")

        return "".join(sections)

    def _format_rag_results(self, data: Any) -> str:
        """
        Format RAG/memory search results.

        Shows at most the top three memories, each cut to 100 characters and
        followed by "...".

        Args:
            data: Memory search payload. Normally a list of dicts carrying
                "text" or "content"; plain (non-dict) items, non-string text
                values, and a dict payload are tolerated as well.
                NOTE(review): the dict-with-"results" shape mirrors what the
                web formatter accepts - confirm against the memory client.

        Returns:
            Human-readable block, or a "no memories" line for empty input
        """
        if isinstance(data, dict):
            # Unwrap {"results": [...]}; any other dict is treated as a
            # single memory rather than being sliced (which would raise).
            data = data["results"] if "results" in data else [data]

        if not data:
            return "No relevant memories found.\n"

        lines = ["Relevant memories:\n"]
        for i, item in enumerate(data[:3], 1):  # Top 3
            text = None
            if isinstance(item, dict):
                text = item.get("text", item.get("content"))
            if text is None:
                # Non-dict items and dicts without usable text fall back to
                # their string form.
                text = str(item)
            # str(): text values are not guaranteed to be strings.
            lines.append(f"  {i}. {str(text)[:100]}...\n")
        return "".join(lines)

    def _format_web_results(self, data: Any) -> str:
        """
        Format web search results.

        Shows at most the top three hits as a title line plus a snippet cut
        to 100 characters and followed by "...".

        Args:
            data: Either a list of hits or a dict with a "results" list; a
                dict with a truthy "error" is reported as a failure. Hits are
                normally dicts with "title" and "snippet"/"description";
                non-dict hits are shown by their string form.

        Returns:
            Human-readable block, a failure line, or a "no results" line
        """
        if isinstance(data, dict) and data.get("error"):
            return f"Web search failed: {data['error']}\n"

        results = data.get("results", []) if isinstance(data, dict) else data
        if not results:
            return "No web results found.\n"

        lines = ["Web search results:\n"]
        for i, item in enumerate(results[:3], 1):  # Top 3
            if isinstance(item, dict):
                # "or" rather than get() defaults: a key that is present but
                # None/empty must still fall through to the next option.
                title = item.get("title") or "No title"
                snippet = item.get("snippet") or item.get("description") or ""
            else:
                title, snippet = str(item), ""
            lines.append(f"  {i}. {title}\n     {str(snippet)[:100]}...\n")
        return "".join(lines)

    def _format_weather_results(self, data: Any) -> str:
        """
        Format weather results.

        Args:
            data: Weather payload. A dict with a truthy "error" is reported
                as a failure; otherwise a dict is read for "temperature",
                "conditions" and "location" (each optional). A non-dict
                payload is shown by its string form instead of raising.

        Returns:
            One-line weather summary, failure line, or "no data" line
        """
        if not isinstance(data, dict):
            # The tool's return type is not enforced here; a str/None payload
            # must not abort formatting of the remaining tool results.
            return f"{data}\n" if data else "No weather data available.\n"

        if data.get("error"):
            return f"Weather lookup failed: {data['error']}\n"

        # Assuming weather API returns temp, conditions, etc.
        temp = data.get("temperature", "unknown")
        conditions = data.get("conditions", "unknown")
        location = data.get("location", "requested location")

        return f"Weather for {location}: {temp}, {conditions}\n"

    def _format_codebrain_results(self, data: Any) -> str:
        """
        Render a codebrain payload for the prompt.

        A dict carrying a truthy "error" becomes a failure line; anything
        else is emitted via its string form followed by a newline.
        """
        has_error = isinstance(data, dict) and data.get("error")
        if not has_error:
            # No dedicated layout yet: emit the payload as-is.
            return f"{data}\n"
        return f"Codebrain failed: {data['error']}\n"