# router.py
#
# Routes registered on `cortex_router`:
#   POST /simple  - plain chat loop that calls the LLM directly
#   POST /ingest  - internal hook that forwards an exchange to Intake
import asyncio
import logging
import os
import time

from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from intake.intake import add_exchange_internal

# -------------------------------------------------------------------
# Setup
# -------------------------------------------------------------------
LOG_DETAIL_LEVEL = os.getenv("LOG_DETAIL_LEVEL", "summary").lower()

logger = logging.getLogger(__name__)

# Always set up basic logging
logger.setLevel(logging.INFO)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
    '%(asctime)s [ROUTER] %(levelname)s: %(message)s',
    datefmt='%H:%M:%S'
))
# Attach the handler only once: if this module is re-imported or reloaded,
# the logger object is the same and a second handler would duplicate
# every log line.
if not logger.handlers:
    logger.addHandler(console_handler)

cortex_router = APIRouter()

# System prompt prepended to every /simple conversation.
_SIMPLE_SYSTEM_PROMPT = (
    "You are a helpful AI assistant. Provide direct, concise responses to the user's questions. "
    "Maintain context from previous messages in the conversation."
)


# -------------------------------------------------------------------
# Models
# -------------------------------------------------------------------
class ReasonRequest(BaseModel):
    # Request body for /simple.
    session_id: str
    user_prompt: str
    # Optional sampling temperature; /simple falls back to 0.7 when omitted.
    temperature: float | None = None
    # Optional backend name; /simple falls back to the STANDARD_MODE_LLM
    # environment variable (default "SECONDARY") when omitted or empty.
    backend: str | None = None


def _build_simple_messages(recent_msgs, user_prompt):
    """Return the chat message list: system prompt, history, new user turn.

    Args:
        recent_msgs: Iterable of dict-like history entries read with
            ``.get("role")`` / ``.get("content")``; may be empty or None.
        user_prompt: Text of the current user message, appended last.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts.
    """
    messages = [{"role": "system", "content": _SIMPLE_SYSTEM_PROMPT}]

    for msg in recent_msgs or ():
        content = msg.get("content", "")
        # A stored entry with an explicit `content: None` would otherwise
        # break the preview slice below and abort the whole request.
        if content is None:
            content = ""
        messages.append({
            "role": msg.get("role", "user"),
            "content": content
        })
        logger.info(f" - {msg.get('role')}: {str(content)[:50]}...")

    # Add current user message
    messages.append({
        "role": "user",
        "content": user_prompt
    })
    return messages


# -------------------------------------------------------------------
# /simple endpoint - Standard chatbot mode (no reasoning pipeline)
# -------------------------------------------------------------------
@cortex_router.post("/simple")
async def run_simple(req: ReasonRequest):
    """
    Standard chatbot mode - bypasses all cortex reasoning pipeline.
    Just a simple conversation loop like a typical chatbot.
    """
    # Flow: load recent history -> build message list -> call the LLM ->
    # record the exchange -> return the reply under the keys
    # "draft", "neutral" and "persona" (all the same text here).
    from datetime import datetime
    from llm.llm_router import call_llm
    from intake.intake import get_recent_messages

    # Monotonic clock: the elapsed time cannot be skewed by wall-clock changes.
    started = time.perf_counter()

    logger.info(f"\n{'='*100}")
    logger.info(f"💬 SIMPLE MODE | Session: {req.session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
    logger.info(f"{'='*100}")
    logger.info(f"📝 User: {req.user_prompt[:150]}...")
    logger.info(f"{'-'*100}\n")

    # Get recent messages from Intake buffer
    recent_msgs = get_recent_messages(req.session_id, limit=20)
    logger.info(f"📋 Retrieved {len(recent_msgs)} recent messages from Intake buffer")

    # Build simple conversation history with system message
    messages = _build_simple_messages(recent_msgs, req.user_prompt)
    logger.info(f"📨 Total messages being sent to LLM: {len(messages)} (including system message)")

    # Get backend from request, otherwise fall back to env variable
    backend = (req.backend or os.getenv("STANDARD_MODE_LLM", "SECONDARY")).upper()
    logger.info(f"🔧 Using backend: {backend}")

    temperature = req.temperature if req.temperature is not None else 0.7

    # Direct LLM call (no tools). Any failure is reported to the caller as
    # the response text instead of raising.
    try:
        raw_response = await call_llm(
            messages=messages,
            backend=backend,
            temperature=temperature,
            max_tokens=2048
        )
        response = raw_response.strip()
    except Exception as e:
        logger.error(f"❌ LLM call failed: {e}")
        response = f"Error: {str(e)}"

    # Update session with the exchange (best effort: a failure is only logged).
    # NOTE(review): on an LLM failure the "Error: ..." text is stored as the
    # assistant turn and will be replayed as history - confirm this is wanted.
    # NOTE(review): these payloads use role/content keys while /ingest passes
    # user_msg/assistant_msg to the same function - verify against intake.
    try:
        add_exchange_internal({
            "session_id": req.session_id,
            "role": "user",
            "content": req.user_prompt
        })
        add_exchange_internal({
            "session_id": req.session_id,
            "role": "assistant",
            "content": response
        })
    except Exception as e:
        logger.warning(f"⚠️ Session update failed: {e}")

    duration = (time.perf_counter() - started) * 1000

    logger.info(f"\n{'='*100}")
    logger.info(f"✨ SIMPLE MODE COMPLETE | Session: {req.session_id} | Total: {duration:.0f}ms")
    logger.info(f"📤 Output: {len(response)} chars")
    logger.info(f"{'='*100}\n")

    return {
        "draft": response,
        "neutral": response,
        "persona": response,
        "reflection": "",
        "session_id": req.session_id,
        "context_summary": {
            "message_count": len(messages),
            "mode": "standard"
        }
    }


# -------------------------------------------------------------------
# /ingest endpoint (internal)
# -------------------------------------------------------------------
class IngestPayload(BaseModel):
    # Request body for /ingest: one user/assistant exchange for a session.
    session_id: str
    user_msg: str
    assistant_msg: str


@cortex_router.post("/ingest")
async def ingest(payload: IngestPayload):
    """Forward one exchange to Intake; always answers with status "ok"."""
    # Best effort: an Intake failure is logged and not surfaced to the caller.
    # NOTE(review): this payload uses user_msg/assistant_msg keys while
    # /simple passes role/content to the same function - verify against intake.
    try:
        add_exchange_internal({
            "session_id": payload.session_id,
            "user_msg": payload.user_msg,
            "assistant_msg": payload.assistant_msg,
        })
    except Exception as e:
        logger.warning(f"[INGEST] Intake update failed: {e}")

    return {"status": "ok", "session_id": payload.session_id}