Files
project-lyra/cortex/router.py
T
serversdown 5f53fb32a4 feat: Refactor LLM router and integrate health check endpoint
- Simplified LLM call logic in llm_router.py, removing tool adapter complexity and enhancing error handling.
- Added health check endpoint to main.py for system status verification.
- Cleaned up router.py by removing unused imports and commented-out code, streamlining the structure.
- Updated docker-compose.yml to unify services under a single Lyra container, enhancing deployment simplicity.
- Created Dockerfile for unified container setup, including both Relay and Cortex services.
- Added QUICKSTART.md for improved onboarding and usage instructions.
- Implemented start.sh script to manage service startup and health checks.
2026-05-29 18:20:56 -04:00

169 lines
5.3 KiB
Python

# router.py
import os
import logging
import asyncio
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from intake.intake import add_exchange_internal
# -------------------------------------------------------------------
# Setup
# -------------------------------------------------------------------
# Log verbosity requested through the environment, normalised to lower case.
LOG_DETAIL_LEVEL = os.environ.get("LOG_DETAIL_LEVEL", "summary").lower()

# Module logger: level is pinned to INFO and a dedicated stream handler with
# a [ROUTER]-tagged format is attached at import time.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

console_handler = logging.StreamHandler()
console_handler.setFormatter(
    logging.Formatter(
        fmt="%(asctime)s [ROUTER] %(levelname)s: %(message)s",
        datefmt="%H:%M:%S",
    )
)
logger.addHandler(console_handler)
# Router on which this module's endpoints (/simple, /ingest) are registered.
cortex_router = APIRouter()
# -------------------------------------------------------------------
# Models
# -------------------------------------------------------------------
class ReasonRequest(BaseModel):
    """Request body accepted by the ``/simple`` endpoint."""

    # Session whose recent messages are fetched and to which the new
    # exchange is appended.
    session_id: str
    # The user's current message.
    user_prompt: str
    # Sampling temperature; the /simple handler uses 0.7 when this is None.
    temperature: float | None = None
    # LLM backend name; when empty the /simple handler falls back to the
    # STANDARD_MODE_LLM environment variable (default "SECONDARY").
    backend: str | None = None
# -------------------------------------------------------------------
# /simple endpoint - Standard chatbot mode (no reasoning pipeline)
# -------------------------------------------------------------------
@cortex_router.post("/simple")
async def run_simple(req: ReasonRequest):
    """
    Standard chatbot mode - bypasses all cortex reasoning pipeline.

    Builds a message list from a fixed system prompt, up to 20 recent
    messages fetched from the Intake buffer for ``req.session_id`` and the
    current user prompt, sends it to the LLM in a single call, and records
    the user/assistant exchange back into Intake.

    Args:
        req: Request carrying the session id, the user prompt and the
            optional ``temperature`` / ``backend`` overrides.

    Returns:
        dict: ``draft``, ``neutral`` and ``persona`` all hold the same
        response text, ``reflection`` is an empty string, ``session_id``
        echoes the request, and ``context_summary`` reports the number of
        messages sent plus ``mode: "standard"``.

    Notes:
        Failures of the LLM call or of the session update are not raised to
        the caller. If the LLM call fails, the response text becomes
        ``"Error: <exception>"``; if the session update fails, a warning is
        logged and the response is still returned.
    """
    import time
    from datetime import datetime
    from llm.llm_router import call_llm
    from intake.intake import get_recent_messages

    # Monotonic clock: the reported duration cannot be skewed by wall-clock
    # adjustments the way a difference of datetime.now() values can.
    started = time.perf_counter()

    logger.info(f"\n{'='*100}")
    logger.info(f"💬 SIMPLE MODE | Session: {req.session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
    logger.info(f"{'='*100}")
    logger.info(f"📝 User: {_preview(req.user_prompt, 150)}")
    logger.info(f"{'-'*100}\n")

    # Get recent messages from Intake buffer
    recent_msgs = get_recent_messages(req.session_id, limit=20)
    logger.info(f"📋 Retrieved {len(recent_msgs)} recent messages from Intake buffer")

    # Build simple conversation history, starting with the system message
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful AI assistant. Provide direct, concise responses to the user's questions. "
                "Maintain context from previous messages in the conversation."
            ),
        }
    ]

    # Add conversation history
    for msg in recent_msgs:
        messages.append({
            "role": msg.get("role", "user"),
            "content": msg.get("content", ""),
        })
        # _preview tolerates None / non-string content, so a malformed
        # history entry cannot fail the request from inside a log call.
        logger.info(f" - {msg.get('role')}: {_preview(msg.get('content', ''), 50)}")

    # Add current user message
    messages.append({"role": "user", "content": req.user_prompt})
    logger.info(f"📨 Total messages being sent to LLM: {len(messages)} (including system message)")

    # Get backend from request, otherwise fall back to env variable;
    # normalize to uppercase either way.
    backend = (req.backend or os.getenv("STANDARD_MODE_LLM", "SECONDARY")).upper()
    logger.info(f"🔧 Using backend: {backend}")

    temperature = req.temperature if req.temperature is not None else 0.7

    # Direct LLM call without tools
    try:
        raw_response = await call_llm(
            messages=messages,
            backend=backend,
            temperature=temperature,
            max_tokens=2048,
        )
        response = raw_response.strip()
    except Exception as e:
        # Best-effort by design: the failure is reported in the response
        # body instead of being raised. logger.exception keeps the traceback.
        logger.exception(f"❌ LLM call failed: {e}")
        response = f"Error: {str(e)}"

    # Update session with the exchange.
    # NOTE(review): after an LLM failure the "Error: ..." text is stored as
    # the assistant turn and will be part of later history - confirm intended.
    # NOTE(review): these payloads use role/content keys while /ingest sends
    # user_msg/assistant_msg to the same function - verify against Intake.
    try:
        add_exchange_internal({
            "session_id": req.session_id,
            "role": "user",
            "content": req.user_prompt,
        })
        add_exchange_internal({
            "session_id": req.session_id,
            "role": "assistant",
            "content": response,
        })
    except Exception as e:
        logger.warning(f"⚠️ Session update failed: {e}")

    duration = (time.perf_counter() - started) * 1000
    logger.info(f"\n{'='*100}")
    logger.info(f"✨ SIMPLE MODE COMPLETE | Session: {req.session_id} | Total: {duration:.0f}ms")
    logger.info(f"📤 Output: {len(response)} chars")
    logger.info(f"{'='*100}\n")

    return {
        "draft": response,
        "neutral": response,
        "persona": response,
        "reflection": "",
        "session_id": req.session_id,
        "context_summary": {
            "message_count": len(messages),
            "mode": "standard",
        },
    }


def _preview(text: object, limit: int) -> str:
    """Return ``text`` shortened to at most ``limit`` characters for logging.

    ``None`` becomes an empty string and other non-string values are
    converted with ``str()``. ``"..."`` is appended only when characters
    were actually cut off.
    """
    if text is None:
        return ""
    text = str(text)
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."
# -------------------------------------------------------------------
# /ingest endpoint (internal)
# -------------------------------------------------------------------
class IngestPayload(BaseModel):
    """Request body accepted by the internal ``/ingest`` endpoint."""

    # Session the exchange is recorded under.
    session_id: str
    # Text of the user's turn.
    user_msg: str
    # Text of the assistant's turn.
    assistant_msg: str
@cortex_router.post("/ingest")
async def ingest(payload: IngestPayload):
    """
    Forward one user/assistant exchange to Intake.

    The hand-off is best-effort: if Intake raises, the error is logged as a
    warning and the endpoint still answers with ``status: "ok"``.

    Args:
        payload: Session id plus the user and assistant texts to record.

    Returns:
        dict: ``{"status": "ok", "session_id": <payload.session_id>}``.
    """
    exchange = {
        "session_id": payload.session_id,
        "user_msg": payload.user_msg,
        "assistant_msg": payload.assistant_msg,
    }

    try:
        add_exchange_internal(exchange)
    except Exception as exc:
        logger.warning(f"[INGEST] Intake update failed: {exc}")

    return {"status": "ok", "session_id": payload.session_id}