5f53fb32a4
- Simplified LLM call logic in llm_router.py, removing tool adapter complexity and enhancing error handling. - Added health check endpoint to main.py for system status verification. - Cleaned up router.py by removing unused imports and commented-out code, streamlining the structure. - Updated docker-compose.yml to unify services under a single Lyra container, enhancing deployment simplicity. - Created Dockerfile for unified container setup, including both Relay and Cortex services. - Added QUICKSTART.md for improved onboarding and usage instructions. - Implemented start.sh script to manage service startup and health checks.
169 lines
5.3 KiB
Python
169 lines
5.3 KiB
Python
# router.py
|
|
|
|
import os
|
|
import logging
|
|
import asyncio
|
|
from fastapi import APIRouter
|
|
from fastapi.responses import StreamingResponse
|
|
from pydantic import BaseModel
|
|
from intake.intake import add_exchange_internal
|
|
|
|
# Setup
|
|
# -------------------------------------------------------------------
|
|
# Verbosity setting read from the environment, lower-cased; "summary" when the
# variable is unset. It is not consulted by the logging setup below.
LOG_DETAIL_LEVEL = os.getenv("LOG_DETAIL_LEVEL", "summary").lower()

logger = logging.getLogger(__name__)

# Always set up basic logging: INFO and above, timestamped, tagged [ROUTER].
logger.setLevel(logging.INFO)

console_handler = logging.StreamHandler()
console_handler.setFormatter(
    logging.Formatter(
        "%(asctime)s [ROUTER] %(levelname)s: %(message)s",
        datefmt="%H:%M:%S",
    )
)

# logging.getLogger() hands back the same logger object every time this module
# body runs (e.g. importlib.reload). Attaching unconditionally would stack
# handlers and print each record once per attached handler.
if not logger.handlers:
    logger.addHandler(console_handler)
|
|
|
|
# Router instance that the endpoint decorators in this module register on.
cortex_router = APIRouter()
|
|
|
|
# -------------------------------------------------------------------
|
|
# Models
|
|
# -------------------------------------------------------------------
|
|
class ReasonRequest(BaseModel):
    """Request body accepted by the /simple endpoint in this module.

    The two optional fields default to None, which /simple treats as
    "use the server-side default".
    """

    # Conversation key: used to look up recent messages and to store the
    # new exchange.
    session_id: str

    # The user's message for this turn.
    user_prompt: str

    # Sampling temperature; /simple substitutes 0.7 when this is None.
    temperature: float | None = None

    # LLM backend name; when empty, /simple reads STANDARD_MODE_LLM from the
    # environment and falls back to "SECONDARY".
    backend: str | None = None
|
|
|
|
# -------------------------------------------------------------------
|
|
# /simple endpoint - Standard chatbot mode (no reasoning pipeline)
|
|
# -------------------------------------------------------------------
|
|
# Tunables for /simple. The backend default can be overridden per request or
# through the STANDARD_MODE_LLM environment variable.
_SIMPLE_HISTORY_LIMIT = 20
_SIMPLE_DEFAULT_TEMPERATURE = 0.7
_SIMPLE_MAX_TOKENS = 2048
_SIMPLE_DEFAULT_BACKEND = "SECONDARY"
_SIMPLE_SYSTEM_PROMPT = (
    "You are a helpful AI assistant. Provide direct, concise responses to the user's questions. "
    "Maintain context from previous messages in the conversation."
)


def _preview(text, limit):
    """Return *text* cut to *limit* characters, adding "..." only when cut.

    None is treated as an empty string so a stored message without content
    cannot break logging.
    """
    text = "" if text is None else str(text)
    return text if len(text) <= limit else f"{text[:limit]}..."


def _build_simple_messages(history, user_prompt):
    """Assemble the LLM payload: system prompt, prior turns, new user turn."""
    messages = [{"role": "system", "content": _SIMPLE_SYSTEM_PROMPT}]
    messages.extend(
        {
            "role": msg.get("role", "user"),
            # A stored None would otherwise be forwarded to the LLM as-is.
            "content": msg.get("content") or "",
        }
        for msg in history
    )
    messages.append({"role": "user", "content": user_prompt})
    return messages


@cortex_router.post("/simple")
async def run_simple(req: ReasonRequest):
    """
    Standard chatbot mode - bypasses all cortex reasoning pipeline.

    Loads the session's recent messages, prepends a fixed system prompt,
    appends the new user prompt and makes a single LLM call.

    Args:
        req: Session id, user prompt and optional temperature / backend
            overrides.

    Returns:
        A dict with the reply under "draft", "neutral" and "persona" (all the
        same text), an empty "reflection", the "session_id", and a
        "context_summary" holding the number of messages sent and
        mode "standard". If the LLM call fails, the reply text is
        "Error: <message>" rather than an HTTP error.
    """
    # Imported here, as in the original, rather than at module import time.
    import time
    from datetime import datetime

    from intake.intake import get_recent_messages
    from llm.llm_router import call_llm

    # Monotonic clock: the elapsed time is immune to wall-clock adjustments.
    started = time.perf_counter()
    banner = "=" * 100

    logger.info(f"\n{banner}")
    logger.info(f"💬 SIMPLE MODE | Session: {req.session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
    logger.info(banner)
    logger.info(f"📝 User: {_preview(req.user_prompt, 150)}")
    logger.info(f"{'-' * 100}\n")

    # Get recent messages from Intake buffer; tolerate a None result.
    history = get_recent_messages(req.session_id, limit=_SIMPLE_HISTORY_LIMIT) or []
    logger.info(f"📋 Retrieved {len(history)} recent messages from Intake buffer")
    for msg in history:
        logger.info(f" - {msg.get('role')}: {_preview(msg.get('content'), 50)}")

    messages = _build_simple_messages(history, req.user_prompt)
    logger.info(f"📨 Total messages being sent to LLM: {len(messages)} (including system message)")

    # Request value wins, then the env variable, then the built-in default.
    # Chaining with `or` also covers an env variable that is set but empty.
    backend = (req.backend or os.getenv("STANDARD_MODE_LLM") or _SIMPLE_DEFAULT_BACKEND).upper()
    logger.info(f"🔧 Using backend: {backend}")

    temperature = req.temperature if req.temperature is not None else _SIMPLE_DEFAULT_TEMPERATURE

    llm_ok = True
    try:
        raw_response = await call_llm(
            messages=messages,
            backend=backend,
            temperature=temperature,
            max_tokens=_SIMPLE_MAX_TOKENS,
        )
        response = raw_response.strip()
    except Exception as e:
        # Boundary handler: the endpoint answers with the error text instead
        # of failing the request. logger.exception keeps the traceback.
        llm_ok = False
        logger.exception(f"❌ LLM call failed: {e}")
        response = f"Error: {str(e)}"

    if llm_ok:
        # Update session with the exchange (best effort).
        try:
            for role, content in (("user", req.user_prompt), ("assistant", response)):
                add_exchange_internal({
                    "session_id": req.session_id,
                    "role": role,
                    "content": content,
                })
        except Exception as e:
            logger.warning(f"⚠️ Session update failed: {e}")
    else:
        # Storing "Error: ..." as an assistant turn would feed it back to the
        # model as conversation history on the next request.
        logger.warning("⚠️ Session update skipped: LLM call failed")

    duration = (time.perf_counter() - started) * 1000

    logger.info(f"\n{banner}")
    logger.info(f"✨ SIMPLE MODE COMPLETE | Session: {req.session_id} | Total: {duration:.0f}ms")
    logger.info(f"📤 Output: {len(response)} chars")
    logger.info(f"{banner}\n")

    return {
        "draft": response,
        "neutral": response,
        "persona": response,
        "reflection": "",
        "session_id": req.session_id,
        "context_summary": {
            "message_count": len(messages),
            "mode": "standard",
        },
    }
|
|
|
|
# -------------------------------------------------------------------
|
|
# /ingest endpoint (internal)
|
|
# -------------------------------------------------------------------
|
|
class IngestPayload(BaseModel):
    """Request body for the internal /ingest endpoint: one completed exchange."""

    # Session the exchange belongs to.
    session_id: str

    # Text of the user's side of the exchange.
    user_msg: str

    # Text of the assistant's side of the exchange.
    assistant_msg: str
|
|
|
|
|
|
@cortex_router.post("/ingest")
async def ingest(payload: IngestPayload):
    """Forward one user/assistant exchange to the intake buffer.

    The intake update is best effort: a failure is logged as a warning and
    the endpoint still answers normally.

    Returns:
        {"status": "ok", "session_id": <the payload's session id>}
    """
    # NOTE(review): "status" is "ok" even when the intake update raised, so
    # callers cannot detect a dropped exchange - confirm this is intended.
    # NOTE(review): this sends user_msg/assistant_msg keys, whereas /simple
    # sends role/content keys to the same function - verify that
    # add_exchange_internal accepts both shapes.
    try:
        exchange = {
            "session_id": payload.session_id,
            "user_msg": payload.user_msg,
            "assistant_msg": payload.assistant_msg,
        }
        add_exchange_internal(exchange)
    except Exception as exc:
        logger.warning(f"[INGEST] Intake update failed: {exc}")

    result = {"status": "ok", "session_id": payload.session_id}
    return result
|