Cortex debugging logs cleaned up
This commit is contained in:
@@ -26,7 +26,12 @@ from neomem_client import NeoMemClient
|
||||
# NeoMem service endpoint and on/off switch (both env-overridable).
NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000")
NEOMEM_ENABLED = os.getenv("NEOMEM_ENABLED", "false").lower() == "true"
# Presumably the minimum RAG similarity score a memory must reach to be
# included — TODO confirm at the use site (not visible in this chunk).
RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4"))
# Logging verbosity: VERBOSE_DEBUG enables DEBUG-level logging;
# LOG_DETAIL_LEVEL selects "summary" / "detailed" / "verbose" output.
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
LOG_DETAIL_LEVEL = os.getenv("LOG_DETAIL_LEVEL", "summary").lower()

# Loop detection settings
MAX_MESSAGE_HISTORY = int(os.getenv("MAX_MESSAGE_HISTORY", "100"))  # Prevent unbounded growth
SESSION_TTL_HOURS = int(os.getenv("SESSION_TTL_HOURS", "24"))  # Auto-expire old sessions
ENABLE_DUPLICATE_DETECTION = os.getenv("ENABLE_DUPLICATE_DETECTION", "true").lower() == "true"

# Tools available for future autonomy features
TOOLS_AVAILABLE = ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"]
|
||||
@@ -39,34 +44,18 @@ SESSION_STATE: Dict[str, Dict[str, Any]] = {}
|
||||
# Logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Set logging level based on VERBOSE_DEBUG
|
||||
if VERBOSE_DEBUG:
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
# Console handler
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setFormatter(logging.Formatter(
|
||||
'%(asctime)s [CONTEXT] %(levelname)s: %(message)s',
|
||||
datefmt='%H:%M:%S'
|
||||
))
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
# File handler - append to log file
|
||||
try:
|
||||
os.makedirs('/app/logs', exist_ok=True)
|
||||
file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
|
||||
file_handler.setFormatter(logging.Formatter(
|
||||
'%(asctime)s [CONTEXT] %(levelname)s: %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
))
|
||||
logger.addHandler(file_handler)
|
||||
logger.debug("VERBOSE_DEBUG mode enabled for context.py - logging to file")
|
||||
except Exception as e:
|
||||
logger.debug(f"VERBOSE_DEBUG mode enabled for context.py - file logging failed: {e}")
|
||||
# Always set up basic logging
|
||||
logger.setLevel(logging.INFO)
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setFormatter(logging.Formatter(
|
||||
'%(asctime)s [CONTEXT] %(levelname)s: %(message)s',
|
||||
datefmt='%H:%M:%S'
|
||||
))
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Session initialization
|
||||
# Session initialization & cleanup
|
||||
# -----------------------------
|
||||
def _init_session(session_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
@@ -86,9 +75,76 @@ def _init_session(session_id: str) -> Dict[str, Any]:
|
||||
"active_project": None, # Future: project context
|
||||
"message_count": 0,
|
||||
"message_history": [],
|
||||
"last_message_hash": None, # For duplicate detection
|
||||
}
|
||||
|
||||
|
||||
def _cleanup_expired_sessions():
    """Remove sessions that haven't been active for SESSION_TTL_HOURS.

    Scans SESSION_STATE, collects the ids of sessions whose last activity
    is older than SESSION_TTL_HOURS, then deletes them in a second pass
    (deleting while iterating a dict would raise RuntimeError).

    Returns:
        int: number of sessions removed.
    """
    now = datetime.now()
    expired_sessions = []

    for session_id, state in SESSION_STATE.items():
        # Sessions that never recorded a message fall back to creation time.
        # NOTE(review): if neither key exists this is None and the subtraction
        # below raises TypeError — assumes _init_session always sets one; confirm.
        last_active = state.get("last_timestamp", state.get("created_at"))
        time_since_active = (now - last_active).total_seconds() / 3600  # hours

        if time_since_active > SESSION_TTL_HOURS:
            expired_sessions.append(session_id)

    for session_id in expired_sessions:
        del SESSION_STATE[session_id]
        logger.info(f"🗑️ Expired session: {session_id} (inactive for {SESSION_TTL_HOURS}+ hours)")

    return len(expired_sessions)
|
||||
|
||||
|
||||
def _is_duplicate_message(session_id: str, user_prompt: str) -> bool:
    """Return True when the prompt exactly repeats the session's last message.

    Compares an MD5 digest of the normalized (trimmed, lowercased) prompt
    against the digest stored on the session, catching exact duplicates and
    processing loops. On a non-duplicate, the stored digest is refreshed so
    the next call compares against this message.
    """
    if not ENABLE_DUPLICATE_DETECTION:
        return False

    import hashlib

    state = SESSION_STATE.get(session_id)
    # Unknown (or empty) session state: nothing to compare against.
    if not state:
        return False

    # Normalize before hashing so whitespace/case variations still match.
    digest = hashlib.md5(user_prompt.strip().lower().encode()).hexdigest()

    if state.get("last_message_hash") == digest:
        logger.warning(
            f"⚠️ DUPLICATE MESSAGE DETECTED | Session: {session_id} | "
            f"Message: {user_prompt[:80]}..."
        )
        return True

    # Remember this digest for the next duplicate check.
    state["last_message_hash"] = digest
    return False
|
||||
|
||||
|
||||
def _trim_message_history(state: Dict[str, Any]):
    """Trim message history to prevent unbounded growth.

    Keeps only the most recent MAX_MESSAGE_HISTORY entries of
    state["message_history"], mutating the dict in place.

    Args:
        state: per-session state dict holding a "message_history" list.
    """
    history = state.get("message_history", [])

    if len(history) > MAX_MESSAGE_HISTORY:
        trimmed_count = len(history) - MAX_MESSAGE_HISTORY
        state["message_history"] = history[-MAX_MESSAGE_HISTORY:]
        # Use .get here: the visible _init_session fragment does not show a
        # "session_id" key, so direct indexing could raise KeyError mid-trim
        # — TODO confirm against _init_session.
        session_label = state.get("session_id", "<unknown>")
        logger.info(f"✂️ Trimmed {trimmed_count} old messages from session {session_label}")
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Intake context retrieval
|
||||
# -----------------------------
|
||||
@@ -223,26 +279,42 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
|
||||
}
|
||||
"""
|
||||
|
||||
# A. Initialize session state if needed
|
||||
# A. Cleanup expired sessions periodically (every 100th call)
|
||||
import random
|
||||
if random.randint(1, 100) == 1:
|
||||
_cleanup_expired_sessions()
|
||||
|
||||
# B. Initialize session state if needed
|
||||
if session_id not in SESSION_STATE:
|
||||
SESSION_STATE[session_id] = _init_session(session_id)
|
||||
logger.info(f"Initialized new session: {session_id}")
|
||||
if VERBOSE_DEBUG:
|
||||
logger.debug(f"[COLLECT_CONTEXT] New session state: {SESSION_STATE[session_id]}")
|
||||
|
||||
state = SESSION_STATE[session_id]
|
||||
|
||||
if VERBOSE_DEBUG:
|
||||
logger.debug(f"[COLLECT_CONTEXT] Session {session_id} - User prompt: {user_prompt[:100]}...")
|
||||
# C. Check for duplicate messages (loop detection)
|
||||
if _is_duplicate_message(session_id, user_prompt):
|
||||
# Return cached context with warning flag
|
||||
logger.warning(f"🔁 LOOP DETECTED - Returning cached context to prevent processing duplicate")
|
||||
context_state = {
|
||||
"session_id": session_id,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"minutes_since_last_msg": 0,
|
||||
"message_count": state["message_count"],
|
||||
"intake": {},
|
||||
"rag": [],
|
||||
"mode": state["mode"],
|
||||
"mood": state["mood"],
|
||||
"active_project": state["active_project"],
|
||||
"tools_available": TOOLS_AVAILABLE,
|
||||
"duplicate_detected": True,
|
||||
}
|
||||
return context_state
|
||||
|
||||
# B. Calculate time delta
|
||||
now = datetime.now()
|
||||
time_delta_seconds = (now - state["last_timestamp"]).total_seconds()
|
||||
minutes_since_last_msg = round(time_delta_seconds / 60.0, 2)
|
||||
|
||||
if VERBOSE_DEBUG:
|
||||
logger.debug(f"[COLLECT_CONTEXT] Time since last message: {minutes_since_last_msg:.2f} minutes")
|
||||
|
||||
# C. Gather Intake context (multilevel summaries)
|
||||
# Build compact message buffer for Intake:
|
||||
messages_for_intake = []
|
||||
@@ -257,12 +329,6 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
|
||||
|
||||
intake_data = await _get_intake_context(session_id, messages_for_intake)
|
||||
|
||||
|
||||
if VERBOSE_DEBUG:
|
||||
import json
|
||||
logger.debug(f"[COLLECT_CONTEXT] Intake data retrieved:")
|
||||
logger.debug(json.dumps(intake_data, indent=2, default=str))
|
||||
|
||||
# D. Search NeoMem for relevant memories
|
||||
if NEOMEM_ENABLED:
|
||||
rag_results = await _search_neomem(
|
||||
@@ -274,23 +340,20 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
|
||||
rag_results = []
|
||||
logger.info("Skipping NeoMem RAG retrieval; NEOMEM_ENABLED is false")
|
||||
|
||||
if VERBOSE_DEBUG:
|
||||
logger.debug(f"[COLLECT_CONTEXT] NeoMem search returned {len(rag_results)} results")
|
||||
for idx, result in enumerate(rag_results, 1):
|
||||
score = result.get("score", 0)
|
||||
data_preview = str(result.get("payload", {}).get("data", ""))[:100]
|
||||
logger.debug(f" [{idx}] Score: {score:.3f} - {data_preview}...")
|
||||
|
||||
# E. Update session state
|
||||
state["last_user_message"] = user_prompt
|
||||
state["last_timestamp"] = now
|
||||
state["message_count"] += 1
|
||||
|
||||
# Save user turn to history
|
||||
state["message_history"].append({
|
||||
"user": user_prompt,
|
||||
"assistant": "" # assistant reply filled later by update_last_assistant_message()
|
||||
"user": user_prompt,
|
||||
"assistant": "" # assistant reply filled later by update_last_assistant_message()
|
||||
})
|
||||
|
||||
# Trim history to prevent unbounded growth
|
||||
_trim_message_history(state)
|
||||
|
||||
|
||||
|
||||
# F. Assemble unified context
|
||||
@@ -307,18 +370,54 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
|
||||
"tools_available": TOOLS_AVAILABLE,
|
||||
}
|
||||
|
||||
# Log context summary in structured format
|
||||
logger.info(
|
||||
f"Context collected for session {session_id}: "
|
||||
f"{len(rag_results)} RAG results, "
|
||||
f"{minutes_since_last_msg:.1f} minutes since last message"
|
||||
f"📊 Context | Session: {session_id} | "
|
||||
f"Messages: {state['message_count']} | "
|
||||
f"Last: {minutes_since_last_msg:.1f}min | "
|
||||
f"RAG: {len(rag_results)} results"
|
||||
)
|
||||
|
||||
if VERBOSE_DEBUG:
|
||||
logger.debug(f"[COLLECT_CONTEXT] Final context state assembled:")
|
||||
logger.debug(f" - Message count: {state['message_count']}")
|
||||
logger.debug(f" - Mode: {state['mode']}, Mood: {state['mood']}")
|
||||
logger.debug(f" - Active project: {state['active_project']}")
|
||||
logger.debug(f" - Tools available: {TOOLS_AVAILABLE}")
|
||||
# Show detailed context in detailed/verbose mode
|
||||
if LOG_DETAIL_LEVEL in ["detailed", "verbose"]:
|
||||
import json
|
||||
logger.info(f"\n{'─'*100}")
|
||||
logger.info(f"[CONTEXT] Session {session_id} | User: {user_prompt[:80]}...")
|
||||
logger.info(f"{'─'*100}")
|
||||
logger.info(f" Mode: {state['mode']} | Mood: {state['mood']} | Project: {state['active_project']}")
|
||||
logger.info(f" Tools: {', '.join(TOOLS_AVAILABLE)}")
|
||||
|
||||
# Show intake summaries (condensed)
|
||||
if intake_data:
|
||||
logger.info(f"\n ╭─ INTAKE SUMMARIES ────────────────────────────────────────────────")
|
||||
for level in ["L1", "L5", "L10", "L20", "L30"]:
|
||||
if level in intake_data:
|
||||
summary = intake_data[level]
|
||||
if isinstance(summary, dict):
|
||||
summary_text = summary.get("summary", str(summary)[:100])
|
||||
else:
|
||||
summary_text = str(summary)[:100]
|
||||
logger.info(f" │ {level:4s}: {summary_text}...")
|
||||
logger.info(f" ╰───────────────────────────────────────────────────────────────────")
|
||||
|
||||
# Show RAG results (condensed)
|
||||
if rag_results:
|
||||
logger.info(f"\n ╭─ RAG RESULTS ({len(rag_results)}) ──────────────────────────────────────────────")
|
||||
for idx, result in enumerate(rag_results[:5], 1): # Show top 5
|
||||
score = result.get("score", 0)
|
||||
data_preview = str(result.get("payload", {}).get("data", ""))[:60]
|
||||
logger.info(f" │ [{idx}] {score:.3f} | {data_preview}...")
|
||||
if len(rag_results) > 5:
|
||||
logger.info(f" │ ... and {len(rag_results) - 5} more results")
|
||||
logger.info(f" ╰───────────────────────────────────────────────────────────────────")
|
||||
|
||||
# Show full raw data only in verbose mode
|
||||
if LOG_DETAIL_LEVEL == "verbose":
|
||||
logger.info(f"\n ╭─ RAW INTAKE DATA ─────────────────────────────────────────────────")
|
||||
logger.info(f" │ {json.dumps(intake_data, indent=4, default=str)}")
|
||||
logger.info(f" ╰───────────────────────────────────────────────────────────────────")
|
||||
|
||||
logger.info(f"{'─'*100}\n")
|
||||
|
||||
return context_state
|
||||
|
||||
@@ -346,9 +445,6 @@ def update_last_assistant_message(session_id: str, message: str) -> None:
|
||||
# history entry already contains {"user": "...", "assistant": "...?"}
|
||||
history[-1]["assistant"] = message
|
||||
|
||||
if VERBOSE_DEBUG:
|
||||
logger.debug(f"Updated assistant message for session {session_id}")
|
||||
|
||||
|
||||
|
||||
def get_session_state(session_id: str) -> Optional[Dict[str, Any]]:
|
||||
|
||||
Reference in New Issue
Block a user