224 lines
11 KiB
Python
224 lines
11 KiB
Python
"""
|
|
Structured logging utilities for Cortex pipeline debugging.
|
|
|
|
Provides hierarchical, scannable logs with clear section markers and raw data visibility.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from typing import Any, Dict, List, Optional
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
|
|
|
|
class LogLevel(Enum):
|
|
"""Log detail levels"""
|
|
MINIMAL = 1 # Only errors and final results
|
|
SUMMARY = 2 # Stage summaries + errors
|
|
DETAILED = 3 # Include raw LLM outputs, RAG results
|
|
VERBOSE = 4 # Everything including intermediate states
|
|
|
|
|
|
class PipelineLogger:
|
|
"""
|
|
Hierarchical logger for cortex pipeline debugging.
|
|
|
|
Features:
|
|
- Clear visual section markers
|
|
- Collapsible detail sections
|
|
- Raw data dumps with truncation options
|
|
- Stage timing
|
|
- Error highlighting
|
|
"""
|
|
|
|
def __init__(self, logger: logging.Logger, level: LogLevel = LogLevel.SUMMARY):
|
|
self.logger = logger
|
|
self.level = level
|
|
self.stage_timings = {}
|
|
self.current_stage = None
|
|
self.stage_start_time = None
|
|
self.pipeline_start_time = None
|
|
|
|
def pipeline_start(self, session_id: str, user_prompt: str):
|
|
"""Mark the start of a pipeline run"""
|
|
self.pipeline_start_time = datetime.now()
|
|
self.stage_timings = {}
|
|
|
|
if self.level.value >= LogLevel.SUMMARY.value:
|
|
self.logger.info(f"\n{'='*100}")
|
|
self.logger.info(f"🚀 PIPELINE START | Session: {session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
|
|
self.logger.info(f"{'='*100}")
|
|
if self.level.value >= LogLevel.DETAILED.value:
|
|
self.logger.info(f"📝 User prompt: {user_prompt[:200]}{'...' if len(user_prompt) > 200 else ''}")
|
|
self.logger.info(f"{'-'*100}\n")
|
|
|
|
def stage_start(self, stage_name: str, description: str = ""):
|
|
"""Mark the start of a pipeline stage"""
|
|
self.current_stage = stage_name
|
|
self.stage_start_time = datetime.now()
|
|
|
|
if self.level.value >= LogLevel.SUMMARY.value:
|
|
timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
|
|
desc_suffix = f" - {description}" if description else ""
|
|
self.logger.info(f"▶️ [{stage_name}]{desc_suffix} | {timestamp}")
|
|
|
|
def stage_end(self, result_summary: str = ""):
|
|
"""Mark the end of a pipeline stage"""
|
|
if self.current_stage and self.stage_start_time:
|
|
duration_ms = (datetime.now() - self.stage_start_time).total_seconds() * 1000
|
|
self.stage_timings[self.current_stage] = duration_ms
|
|
|
|
if self.level.value >= LogLevel.SUMMARY.value:
|
|
summary_suffix = f" → {result_summary}" if result_summary else ""
|
|
self.logger.info(f"✅ [{self.current_stage}] Complete in {duration_ms:.0f}ms{summary_suffix}\n")
|
|
|
|
self.current_stage = None
|
|
self.stage_start_time = None
|
|
|
|
def log_llm_call(self, backend: str, prompt: str, response: Any, raw_response: str = None):
|
|
"""
|
|
Log LLM call details with proper formatting.
|
|
|
|
Args:
|
|
backend: Backend name (PRIMARY, SECONDARY, etc.)
|
|
prompt: Input prompt to LLM
|
|
response: Parsed response object
|
|
raw_response: Raw JSON response string
|
|
"""
|
|
if self.level.value >= LogLevel.DETAILED.value:
|
|
self.logger.info(f" 🧠 LLM Call | Backend: {backend}")
|
|
|
|
# Show prompt (truncated)
|
|
if isinstance(prompt, list):
|
|
prompt_preview = prompt[-1].get('content', '')[:150] if prompt else ''
|
|
else:
|
|
prompt_preview = str(prompt)[:150]
|
|
self.logger.info(f" Prompt: {prompt_preview}...")
|
|
|
|
# Show parsed response
|
|
if isinstance(response, dict):
|
|
response_text = (
|
|
response.get('reply') or
|
|
response.get('message', {}).get('content') or
|
|
str(response)
|
|
)[:200]
|
|
else:
|
|
response_text = str(response)[:200]
|
|
|
|
self.logger.info(f" Response: {response_text}...")
|
|
|
|
# Show raw response in collapsible block
|
|
if raw_response and self.level.value >= LogLevel.VERBOSE.value:
|
|
self.logger.debug(f" ╭─ RAW RESPONSE ────────────────────────────────────")
|
|
for line in raw_response.split('\n')[:50]: # Limit to 50 lines
|
|
self.logger.debug(f" │ {line}")
|
|
if raw_response.count('\n') > 50:
|
|
self.logger.debug(f" │ ... ({raw_response.count(chr(10)) - 50} more lines)")
|
|
self.logger.debug(f" ╰───────────────────────────────────────────────────\n")
|
|
|
|
def log_rag_results(self, results: List[Dict[str, Any]]):
|
|
"""Log RAG/NeoMem results in scannable format"""
|
|
if self.level.value >= LogLevel.SUMMARY.value:
|
|
self.logger.info(f" 📚 RAG Results: {len(results)} memories retrieved")
|
|
|
|
if self.level.value >= LogLevel.DETAILED.value and results:
|
|
self.logger.info(f" ╭─ MEMORY SCORES ───────────────────────────────────")
|
|
for idx, result in enumerate(results[:10], 1): # Show top 10
|
|
score = result.get("score", 0)
|
|
data_preview = str(result.get("payload", {}).get("data", ""))[:80]
|
|
self.logger.info(f" │ [{idx}] {score:.3f} | {data_preview}...")
|
|
if len(results) > 10:
|
|
self.logger.info(f" │ ... and {len(results) - 10} more results")
|
|
self.logger.info(f" ╰───────────────────────────────────────────────────")
|
|
|
|
def log_context_state(self, context_state: Dict[str, Any]):
|
|
"""Log context state summary"""
|
|
if self.level.value >= LogLevel.SUMMARY.value:
|
|
msg_count = context_state.get("message_count", 0)
|
|
minutes_since = context_state.get("minutes_since_last_msg", 0)
|
|
rag_count = len(context_state.get("rag", []))
|
|
|
|
self.logger.info(f" 📊 Context | Messages: {msg_count} | Last: {minutes_since:.1f}min ago | RAG: {rag_count} results")
|
|
|
|
if self.level.value >= LogLevel.DETAILED.value:
|
|
intake = context_state.get("intake", {})
|
|
if intake:
|
|
self.logger.info(f" ╭─ INTAKE SUMMARIES ────────────────────────────────")
|
|
for level in ["L1", "L5", "L10", "L20", "L30"]:
|
|
if level in intake:
|
|
summary = intake[level]
|
|
if isinstance(summary, dict):
|
|
summary = summary.get("summary", str(summary)[:100])
|
|
else:
|
|
summary = str(summary)[:100]
|
|
self.logger.info(f" │ {level}: {summary}...")
|
|
self.logger.info(f" ╰───────────────────────────────────────────────────")
|
|
|
|
def log_error(self, stage: str, error: Exception, critical: bool = False):
|
|
"""Log an error with context"""
|
|
level_marker = "🔴 CRITICAL" if critical else "⚠️ WARNING"
|
|
self.logger.error(f"{level_marker} | Stage: {stage} | Error: {type(error).__name__}: {str(error)}")
|
|
|
|
if self.level.value >= LogLevel.VERBOSE.value:
|
|
import traceback
|
|
self.logger.debug(f" Traceback:\n{traceback.format_exc()}")
|
|
|
|
def log_raw_data(self, label: str, data: Any, max_lines: int = 30):
|
|
"""Log raw data in a collapsible format"""
|
|
if self.level.value >= LogLevel.VERBOSE.value:
|
|
self.logger.debug(f" ╭─ {label.upper()} ──────────────────────────────────")
|
|
|
|
if isinstance(data, (dict, list)):
|
|
json_str = json.dumps(data, indent=2, default=str)
|
|
lines = json_str.split('\n')
|
|
for line in lines[:max_lines]:
|
|
self.logger.debug(f" │ {line}")
|
|
if len(lines) > max_lines:
|
|
self.logger.debug(f" │ ... ({len(lines) - max_lines} more lines)")
|
|
else:
|
|
lines = str(data).split('\n')
|
|
for line in lines[:max_lines]:
|
|
self.logger.debug(f" │ {line}")
|
|
if len(lines) > max_lines:
|
|
self.logger.debug(f" │ ... ({len(lines) - max_lines} more lines)")
|
|
|
|
self.logger.debug(f" ╰───────────────────────────────────────────────────")
|
|
|
|
def pipeline_end(self, session_id: str, final_output_length: int):
|
|
"""Mark the end of pipeline run with summary"""
|
|
if self.pipeline_start_time:
|
|
total_duration_ms = (datetime.now() - self.pipeline_start_time).total_seconds() * 1000
|
|
|
|
if self.level.value >= LogLevel.SUMMARY.value:
|
|
self.logger.info(f"\n{'='*100}")
|
|
self.logger.info(f"✨ PIPELINE COMPLETE | Session: {session_id} | Total: {total_duration_ms:.0f}ms")
|
|
self.logger.info(f"{'='*100}")
|
|
|
|
# Show timing breakdown
|
|
if self.stage_timings and self.level.value >= LogLevel.DETAILED.value:
|
|
self.logger.info("⏱️ Stage Timings:")
|
|
for stage, duration in self.stage_timings.items():
|
|
pct = (duration / total_duration_ms) * 100 if total_duration_ms > 0 else 0
|
|
self.logger.info(f" {stage:20s}: {duration:6.0f}ms ({pct:5.1f}%)")
|
|
|
|
self.logger.info(f"📤 Final output: {final_output_length} characters")
|
|
self.logger.info(f"{'='*100}\n")
|
|
|
|
|
|
def get_log_level_from_env() -> LogLevel:
|
|
"""Parse log level from environment variable"""
|
|
import os
|
|
verbose_debug = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
|
|
detail_level = os.getenv("LOG_DETAIL_LEVEL", "").lower()
|
|
|
|
if detail_level == "minimal":
|
|
return LogLevel.MINIMAL
|
|
elif detail_level == "summary":
|
|
return LogLevel.SUMMARY
|
|
elif detail_level == "detailed":
|
|
return LogLevel.DETAILED
|
|
elif detail_level == "verbose" or verbose_debug:
|
|
return LogLevel.VERBOSE
|
|
else:
|
|
return LogLevel.SUMMARY # Default
|