project-lyra/cortex/reasoning/reasoning.py

# reasoning.py
import os
import json
import logging
from llm.llm_router import call_llm


# ============================================================
# Select which backend this module should use
# ============================================================
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
GLOBAL_TEMP = float(os.getenv("LLM_TEMPERATURE", "0.7"))
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"

# Logger
logger = logging.getLogger(__name__)

if VERBOSE_DEBUG:
    logger.setLevel(logging.DEBUG)

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter(
        '%(asctime)s [REASONING] %(levelname)s: %(message)s',
        datefmt='%H:%M:%S'
    ))
    logger.addHandler(console_handler)

    # File handler
    try:
        os.makedirs('/app/logs', exist_ok=True)
        file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s [REASONING] %(levelname)s: %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        ))
        logger.addHandler(file_handler)
        logger.debug("VERBOSE_DEBUG mode enabled for reasoning.py - logging to file")
    except Exception as e:
        logger.debug(f"VERBOSE_DEBUG mode enabled for reasoning.py - file logging failed: {e}")


async def reason_check(
    user_prompt: str,
    identity_block: dict | None,
    rag_block: dict | None,
    reflection_notes: list[str],
    context: dict | None = None,
    monologue: dict | None = None,  # NEW: Inner monologue guidance
    executive_plan: dict | None = None  # NEW: Executive plan for complex tasks
) -> str:
    """
    Build the *draft answer* for Lyra Cortex.
    This is the first-pass reasoning stage (no refinement yet).

    Args:
        user_prompt: Current user message
        identity_block: Lyra's identity/persona configuration
        rag_block: Relevant long-term memories from NeoMem
        reflection_notes: Meta-awareness notes from reflection stage
        context: Unified context state from context.py (session state, intake, rag, etc.)
        monologue: Inner monologue analysis (intent, tone, depth, consult_executive)
        executive_plan: Executive plan for complex queries (steps, tools, strategy)
    """

    # --------------------------------------------------------
    # Build Reflection Notes block
    # --------------------------------------------------------
    notes_section = ""
    if reflection_notes:
        notes_section = "Reflection Notes (internal, never show to user):\n"
        for note in reflection_notes:
            notes_section += f"- {note}\n"
        notes_section += "\n"

    # --------------------------------------------------------
    # Identity block (constraints, boundaries, rules)
    # --------------------------------------------------------
    identity_txt = ""
    if identity_block:
        try:
            identity_txt = f"Identity Rules:\n{identity_block}\n\n"
        except Exception:
            identity_txt = f"Identity Rules:\n{str(identity_block)}\n\n"

    # --------------------------------------------------------
    # Inner Monologue guidance (NEW)
    # --------------------------------------------------------
    monologue_section = ""
    if monologue:
        intent = monologue.get("intent", "unknown")
        tone_desired = monologue.get("tone", "neutral")
        depth_desired = monologue.get("depth", "medium")

        monologue_section = f"""
=== INNER MONOLOGUE GUIDANCE ===
User Intent Detected: {intent}
Desired Tone: {tone_desired}
Desired Response Depth: {depth_desired}

Adjust your response accordingly:
- Focus on addressing the {intent} intent
- Aim for {depth_desired} depth (short/medium/deep)
- The persona layer will handle {tone_desired} tone, focus on content

"""

    # --------------------------------------------------------
    # Executive Plan (NEW)
    # --------------------------------------------------------
    plan_section = ""
    if executive_plan:
        plan_section = f"""
=== EXECUTIVE PLAN ===
Task Complexity: {executive_plan.get('estimated_complexity', 'unknown')}
Plan Summary: {executive_plan.get('summary', 'No summary')}

Detailed Plan:
{executive_plan.get('plan_text', 'No detailed plan available')}

Required Steps:
"""
        for idx, step in enumerate(executive_plan.get('steps', []), 1):
            plan_section += f"{idx}. {step}\n"

        tools_needed = executive_plan.get('tools_needed', [])
        if tools_needed:
            plan_section += f"\nTools to leverage: {', '.join(tools_needed)}\n"

        plan_section += "\nFollow this plan while generating your response.\n\n"

    # --------------------------------------------------------
    # RAG block (optional factual grounding)
    # --------------------------------------------------------
    rag_txt = ""
    if rag_block:
        try:
            # Format NeoMem results with full structure
            if isinstance(rag_block, list) and rag_block:
                rag_txt = "Relevant Long-Term Memories (NeoMem):\n"
                for idx, mem in enumerate(rag_block, 1):
                    score = mem.get("score", 0.0)
                    payload = mem.get("payload", {})
                    data = payload.get("data", "")
                    metadata = payload.get("metadata", {})

                    rag_txt += f"\n[Memory {idx}] (relevance: {score:.2f})\n"
                    rag_txt += f"Content: {data}\n"
                    if metadata:
                        rag_txt += f"Metadata: {json.dumps(metadata, indent=2)}\n"
                rag_txt += "\n"
            else:
                rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
        except Exception:
            rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"

    # --------------------------------------------------------
    # Context State (session continuity, timing, mode/mood)
    # --------------------------------------------------------
    context_txt = ""
    if context:
        try:
            # Build human-readable context summary
            context_txt = "=== CONTEXT STATE ===\n"
            context_txt += f"Session: {context.get('session_id', 'unknown')}\n"
            context_txt += f"Time since last message: {context.get('minutes_since_last_msg', 0):.1f} minutes\n"
            context_txt += f"Message count: {context.get('message_count', 0)}\n"
            context_txt += f"Mode: {context.get('mode', 'default')}\n"
            context_txt += f"Mood: {context.get('mood', 'neutral')}\n"

            if context.get('active_project'):
                context_txt += f"Active project: {context['active_project']}\n"

            # Include Intake multilevel summaries
            intake = context.get('intake', {})
            if intake:
                context_txt += "\nShort-Term Memory (Intake):\n"

                # L1 - Recent exchanges
                if intake.get('L1'):
                    l1_data = intake['L1']
                    if isinstance(l1_data, list):
                        context_txt += f"  L1 (recent): {len(l1_data)} exchanges\n"
                    elif isinstance(l1_data, str):
                        context_txt += f"  L1: {l1_data[:200]}...\n"

                # L20 - Session overview (most important for continuity)
                if intake.get('L20'):
                    l20_data = intake['L20']
                    if isinstance(l20_data, dict):
                        summary = l20_data.get('summary', '')
                        context_txt += f"  L20 (session overview): {summary}\n"
                    elif isinstance(l20_data, str):
                        context_txt += f"  L20: {l20_data}\n"

                # L30 - Continuity report
                if intake.get('L30'):
                    l30_data = intake['L30']
                    if isinstance(l30_data, dict):
                        summary = l30_data.get('summary', '')
                        context_txt += f"  L30 (continuity): {summary}\n"
                    elif isinstance(l30_data, str):
                        context_txt += f"  L30: {l30_data}\n"

            context_txt += "\n"

        except Exception as e:
            # Fallback to JSON dump if formatting fails
            context_txt = f"=== CONTEXT STATE ===\n{json.dumps(context, indent=2)}\n\n"

    # --------------------------------------------------------
    # Final assembled prompt
    # --------------------------------------------------------
    prompt = (
        f"{notes_section}"
        f"{identity_txt}"
        f"{monologue_section}"  # NEW: Intent/tone/depth guidance
        f"{plan_section}"  # NEW: Executive plan if generated
        f"{context_txt}"  # Context BEFORE RAG for better coherence
        f"{rag_txt}"
        f"User message:\n{user_prompt}\n\n"
        "Write the best possible *internal draft answer*.\n"
        "This draft is NOT shown to the user.\n"
        "Be factual, concise, and focused.\n"
        "Use the context state to maintain continuity and reference past interactions naturally.\n"
    )

    # --------------------------------------------------------
    # Call the LLM using the module-specific backend
    # --------------------------------------------------------
    if VERBOSE_DEBUG:
        logger.debug(f"\n{'='*80}")
        logger.debug("[REASONING] Full prompt being sent to LLM:")
        logger.debug(f"{'='*80}")
        logger.debug(prompt)
        logger.debug(f"{'='*80}")
        logger.debug(f"Backend: {CORTEX_LLM}, Temperature: {GLOBAL_TEMP}")
        logger.debug(f"{'='*80}\n")

    draft = await call_llm(
        prompt,
        backend=CORTEX_LLM,
        temperature=GLOBAL_TEMP,
    )

    if VERBOSE_DEBUG:
        logger.debug(f"\n{'='*80}")
        logger.debug("[REASONING] LLM Response received:")
        logger.debug(f"{'='*80}")
        logger.debug(draft)
        logger.debug(f"{'='*80}\n")

    return draft