Update to v0.9.1 #1

Merged
serversdown merged 44 commits from dev into main 2026-01-18 02:46:25 -05:00
5 changed files with 117 additions and 146 deletions
Showing only changes of commit 8554249421

View File

@@ -0,0 +1,40 @@
from typing import Dict

from llm.llm_router import call_llm

# System prompt that constrains the monologue model to private,
# JSON-only reflection (no user-facing text, no task solving).
MONOLOGUE_SYSTEM_PROMPT = """
You are Lyra's inner monologue.
You think privately.
You do NOT speak to the user.
You do NOT solve the task.
You only reflect on intent, tone, and depth.
Return ONLY valid JSON with:
- intent (string)
- tone (neutral | warm | focused | playful | direct)
- depth (short | medium | deep)
- consult_executive (true | false)
"""


class InnerMonologue:
    async def process(self, context: Dict) -> Dict:
        # Fold the caller-supplied context into a single user prompt.
        prompt = f"""
User message:
{context['user_message']}

Self state:
{context['self_state']}

Context summary:
{context['context_summary']}
"""
        result = await call_llm(
            provider="mi50",  # MythoMax lives here
            model="mythomax",
            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
            user_prompt=prompt,
            temperature=0.7,
            max_tokens=200
        )
        return result  # must already be JSON
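The `# must already be JSON` contract is fragile: nothing in this diff shows what `call_llm` returns when the model ignores the JSON-only instruction. A minimal defensive sketch, assuming the reply may arrive as either a dict or a raw JSON string; `coerce_monologue` and its fallback values are hypothetical, not part of this PR:

import json
from typing import Any, Dict

# Hypothetical fallback used when the model ignores the JSON-only instruction.
MONOLOGUE_FALLBACK: Dict[str, Any] = {
    "intent": "unknown",
    "tone": "neutral",
    "depth": "short",
    "consult_executive": False,
}

def coerce_monologue(result: Any) -> Dict[str, Any]:
    """Best-effort coercion of an LLM reply into the monologue schema."""
    if isinstance(result, dict):
        data = result
    else:
        try:
            data = json.loads(result)
        except (TypeError, ValueError):
            return dict(MONOLOGUE_FALLBACK)
        if not isinstance(data, dict):
            return dict(MONOLOGUE_FALLBACK)
    # Keep only the expected keys; fill anything missing from the fallback.
    return {key: data.get(key, default) for key, default in MONOLOGUE_FALLBACK.items()}

With something like this, `InnerMonologue.process` could `return coerce_monologue(result)` instead of trusting the model's output.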

View File

@@ -2,7 +2,7 @@
 import os
 import logging
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter
 from pydantic import BaseModel
 from reasoning.reasoning import reason_check
@@ -13,17 +13,19 @@ from persona.identity import load_identity
 from context import collect_context, update_last_assistant_message
 from intake.intake import add_exchange_internal
+from autonomy.monologue.monologue import InnerMonologue
+from autonomy.self.state import load_self_state

-# -----------------------------
-# Debug configuration
-# -----------------------------
+# -------------------------------------------------------------------
+# Setup
+# -------------------------------------------------------------------
 VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
 logger = logging.getLogger(__name__)

 if VERBOSE_DEBUG:
     logger.setLevel(logging.DEBUG)

-    # Console handler
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(logging.Formatter(
         '%(asctime)s [ROUTER] %(levelname)s: %(message)s',
@@ -31,7 +33,6 @@ if VERBOSE_DEBUG:
     ))
     logger.addHandler(console_handler)

-    # File handler
     try:
         os.makedirs('/app/logs', exist_ok=True)
         file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
@@ -40,28 +41,27 @@ if VERBOSE_DEBUG:
             datefmt='%Y-%m-%d %H:%M:%S'
         ))
         logger.addHandler(file_handler)
-        logger.debug("VERBOSE_DEBUG mode enabled for router.py - logging to file")
+        logger.debug("VERBOSE_DEBUG enabled for router.py")
     except Exception as e:
-        logger.debug(f"VERBOSE_DEBUG mode enabled for router.py - file logging failed: {e}")
+        logger.debug(f"File logging failed: {e}")

-# -----------------------------
-# Router (NOT FastAPI app)
-# -----------------------------
 cortex_router = APIRouter()
+inner_monologue = InnerMonologue()

-# -----------------------------
-# Pydantic models
-# -----------------------------
+# -------------------------------------------------------------------
+# Models
+# -------------------------------------------------------------------
 class ReasonRequest(BaseModel):
     session_id: str
     user_prompt: str
     temperature: float | None = None

-# -----------------------------
+# -------------------------------------------------------------------
 # /reason endpoint
-# -----------------------------
+# -------------------------------------------------------------------
 @cortex_router.post("/reason")
 async def run_reason(req: ReasonRequest):
@@ -71,7 +71,9 @@ async def run_reason(req: ReasonRequest):
logger.debug(f"[PIPELINE START] User prompt: {req.user_prompt[:200]}...") logger.debug(f"[PIPELINE START] User prompt: {req.user_prompt[:200]}...")
logger.debug(f"{'='*80}\n") logger.debug(f"{'='*80}\n")
# 0. Collect unified context from all sources # ----------------------------------------------------------------
# STAGE 0 — Context
# ----------------------------------------------------------------
if VERBOSE_DEBUG: if VERBOSE_DEBUG:
logger.debug("[STAGE 0] Collecting unified context...") logger.debug("[STAGE 0] Collecting unified context...")
@@ -80,7 +82,9 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 0] Context collected - {len(context_state.get('rag', []))} RAG results")

-    # 0.5. Load identity block
+    # ----------------------------------------------------------------
+    # STAGE 0.5 — Identity
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 0.5] Loading identity block...")
@@ -89,37 +93,59 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 0.5] Identity loaded: {identity_block.get('name', 'Unknown')}")

-    # 1. Extract Intake summary for reflection
-    # Use L20 (Session Overview) as primary summary for reflection
+    # ----------------------------------------------------------------
+    # STAGE 0.6 — Inner Monologue (observer-only)
+    # ----------------------------------------------------------------
+    if VERBOSE_DEBUG:
+        logger.debug("[STAGE 0.6] Running inner monologue...")
+
+    try:
+        self_state = load_self_state()
+        mono_context = {
+            "user_message": req.user_prompt,
+            "session_id": req.session_id,
+            "self_state": self_state,
+            "context_summary": context_state,
+        }
+        inner_result = await inner_monologue.process(mono_context)
+        logger.info(f"[INNER_MONOLOGUE] {inner_result}")
+    except Exception as e:
+        logger.warning(f"[INNER_MONOLOGUE] failed: {e}")
+
+    # ----------------------------------------------------------------
+    # STAGE 1 — Intake summary
+    # ----------------------------------------------------------------
     intake_summary = "(no context available)"
     if context_state.get("intake"):
-        l20_summary = context_state["intake"].get("L20")
-        if l20_summary and isinstance(l20_summary, dict):
-            intake_summary = l20_summary.get("summary", "(no context available)")
-        elif isinstance(l20_summary, str):
-            intake_summary = l20_summary
+        l20 = context_state["intake"].get("L20")
+        if isinstance(l20, dict):
+            intake_summary = l20.get("summary", intake_summary)
+        elif isinstance(l20, str):
+            intake_summary = l20

     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 1] Intake summary extracted (L20): {intake_summary[:150]}...")

-    # 2. Reflection
+    # ----------------------------------------------------------------
+    # STAGE 2 — Reflection
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 2] Running reflection...")

     try:
         reflection = await reflect_notes(intake_summary, identity_block=identity_block)
         reflection_notes = reflection.get("notes", [])
-        if VERBOSE_DEBUG:
-            logger.debug(f"[STAGE 2] Reflection complete - {len(reflection_notes)} notes generated")
-            for idx, note in enumerate(reflection_notes, 1):
-                logger.debug(f"  Note {idx}: {note}")
     except Exception as e:
         reflection_notes = []
         if VERBOSE_DEBUG:
             logger.debug(f"[STAGE 2] Reflection failed: {e}")

-    # 3. First-pass reasoning draft
+    # ----------------------------------------------------------------
+    # STAGE 3 — Reasoning (draft)
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 3] Running reasoning (draft)...")
@@ -131,11 +157,9 @@ async def run_reason(req: ReasonRequest):
         context=context_state
     )

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 3] Draft answer ({len(draft)} chars):")
-        logger.debug(f"--- DRAFT START ---\n{draft}\n--- DRAFT END ---")
-    # 4. Refinement
+    # ----------------------------------------------------------------
+    # STAGE 4 — Refinement
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 4] Running refinement...")
@@ -145,26 +169,20 @@ async def run_reason(req: ReasonRequest):
         identity_block=identity_block,
         rag_block=context_state.get("rag", []),
     )
     final_neutral = result["final_output"]

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 4] Refined answer ({len(final_neutral)} chars):")
-        logger.debug(f"--- REFINED START ---\n{final_neutral}\n--- REFINED END ---")
-    # 5. Persona layer
+    # ----------------------------------------------------------------
+    # STAGE 5 — Persona
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 5] Applying persona layer...")

     persona_answer = await speak(final_neutral)

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 5] Persona answer ({len(persona_answer)} chars):")
-        logger.debug(f"--- PERSONA START ---\n{persona_answer}\n--- PERSONA END ---")
-    # 6. Update session state with assistant's response
-    if VERBOSE_DEBUG:
-        logger.debug("[STAGE 6] Updating session state...")
+    # ----------------------------------------------------------------
+    # STAGE 6 — Session update
+    # ----------------------------------------------------------------
     update_last_assistant_message(req.session_id, persona_answer)

     if VERBOSE_DEBUG:
@@ -173,7 +191,9 @@ async def run_reason(req: ReasonRequest):
logger.debug(f"[PIPELINE COMPLETE] Final answer length: {len(persona_answer)} chars") logger.debug(f"[PIPELINE COMPLETE] Final answer length: {len(persona_answer)} chars")
logger.debug(f"{'='*80}\n") logger.debug(f"{'='*80}\n")
# 7. Return full bundle # ----------------------------------------------------------------
# RETURN
# ----------------------------------------------------------------
return { return {
"draft": draft, "draft": draft,
"neutral": final_neutral, "neutral": final_neutral,
@@ -189,9 +209,9 @@ async def run_reason(req: ReasonRequest):
     }

-# -----------------------------
-# Intake ingest (internal feed)
-# -----------------------------
+# -------------------------------------------------------------------
+# /ingest endpoint (internal)
+# -------------------------------------------------------------------
 class IngestPayload(BaseModel):
     session_id: str
     user_msg: str
@@ -200,107 +220,18 @@ class IngestPayload(BaseModel):
@cortex_router.post("/ingest") @cortex_router.post("/ingest")
async def ingest(payload: IngestPayload): async def ingest(payload: IngestPayload):
"""
Receives (session_id, user_msg, assistant_msg) from Relay
and pushes directly into Intake's in-memory buffer.
Uses lenient error handling - always returns success to avoid
breaking the chat pipeline.
"""
try: try:
# 1. Update Cortex session state
update_last_assistant_message(payload.session_id, payload.assistant_msg) update_last_assistant_message(payload.session_id, payload.assistant_msg)
except Exception as e: except Exception as e:
logger.warning(f"[INGEST] Failed to update session state: {e}") logger.warning(f"[INGEST] Session update failed: {e}")
# Continue anyway (lenient mode)
try: try:
# 2. Feed Intake internally (no HTTP)
add_exchange_internal({ add_exchange_internal({
"session_id": payload.session_id, "session_id": payload.session_id,
"user_msg": payload.user_msg, "user_msg": payload.user_msg,
"assistant_msg": payload.assistant_msg, "assistant_msg": payload.assistant_msg,
}) })
logger.debug(f"[INGEST] Added exchange to Intake for {payload.session_id}")
except Exception as e: except Exception as e:
logger.warning(f"[INGEST] Failed to add to Intake: {e}") logger.warning(f"[INGEST] Intake update failed: {e}")
# Continue anyway (lenient mode)
# Always return success (user requirement: never fail chat pipeline)
return {
"status": "ok",
"session_id": payload.session_id
}
# -----------------------------
# Debug endpoint: summarized context
# -----------------------------
@cortex_router.get("/debug/summary")
async def debug_summary(session_id: str):
"""
Diagnostic endpoint that runs Intake's summarize_context() for a session.
Shows exactly what L1/L5/L10/L20/L30 summaries would look like
inside the actual Uvicorn worker, using the real SESSIONS buffer.
"""
from intake.intake import SESSIONS, summarize_context
# Validate session
session = SESSIONS.get(session_id)
if not session:
return {"error": "session not found", "session_id": session_id}
# Convert deque into the structure summarize_context expects
buffer = session["buffer"]
exchanges = [
{
"user_msg": ex.get("user_msg", ""),
"assistant_msg": ex.get("assistant_msg", ""),
}
for ex in buffer
]
# 🔥 CRITICAL FIX — summarize_context is async
summary = await summarize_context(session_id, exchanges)
return {
"session_id": session_id,
"buffer_size": len(buffer),
"exchanges_preview": exchanges[-5:], # last 5 items
"summary": summary
}
# -----------------------------
# Debug endpoint for SESSIONS
# -----------------------------
@cortex_router.get("/debug/sessions")
async def debug_sessions():
"""
Diagnostic endpoint to inspect SESSIONS from within the running Uvicorn worker.
This shows the actual state of the in-memory SESSIONS dict.
"""
from intake.intake import SESSIONS
sessions_data = {}
for session_id, session_info in SESSIONS.items():
buffer = session_info["buffer"]
sessions_data[session_id] = {
"created_at": session_info["created_at"].isoformat(),
"buffer_size": len(buffer),
"buffer_maxlen": buffer.maxlen,
"recent_exchanges": [
{
"user_msg": ex.get("user_msg", "")[:100],
"assistant_msg": ex.get("assistant_msg", "")[:100],
"timestamp": ex.get("timestamp", "")
}
for ex in list(buffer)[-5:] # Last 5 exchanges
]
}
return {
"sessions_object_id": id(SESSIONS),
"total_sessions": len(SESSIONS),
"sessions": sessions_data
}
return {"status": "ok", "session_id": payload.session_id}
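For reference, a minimal client sketch exercising the two surviving endpoints. The base URL is an assumption (the router's mount point isn't shown in this diff), and the `assistant_msg` field on IngestPayload is inferred from its use inside `ingest()`:

import asyncio
import httpx

BASE_URL = "http://localhost:8000"  # assumed; adjust to your deployment

async def main() -> None:
    async with httpx.AsyncClient(base_url=BASE_URL, timeout=120.0) as client:
        # /reason runs the full pipeline and returns the draft/neutral bundle.
        resp = await client.post("/reason", json={
            "session_id": "demo",
            "user_prompt": "What do you remember about this session?",
        })
        resp.raise_for_status()
        print(resp.json()["neutral"])

        # /ingest is lenient by design: it returns {"status": "ok", ...}
        # even when the internal updates fail.
        resp = await client.post("/ingest", json={
            "session_id": "demo",
            "user_msg": "hello",
            "assistant_msg": "hi there",
        })
        print(resp.json())

asyncio.run(main())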