diff --git a/autonomy/prompts/inner_monologue_prompt.txt b/autonomy/Assembly-spec.md
similarity index 100%
rename from autonomy/prompts/inner_monologue_prompt.txt
rename to autonomy/Assembly-spec.md
diff --git a/autonomy/monologue/monologue.py b/autonomy/monologue/monologue.py
new file mode 100644
index 0000000..63534b8
--- /dev/null
+++ b/autonomy/monologue/monologue.py
@@ -0,0 +1,40 @@
+from typing import Dict
+from llm.llm_router import call_llm
+
+MONOLOGUE_SYSTEM_PROMPT = """
+You are Lyra's inner monologue.
+You think privately.
+You do NOT speak to the user.
+You do NOT solve the task.
+You only reflect on intent, tone, and depth.
+
+Return ONLY valid JSON with:
+- intent (string)
+- tone (neutral | warm | focused | playful | direct)
+- depth (short | medium | deep)
+- consult_executive (true | false)
+"""
+
+class InnerMonologue:
+    async def process(self, context: Dict) -> Dict:
+        prompt = f"""
+User message:
+{context['user_message']}
+
+Self state:
+{context['self_state']}
+
+Context summary:
+{context['context_summary']}
+"""
+
+        result = await call_llm(
+            provider="mi50",  # MythoMax lives here
+            model="mythomax",
+            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
+            user_prompt=prompt,
+            temperature=0.7,
+            max_tokens=200
+        )
+
+        return result  # call_llm must already return parsed JSON (a dict)
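NOTE: monologue.py assumes call_llm returns already-parsed JSON (a dict), not raw text. For reference, a reply that satisfies MONOLOGUE_SYSTEM_PROMPT would look like the following (illustrative values only, not part of this patch):

    {
        "intent": "user is asking for a brief status update",
        "tone": "focused",
        "depth": "short",
        "consult_executive": false
    }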
diff --git a/autonomy/prompts/state_interp_prompt.txt b/autonomy/prompts/state_interp_prompt.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/state/self_state.json b/autonomy/self/self_state.json
similarity index 100%
rename from autonomy/state/self_state.json
rename to autonomy/self/self_state.json
diff --git a/cortex/router.py b/cortex/router.py
index e6ba161..1e0484b 100644
--- a/cortex/router.py
+++ b/cortex/router.py
@@ -2,7 +2,7 @@
 import os
 import logging

-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter
 from pydantic import BaseModel

 from reasoning.reasoning import reason_check
@@ -13,17 +13,19 @@
 from persona.identity import load_identity
 from context import collect_context, update_last_assistant_message
 from intake.intake import add_exchange_internal
+from autonomy.monologue.monologue import InnerMonologue
+from autonomy.self.state import load_self_state

-# -----------------------------
-# Debug configuration
-# -----------------------------
+
+# -------------------------------------------------------------------
+# Setup
+# -------------------------------------------------------------------
 VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
 logger = logging.getLogger(__name__)

 if VERBOSE_DEBUG:
     logger.setLevel(logging.DEBUG)
-    # Console handler
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(logging.Formatter(
         '%(asctime)s [ROUTER] %(levelname)s: %(message)s',
@@ -31,7 +33,6 @@
     ))
     logger.addHandler(console_handler)

-    # File handler
     try:
         os.makedirs('/app/logs', exist_ok=True)
         file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
@@ -40,28 +41,27 @@
             datefmt='%Y-%m-%d %H:%M:%S'
         ))
         logger.addHandler(file_handler)
-        logger.debug("VERBOSE_DEBUG mode enabled for router.py - logging to file")
+        logger.debug("VERBOSE_DEBUG enabled for router.py")
     except Exception as e:
-        logger.debug(f"VERBOSE_DEBUG mode enabled for router.py - file logging failed: {e}")
+        logger.debug(f"File logging failed: {e}")
+
-# -----------------------------
-# Router (NOT FastAPI app)
-# -----------------------------
 cortex_router = APIRouter()
+inner_monologue = InnerMonologue()

-# -----------------------------
-# Pydantic models
-# -----------------------------
+# -------------------------------------------------------------------
+# Models
+# -------------------------------------------------------------------
 class ReasonRequest(BaseModel):
     session_id: str
     user_prompt: str
     temperature: float | None = None

-# -----------------------------
+# -------------------------------------------------------------------
 # /reason endpoint
-# -----------------------------
+# -------------------------------------------------------------------

 @cortex_router.post("/reason")
 async def run_reason(req: ReasonRequest):
@@ -71,7 +71,9 @@ async def run_reason(req: ReasonRequest):
         logger.debug(f"[PIPELINE START] User prompt: {req.user_prompt[:200]}...")
         logger.debug(f"{'='*80}\n")

-    # 0. Collect unified context from all sources
+    # ----------------------------------------------------------------
+    # STAGE 0 — Context
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 0] Collecting unified context...")

@@ -80,7 +82,9 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 0] Context collected - {len(context_state.get('rag', []))} RAG results")

-    # 0.5. Load identity block
+    # ----------------------------------------------------------------
+    # STAGE 0.5 — Identity
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 0.5] Loading identity block...")

@@ -89,37 +93,59 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 0.5] Identity loaded: {identity_block.get('name', 'Unknown')}")

-    # 1. Extract Intake summary for reflection
-    # Use L20 (Session Overview) as primary summary for reflection
+    # ----------------------------------------------------------------
+    # STAGE 0.6 — Inner Monologue (observer-only)
+    # ----------------------------------------------------------------
+    if VERBOSE_DEBUG:
+        logger.debug("[STAGE 0.6] Running inner monologue...")
+
+    try:
+        self_state = load_self_state()
+
+        mono_context = {
+            "user_message": req.user_prompt,
+            "session_id": req.session_id,
+            "self_state": self_state,
+            "context_summary": context_state,
+        }
+
+        inner_result = await inner_monologue.process(mono_context)
+        logger.info(f"[INNER_MONOLOGUE] {inner_result}")
+
+    except Exception as e:
+        logger.warning(f"[INNER_MONOLOGUE] failed: {e}")
+
+    # ----------------------------------------------------------------
+    # STAGE 1 — Intake summary
+    # ----------------------------------------------------------------
     intake_summary = "(no context available)"

     if context_state.get("intake"):
-        l20_summary = context_state["intake"].get("L20")
-        if l20_summary and isinstance(l20_summary, dict):
-            intake_summary = l20_summary.get("summary", "(no context available)")
-        elif isinstance(l20_summary, str):
-            intake_summary = l20_summary
+        l20 = context_state["intake"].get("L20")
+        if isinstance(l20, dict):
+            intake_summary = l20.get("summary", intake_summary)
+        elif isinstance(l20, str):
+            intake_summary = l20

     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 1] Intake summary extracted (L20): {intake_summary[:150]}...")

-    # 2. Reflection
+    # ----------------------------------------------------------------
+    # STAGE 2 — Reflection
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 2] Running reflection...")

     try:
         reflection = await reflect_notes(intake_summary, identity_block=identity_block)
         reflection_notes = reflection.get("notes", [])
-
-        if VERBOSE_DEBUG:
-            logger.debug(f"[STAGE 2] Reflection complete - {len(reflection_notes)} notes generated")
-            for idx, note in enumerate(reflection_notes, 1):
-                logger.debug(f"  Note {idx}: {note}")
     except Exception as e:
         reflection_notes = []
         if VERBOSE_DEBUG:
             logger.debug(f"[STAGE 2] Reflection failed: {e}")

-    # 3. First-pass reasoning draft
+    # ----------------------------------------------------------------
+    # STAGE 3 — Reasoning (draft)
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 3] Running reasoning (draft)...")

@@ -131,11 +157,9 @@ async def run_reason(req: ReasonRequest):
         context=context_state
     )

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 3] Draft answer ({len(draft)} chars):")
-        logger.debug(f"--- DRAFT START ---\n{draft}\n--- DRAFT END ---")
-
-    # 4. Refinement
+    # ----------------------------------------------------------------
+    # STAGE 4 — Refinement
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 4] Running refinement...")

@@ -145,26 +169,20 @@ async def run_reason(req: ReasonRequest):
         identity_block=identity_block,
         rag_block=context_state.get("rag", []),
     )
+
     final_neutral = result["final_output"]

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 4] Refined answer ({len(final_neutral)} chars):")
-        logger.debug(f"--- REFINED START ---\n{final_neutral}\n--- REFINED END ---")
-
-    # 5. Persona layer
+    # ----------------------------------------------------------------
+    # STAGE 5 — Persona
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 5] Applying persona layer...")

     persona_answer = await speak(final_neutral)

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 5] Persona answer ({len(persona_answer)} chars):")
-        logger.debug(f"--- PERSONA START ---\n{persona_answer}\n--- PERSONA END ---")
-
-    # 6. Update session state with assistant's response
-    if VERBOSE_DEBUG:
-        logger.debug("[STAGE 6] Updating session state...")
-
+    # ----------------------------------------------------------------
+    # STAGE 6 — Session update
+    # ----------------------------------------------------------------
     update_last_assistant_message(req.session_id, persona_answer)

     if VERBOSE_DEBUG:
@@ -173,7 +191,9 @@ async def run_reason(req: ReasonRequest):
         logger.debug(f"[PIPELINE COMPLETE] Final answer length: {len(persona_answer)} chars")
         logger.debug(f"{'='*80}\n")

-    # 7. Return full bundle
+    # ----------------------------------------------------------------
+    # RETURN
+    # ----------------------------------------------------------------
     return {
         "draft": draft,
         "neutral": final_neutral,
@@ -189,9 +209,9 @@ async def run_reason(req: ReasonRequest):
     }

-# -----------------------------
-# Intake ingest (internal feed)
-# -----------------------------
+# -------------------------------------------------------------------
+# /ingest endpoint (internal)
+# -------------------------------------------------------------------
 class IngestPayload(BaseModel):
     session_id: str
     user_msg: str
     assistant_msg: str

 @cortex_router.post("/ingest")
 async def ingest(payload: IngestPayload):
-    """
-    Receives (session_id, user_msg, assistant_msg) from Relay
-    and pushes directly into Intake's in-memory buffer.
-
-    Uses lenient error handling - always returns success to avoid
-    breaking the chat pipeline.
-    """
     try:
-        # 1. Update Cortex session state
         update_last_assistant_message(payload.session_id, payload.assistant_msg)
     except Exception as e:
-        logger.warning(f"[INGEST] Failed to update session state: {e}")
-        # Continue anyway (lenient mode)
+        logger.warning(f"[INGEST] Session update failed: {e}")

     try:
-        # 2. Feed Intake internally (no HTTP)
         add_exchange_internal({
             "session_id": payload.session_id,
             "user_msg": payload.user_msg,
             "assistant_msg": payload.assistant_msg,
         })
-        logger.debug(f"[INGEST] Added exchange to Intake for {payload.session_id}")
     except Exception as e:
-        logger.warning(f"[INGEST] Failed to add to Intake: {e}")
-        # Continue anyway (lenient mode)
-
-    # Always return success (user requirement: never fail chat pipeline)
-    return {
-        "status": "ok",
-        "session_id": payload.session_id
-    }
-
-# -----------------------------
-# Debug endpoint: summarized context
-# -----------------------------
-@cortex_router.get("/debug/summary")
-async def debug_summary(session_id: str):
-    """
-    Diagnostic endpoint that runs Intake's summarize_context() for a session.
-
-    Shows exactly what L1/L5/L10/L20/L30 summaries would look like
-    inside the actual Uvicorn worker, using the real SESSIONS buffer.
-    """
-    from intake.intake import SESSIONS, summarize_context
-
-    # Validate session
-    session = SESSIONS.get(session_id)
-    if not session:
-        return {"error": "session not found", "session_id": session_id}
-
-    # Convert deque into the structure summarize_context expects
-    buffer = session["buffer"]
-    exchanges = [
-        {
-            "user_msg": ex.get("user_msg", ""),
-            "assistant_msg": ex.get("assistant_msg", ""),
-        }
-        for ex in buffer
-    ]
-
-    # 🔥 CRITICAL FIX — summarize_context is async
-    summary = await summarize_context(session_id, exchanges)
-
-    return {
-        "session_id": session_id,
-        "buffer_size": len(buffer),
-        "exchanges_preview": exchanges[-5:],  # last 5 items
-        "summary": summary
-    }
-
-# -----------------------------
-# Debug endpoint for SESSIONS
-# -----------------------------
-@cortex_router.get("/debug/sessions")
-async def debug_sessions():
-    """
-    Diagnostic endpoint to inspect SESSIONS from within the running Uvicorn worker.
-    This shows the actual state of the in-memory SESSIONS dict.
-    """
-    from intake.intake import SESSIONS
-
-    sessions_data = {}
-    for session_id, session_info in SESSIONS.items():
-        buffer = session_info["buffer"]
-        sessions_data[session_id] = {
-            "created_at": session_info["created_at"].isoformat(),
-            "buffer_size": len(buffer),
-            "buffer_maxlen": buffer.maxlen,
-            "recent_exchanges": [
-                {
-                    "user_msg": ex.get("user_msg", "")[:100],
-                    "assistant_msg": ex.get("assistant_msg", "")[:100],
-                    "timestamp": ex.get("timestamp", "")
-                }
-                for ex in list(buffer)[-5:]  # Last 5 exchanges
-            ]
-        }
-
-    return {
-        "sessions_object_id": id(SESSIONS),
-        "total_sessions": len(SESSIONS),
-        "sessions": sessions_data
-    }
+        logger.warning(f"[INGEST] Intake update failed: {e}")
+
+    return {"status": "ok", "session_id": payload.session_id}