From 7693bc4080ae1145b2d2984e551bbf0963f72cd2 Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Sat, 13 Dec 2025 02:55:49 -0500
Subject: [PATCH 01/10] autonomy scaffold

---
 ..._monologue_prompt.txt => Assembly-spec.md} |   0
 autonomy/monologue/monologue.py               |  40 ++++
 autonomy/prompts/state_interp_prompt.txt      |   0
 autonomy/{state => self}/self_state.json      |   0
 cortex/router.py                              | 223 ++++++------------
 5 files changed, 117 insertions(+), 146 deletions(-)
 rename autonomy/{prompts/inner_monologue_prompt.txt => Assembly-spec.md} (100%)
 create mode 100644 autonomy/monologue/monologue.py
 delete mode 100644 autonomy/prompts/state_interp_prompt.txt
 rename autonomy/{state => self}/self_state.json (100%)

diff --git a/autonomy/prompts/inner_monologue_prompt.txt b/autonomy/Assembly-spec.md
similarity index 100%
rename from autonomy/prompts/inner_monologue_prompt.txt
rename to autonomy/Assembly-spec.md
diff --git a/autonomy/monologue/monologue.py b/autonomy/monologue/monologue.py
new file mode 100644
index 0000000..63534b8
--- /dev/null
+++ b/autonomy/monologue/monologue.py
@@ -0,0 +1,40 @@
+from typing import Dict
+from llm.llm_router import call_llm
+
+MONOLOGUE_SYSTEM_PROMPT = """
+You are Lyra's inner monologue.
+You think privately.
+You do NOT speak to the user.
+You do NOT solve the task.
+You only reflect on intent, tone, and depth.
+
+Return ONLY valid JSON with:
+- intent (string)
+- tone (neutral | warm | focused | playful | direct)
+- depth (short | medium | deep)
+- consult_executive (true | false)
+"""
+
+class InnerMonologue:
+    async def process(self, context: Dict) -> Dict:
+        prompt = f"""
+User message:
+{context['user_message']}
+
+Self state:
+{context['self_state']}
+
+Context summary:
+{context['context_summary']}
+"""
+
+        result = await call_llm(
+            provider="mi50",              # MythoMax lives here
+            model="mythomax",
+            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
+            user_prompt=prompt,
+            temperature=0.7,
+            max_tokens=200
+        )
+
+        return result  # must already be JSON
diff --git a/autonomy/prompts/state_interp_prompt.txt b/autonomy/prompts/state_interp_prompt.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/state/self_state.json b/autonomy/self/self_state.json
similarity index 100%
rename from autonomy/state/self_state.json
rename to autonomy/self/self_state.json
diff --git a/cortex/router.py b/cortex/router.py
index e6ba161..1e0484b 100644
--- a/cortex/router.py
+++ b/cortex/router.py
@@ -2,7 +2,7 @@
 
 import os
 import logging
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter
 from pydantic import BaseModel
 
 from reasoning.reasoning import reason_check
@@ -13,17 +13,19 @@ from persona.identity import load_identity
 from context import collect_context, update_last_assistant_message
 from intake.intake import add_exchange_internal
 
+from autonomy.monologue.monologue import InnerMonologue
+from autonomy.self.state import load_self_state
 
-# -----------------------------
-# Debug configuration
-# -----------------------------
+
+# -------------------------------------------------------------------
+# Setup
+# -------------------------------------------------------------------
 VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
 logger = logging.getLogger(__name__)
 
 if VERBOSE_DEBUG:
     logger.setLevel(logging.DEBUG)
 
-    # Console handler
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(logging.Formatter(
         '%(asctime)s [ROUTER] %(levelname)s: %(message)s',
@@ -31,7 +33,6 @@ if VERBOSE_DEBUG:
     ))
     logger.addHandler(console_handler)
 
-    # File handler
     try:
         os.makedirs('/app/logs', exist_ok=True)
         file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
@@ -40,28 +41,27 @@ if VERBOSE_DEBUG:
             datefmt='%Y-%m-%d %H:%M:%S'
         ))
         logger.addHandler(file_handler)
-        logger.debug("VERBOSE_DEBUG mode enabled for router.py - logging to file")
+        logger.debug("VERBOSE_DEBUG enabled for router.py")
     except Exception as e:
-        logger.debug(f"VERBOSE_DEBUG mode enabled for router.py - file logging failed: {e}")
+        logger.debug(f"File logging failed: {e}")
+
 
-# -----------------------------
-# Router (NOT FastAPI app)
-# -----------------------------
 cortex_router = APIRouter()
+inner_monologue = InnerMonologue()
 
 
-# -----------------------------
-# Pydantic models
-# -----------------------------
+# -------------------------------------------------------------------
+# Models
+# -------------------------------------------------------------------
 class ReasonRequest(BaseModel):
     session_id: str
     user_prompt: str
     temperature: float | None = None
 
 
-# -----------------------------
+# -------------------------------------------------------------------
 # /reason endpoint
-# -----------------------------
+# -------------------------------------------------------------------
 @cortex_router.post("/reason")
 async def run_reason(req: ReasonRequest):
 
@@ -71,7 +71,9 @@ async def run_reason(req: ReasonRequest):
         logger.debug(f"[PIPELINE START] User prompt: {req.user_prompt[:200]}...")
         logger.debug(f"{'='*80}\n")
 
-    # 0. Collect unified context from all sources
+    # ----------------------------------------------------------------
+    # STAGE 0 — Context
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 0] Collecting unified context...")
 
@@ -80,7 +82,9 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 0] Context collected - {len(context_state.get('rag', []))} RAG results")
 
-    # 0.5. Load identity block
+    # ----------------------------------------------------------------
+    # STAGE 0.5 — Identity
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 0.5] Loading identity block...")
 
@@ -89,37 +93,59 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 0.5] Identity loaded: {identity_block.get('name', 'Unknown')}")
 
-    # 1. Extract Intake summary for reflection
-    # Use L20 (Session Overview) as primary summary for reflection
+    # ----------------------------------------------------------------
+    # STAGE 0.6 — Inner Monologue (observer-only)
+    # ----------------------------------------------------------------
+    if VERBOSE_DEBUG:
+        logger.debug("[STAGE 0.6] Running inner monologue...")
+
+    try:
+        self_state = load_self_state()
+
+        mono_context = {
+            "user_message": req.user_prompt,
+            "session_id": req.session_id,
+            "self_state": self_state,
+            "context_summary": context_state,
+        }
+
+        inner_result = await inner_monologue.process(mono_context)
+        logger.info(f"[INNER_MONOLOGUE] {inner_result}")
+
+    except Exception as e:
+        logger.warning(f"[INNER_MONOLOGUE] failed: {e}")
+
+    # ----------------------------------------------------------------
+    # STAGE 1 — Intake summary
+    # ----------------------------------------------------------------
     intake_summary = "(no context available)"
     if context_state.get("intake"):
-        l20_summary = context_state["intake"].get("L20")
-        if l20_summary and isinstance(l20_summary, dict):
-            intake_summary = l20_summary.get("summary", "(no context available)")
-        elif isinstance(l20_summary, str):
-            intake_summary = l20_summary
+        l20 = context_state["intake"].get("L20")
+        if isinstance(l20, dict):
+            intake_summary = l20.get("summary", intake_summary)
+        elif isinstance(l20, str):
+            intake_summary = l20
 
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 1] Intake summary extracted (L20): {intake_summary[:150]}...")
 
-    # 2. Reflection
+    # ----------------------------------------------------------------
+    # STAGE 2 — Reflection
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 2] Running reflection...")
 
     try:
         reflection = await reflect_notes(intake_summary, identity_block=identity_block)
         reflection_notes = reflection.get("notes", [])
-
-        if VERBOSE_DEBUG:
-            logger.debug(f"[STAGE 2] Reflection complete - {len(reflection_notes)} notes generated")
-            for idx, note in enumerate(reflection_notes, 1):
-                logger.debug(f"  Note {idx}: {note}")
     except Exception as e:
         reflection_notes = []
         if VERBOSE_DEBUG:
             logger.debug(f"[STAGE 2] Reflection failed: {e}")
 
-    # 3. First-pass reasoning draft
+    # ----------------------------------------------------------------
+    # STAGE 3 — Reasoning (draft)
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 3] Running reasoning (draft)...")
 
@@ -131,11 +157,9 @@ async def run_reason(req: ReasonRequest):
         context=context_state
     )
 
-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 3] Draft answer ({len(draft)} chars):")
-        logger.debug(f"--- DRAFT START ---\n{draft}\n--- DRAFT END ---")
-
-    # 4. Refinement
+    # ----------------------------------------------------------------
+    # STAGE 4 — Refinement
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 4] Running refinement...")
 
@@ -145,26 +169,20 @@ async def run_reason(req: ReasonRequest):
         identity_block=identity_block,
         rag_block=context_state.get("rag", []),
     )
+
     final_neutral = result["final_output"]
 
-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 4] Refined answer ({len(final_neutral)} chars):")
-        logger.debug(f"--- REFINED START ---\n{final_neutral}\n--- REFINED END ---")
-
-    # 5. Persona layer
+    # ----------------------------------------------------------------
+    # STAGE 5 — Persona
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 5] Applying persona layer...")
 
     persona_answer = await speak(final_neutral)
 
-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 5] Persona answer ({len(persona_answer)} chars):")
-        logger.debug(f"--- PERSONA START ---\n{persona_answer}\n--- PERSONA END ---")
-
-    # 6. Update session state with assistant's response
-    if VERBOSE_DEBUG:
-        logger.debug("[STAGE 6] Updating session state...")
-
+    # ----------------------------------------------------------------
+    # STAGE 6 — Session update
+    # ----------------------------------------------------------------
     update_last_assistant_message(req.session_id, persona_answer)
 
     if VERBOSE_DEBUG:
@@ -173,7 +191,9 @@ async def run_reason(req: ReasonRequest):
         logger.debug(f"[PIPELINE COMPLETE] Final answer length: {len(persona_answer)} chars")
         logger.debug(f"{'='*80}\n")
 
-    # 7. Return full bundle
+    # ----------------------------------------------------------------
+    # RETURN
+    # ----------------------------------------------------------------
     return {
         "draft": draft,
         "neutral": final_neutral,
@@ -189,9 +209,9 @@ async def run_reason(req: ReasonRequest):
     }
 
 
-# -----------------------------
-# Intake ingest (internal feed)
-# -----------------------------
+# -------------------------------------------------------------------
+# /ingest endpoint (internal)
+# -------------------------------------------------------------------
 class IngestPayload(BaseModel):
     session_id: str
     user_msg: str
@@ -200,107 +220,18 @@ class IngestPayload(BaseModel):
 
 @cortex_router.post("/ingest")
 async def ingest(payload: IngestPayload):
-    """
-    Receives (session_id, user_msg, assistant_msg) from Relay
-    and pushes directly into Intake's in-memory buffer.
-
-    Uses lenient error handling - always returns success to avoid
-    breaking the chat pipeline.
-    """
     try:
-        # 1. Update Cortex session state
         update_last_assistant_message(payload.session_id, payload.assistant_msg)
     except Exception as e:
-        logger.warning(f"[INGEST] Failed to update session state: {e}")
-        # Continue anyway (lenient mode)
+        logger.warning(f"[INGEST] Session update failed: {e}")
 
     try:
-        # 2. Feed Intake internally (no HTTP)
         add_exchange_internal({
             "session_id": payload.session_id,
             "user_msg": payload.user_msg,
             "assistant_msg": payload.assistant_msg,
         })
-        logger.debug(f"[INGEST] Added exchange to Intake for {payload.session_id}")
     except Exception as e:
-        logger.warning(f"[INGEST] Failed to add to Intake: {e}")
-        # Continue anyway (lenient mode)
-
-    # Always return success (user requirement: never fail chat pipeline)
-    return {
-        "status": "ok",
-        "session_id": payload.session_id
-    }
-
-# -----------------------------
-# Debug endpoint: summarized context
-# -----------------------------
-@cortex_router.get("/debug/summary")
-async def debug_summary(session_id: str):
-    """
-    Diagnostic endpoint that runs Intake's summarize_context() for a session.
-
-    Shows exactly what L1/L5/L10/L20/L30 summaries would look like
-    inside the actual Uvicorn worker, using the real SESSIONS buffer.
-    """
-    from intake.intake import SESSIONS, summarize_context
-
-    # Validate session
-    session = SESSIONS.get(session_id)
-    if not session:
-        return {"error": "session not found", "session_id": session_id}
-
-    # Convert deque into the structure summarize_context expects
-    buffer = session["buffer"]
-    exchanges = [
-        {
-            "user_msg": ex.get("user_msg", ""),
-            "assistant_msg": ex.get("assistant_msg", ""),
-        }
-        for ex in buffer
-    ]
-
-    # 🔥 CRITICAL FIX — summarize_context is async
-    summary = await summarize_context(session_id, exchanges)
-
-    return {
-        "session_id": session_id,
-        "buffer_size": len(buffer),
-        "exchanges_preview": exchanges[-5:],   # last 5 items
-        "summary": summary
-    }
-
-# -----------------------------
-# Debug endpoint for SESSIONS
-# -----------------------------
-@cortex_router.get("/debug/sessions")
-async def debug_sessions():
-    """
-    Diagnostic endpoint to inspect SESSIONS from within the running Uvicorn worker.
-    This shows the actual state of the in-memory SESSIONS dict.
-    """
-    from intake.intake import SESSIONS
-
-    sessions_data = {}
-    for session_id, session_info in SESSIONS.items():
-        buffer = session_info["buffer"]
-        sessions_data[session_id] = {
-            "created_at": session_info["created_at"].isoformat(),
-            "buffer_size": len(buffer),
-            "buffer_maxlen": buffer.maxlen,
-            "recent_exchanges": [
-                {
-                    "user_msg": ex.get("user_msg", "")[:100],
-                    "assistant_msg": ex.get("assistant_msg", "")[:100],
-                    "timestamp": ex.get("timestamp", "")
-                }
-                for ex in list(buffer)[-5:]  # Last 5 exchanges
-            ]
-        }
-
-    return {
-        "sessions_object_id": id(SESSIONS),
-        "total_sessions": len(SESSIONS),
-        "sessions": sessions_data
-    }
+        logger.warning(f"[INGEST] Intake update failed: {e}")
 
+    return {"status": "ok", "session_id": payload.session_id}

From 70e57ba5d29b5f9088a6f4f226b43a07f30482bd Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Sat, 13 Dec 2025 04:13:12 -0500
Subject: [PATCH 02/10] cortex pipeline stabilized, inner monologue is now
 determining user intent and tone

---
 autonomy/Assembly-spec.md                     |   0
 autonomy/autonomy_core.py                     |   0
 autonomy/inner_self.py                        |   0
 autonomy/monologue/monologue.py               |  40 ---
 cortex/autonomy/Assembly-spec.md              | 249 ++++++++++++++++++
 cortex/autonomy/__init__.py                   |   1 +
 cortex/autonomy/monologue/__init__.py         |   1 +
 cortex/autonomy/monologue/monologue.py        | 115 ++++++++
 cortex/autonomy/self/__init__.py              |   1 +
 .../autonomy}/self/self_state.json            |   0
 cortex/autonomy/self/state.py                 |  11 +
 cortex/intake/intake.py                       | 107 ++++----
 12 files changed, 428 insertions(+), 97 deletions(-)
 delete mode 100644 autonomy/Assembly-spec.md
 delete mode 100644 autonomy/autonomy_core.py
 delete mode 100644 autonomy/inner_self.py
 delete mode 100644 autonomy/monologue/monologue.py
 create mode 100644 cortex/autonomy/Assembly-spec.md
 create mode 100644 cortex/autonomy/__init__.py
 create mode 100644 cortex/autonomy/monologue/__init__.py
 create mode 100644 cortex/autonomy/monologue/monologue.py
 create mode 100644 cortex/autonomy/self/__init__.py
 rename {autonomy => cortex/autonomy}/self/self_state.json (100%)
 create mode 100644 cortex/autonomy/self/state.py

diff --git a/autonomy/Assembly-spec.md b/autonomy/Assembly-spec.md
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/autonomy_core.py b/autonomy/autonomy_core.py
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/inner_self.py b/autonomy/inner_self.py
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/monologue/monologue.py b/autonomy/monologue/monologue.py
deleted file mode 100644
index 63534b8..0000000
--- a/autonomy/monologue/monologue.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from typing import Dict
-from llm.llm_router import call_llm
-
-MONOLOGUE_SYSTEM_PROMPT = """
-You are Lyra's inner monologue.
-You think privately.
-You do NOT speak to the user.
-You do NOT solve the task.
-You only reflect on intent, tone, and depth.
-
-Return ONLY valid JSON with:
-- intent (string)
-- tone (neutral | warm | focused | playful | direct)
-- depth (short | medium | deep)
-- consult_executive (true | false)
-"""
-
-class InnerMonologue:
-    async def process(self, context: Dict) -> Dict:
-        prompt = f"""
-User message:
-{context['user_message']}
-
-Self state:
-{context['self_state']}
-
-Context summary:
-{context['context_summary']}
-"""
-
-        result = await call_llm(
-            provider="mi50",              # MythoMax lives here
-            model="mythomax",
-            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
-            user_prompt=prompt,
-            temperature=0.7,
-            max_tokens=200
-        )
-
-        return result  # must already be JSON
diff --git a/cortex/autonomy/Assembly-spec.md b/cortex/autonomy/Assembly-spec.md
new file mode 100644
index 0000000..25e7442
--- /dev/null
+++ b/cortex/autonomy/Assembly-spec.md
@@ -0,0 +1,249 @@
+# 📐 Project Lyra — Cognitive Assembly Spec
+**Version:** 0.6.1  
+**Status:** Canonical reference  
+**Purpose:** Define clear separation of Self, Thought, Reasoning, and Speech
+
+---
+
+## 1. High-Level Overview
+
+Lyra is composed of **four distinct cognitive layers**, plus I/O.
+
+Each layer has:
+- a **responsibility**
+- a **scope**
+- clear **inputs / outputs**
+- explicit **authority boundaries**
+
+No layer is allowed to “do everything.”
+
+---
+
+## 2. Layer Definitions
+
+### 2.1 Autonomy / Self (NON-LLM)
+
+**What it is**
+- Persistent identity
+- Long-term state
+- Mood, preferences, values
+- Continuity across time
+
+**What it is NOT**
+- Not a reasoning engine
+- Not a planner
+- Not a speaker
+- Not creative
+
+**Implementation**
+- Data + light logic
+- JSON / Python objects
+- No LLM calls
+
+**Lives at**
+```
+project-lyra/autonomy/self/
+```
+
+**Inputs**
+- Events (user message received, response sent)
+- Time / idle ticks (later)
+
+**Outputs**
+- Self state snapshot
+- Flags / preferences (e.g. verbosity, tone bias)
+
+---
+
+### 2.2 Inner Monologue (LLM, PRIVATE)
+
+**What it is**
+- Internal language-based thought
+- Reflection
+- Intent formation
+- “What do I think about this?”
+
+**What it is NOT**
+- Not final reasoning
+- Not execution
+- Not user-facing
+
+**Model**
+- MythoMax
+
+**Lives at**
+```
+project-lyra/autonomy/monologue/
+```
+
+**Inputs**
+- User message
+- Self state snapshot
+- Recent context summary
+
+**Outputs**
+- Intent
+- Tone guidance
+- Depth guidance
+- “Consult executive?” flag
+
+**Example Output**
+```json
+{
+  "intent": "technical_exploration",
+  "tone": "focused",
+  "depth": "deep",
+  "consult_executive": true
+}
+```
+
+---
+
+### 2.3 Cortex (Reasoning & Execution)
+
+**What it is**
+- Thinking pipeline
+- Planning
+- Tool selection
+- Task execution
+- Draft generation
+
+**What it is NOT**
+- Not identity
+- Not personality
+- Not persistent self
+
+**Models**
+- DeepSeek-R1 → Executive / Planner
+- GPT-4o-mini → Executor / Drafter
+
+**Lives at**
+```
+project-lyra/cortex/
+```
+
+**Inputs**
+- User message
+- Inner Monologue output
+- Memory / RAG / tools
+
+**Outputs**
+- Draft response (content only)
+- Metadata (sources, confidence, etc.)
+
+---
+
+### 2.4 Persona / Speech (LLM, USER-FACING)
+
+**What it is**
+- Voice
+- Style
+- Expression
+- Social behavior
+
+**What it is NOT**
+- Not planning
+- Not deep reasoning
+- Not decision-making
+
+**Model**
+- MythoMax
+
+**Lives at**
+```
+project-lyra/core/persona/
+```
+
+**Inputs**
+- Draft response (from Cortex)
+- Tone + intent (from Inner Monologue)
+- Persona configuration
+
+**Outputs**
+- Final user-visible text
+
+---
+
+## 3. Message Flow (Authoritative)
+
+### 3.1 Standard Message Path
+
+```
+User
+  ↓
+UI
+  ↓
+Relay
+  ↓
+Cortex
+  ↓
+Autonomy / Self (state snapshot)
+  ↓
+Inner Monologue (MythoMax)
+  ↓
+[ consult_executive? ]
+    ├─ Yes → DeepSeek-R1 (plan)
+    └─ No  → skip
+  ↓
+GPT-4o-mini (execute & draft)
+  ↓
+Persona (MythoMax)
+  ↓
+Relay
+  ↓
+UI
+  ↓
+User
+```
+
+### 3.2 Fast Path (No Thinking)
+
+```
+User → UI → Relay → Persona → Relay → UI
+```
+
+---
+
+## 4. Authority Rules (Non-Negotiable)
+
+- Self never calls an LLM
+- Inner Monologue never speaks to the user
+- Cortex never applies personality
+- Persona never reasons or plans
+- DeepSeek never writes final answers
+- MythoMax never plans execution
+
+---
+
+## 5. Folder Mapping
+
+```
+project-lyra/
+├── autonomy/
+│   ├── self/
+│   ├── monologue/
+│   └── executive/
+├── cortex/
+├── core/
+│   └── persona/
+├── relay/
+└── ui/
+```
+
+---
+
+## 6. Current Status
+
+- UI ✔
+- Relay ✔
+- Cortex ✔
+- Persona ✔
+- Autonomy ✔
+- Inner Monologue ⚠ partially wired
+- Executive gating ⚠ planned
+
+---
+
+## 7. Next Decision
+
+Decide whether **Inner Monologue runs every message** or **only when triggered**.
diff --git a/cortex/autonomy/__init__.py b/cortex/autonomy/__init__.py
new file mode 100644
index 0000000..49f54e0
--- /dev/null
+++ b/cortex/autonomy/__init__.py
@@ -0,0 +1 @@
+# Autonomy module for Lyra
diff --git a/cortex/autonomy/monologue/__init__.py b/cortex/autonomy/monologue/__init__.py
new file mode 100644
index 0000000..8cd4fb8
--- /dev/null
+++ b/cortex/autonomy/monologue/__init__.py
@@ -0,0 +1 @@
+# Inner monologue module
diff --git a/cortex/autonomy/monologue/monologue.py b/cortex/autonomy/monologue/monologue.py
new file mode 100644
index 0000000..a03e5f5
--- /dev/null
+++ b/cortex/autonomy/monologue/monologue.py
@@ -0,0 +1,115 @@
+import os
+import json
+import logging
+from typing import Dict
+from llm.llm_router import call_llm
+
+# Configuration
+MONOLOGUE_LLM = os.getenv("MONOLOGUE_LLM", "PRIMARY").upper()
+VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
+
+# Logger
+logger = logging.getLogger(__name__)
+
+if VERBOSE_DEBUG:
+    logger.setLevel(logging.DEBUG)
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(logging.Formatter(
+        '%(asctime)s [MONOLOGUE] %(levelname)s: %(message)s',
+        datefmt='%H:%M:%S'
+    ))
+    logger.addHandler(console_handler)
+
+MONOLOGUE_SYSTEM_PROMPT = """
+You are Lyra's inner monologue.
+You think privately.
+You do NOT speak to the user.
+You do NOT solve the task.
+You only reflect on intent, tone, and depth.
+
+Return ONLY valid JSON with:
+- intent (string)
+- tone (neutral | warm | focused | playful | direct)
+- depth (short | medium | deep)
+- consult_executive (true | false)
+"""
+
+class InnerMonologue:
+    async def process(self, context: Dict) -> Dict:
+        # Build full prompt with system instructions merged in
+        full_prompt = f"""{MONOLOGUE_SYSTEM_PROMPT}
+
+User message:
+{context['user_message']}
+
+Self state:
+{context['self_state']}
+
+Context summary:
+{context['context_summary']}
+
+Output JSON only:
+"""
+
+        # Call LLM using configured backend
+        if VERBOSE_DEBUG:
+            logger.debug(f"[InnerMonologue] Calling LLM with backend: {MONOLOGUE_LLM}")
+            logger.debug(f"[InnerMonologue] Prompt length: {len(full_prompt)} chars")
+
+        result = await call_llm(
+            full_prompt,
+            backend=MONOLOGUE_LLM,
+            temperature=0.7,
+            max_tokens=200
+        )
+
+        if VERBOSE_DEBUG:
+            logger.debug(f"[InnerMonologue] Raw LLM response:")
+            logger.debug(f"{'='*80}")
+            logger.debug(result)
+            logger.debug(f"{'='*80}")
+            logger.debug(f"[InnerMonologue] Response length: {len(result) if result else 0} chars")
+
+        # Parse JSON response - extract just the JSON part if there's extra text
+        try:
+            # Try direct parsing first
+            parsed = json.loads(result)
+            if VERBOSE_DEBUG:
+                logger.debug(f"[InnerMonologue] Successfully parsed JSON directly: {parsed}")
+            return parsed
+        except json.JSONDecodeError:
+            # If direct parsing fails, try to extract JSON from the response
+            if VERBOSE_DEBUG:
+                logger.debug(f"[InnerMonologue] Direct JSON parse failed, attempting extraction...")
+
+            # Look for the first {...} object; the pattern tolerates at most one level of nested braces
+            import re
+            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', result, re.DOTALL)
+
+            if json_match:
+                json_str = json_match.group(0)
+                try:
+                    parsed = json.loads(json_str)
+                    if VERBOSE_DEBUG:
+                        logger.debug(f"[InnerMonologue] Successfully extracted and parsed JSON: {parsed}")
+                    return parsed
+                except json.JSONDecodeError as e:
+                    if VERBOSE_DEBUG:
+                        logger.warning(f"[InnerMonologue] Extracted JSON still invalid: {e}")
+            else:
+                if VERBOSE_DEBUG:
+                    logger.warning(f"[InnerMonologue] No JSON object found in response")
+
+            # Final fallback
+            if VERBOSE_DEBUG:
+                logger.warning(f"[InnerMonologue] All parsing attempts failed, using fallback")
+            else:
+                print(f"[InnerMonologue] JSON extraction failed")
+                print(f"[InnerMonologue] Raw response was: {result[:500]}")
+
+            return {
+                "intent": "unknown",
+                "tone": "neutral",
+                "depth": "medium",
+                "consult_executive": False
+            }
diff --git a/cortex/autonomy/self/__init__.py b/cortex/autonomy/self/__init__.py
new file mode 100644
index 0000000..60c47c7
--- /dev/null
+++ b/cortex/autonomy/self/__init__.py
@@ -0,0 +1 @@
+# Self state module
diff --git a/autonomy/self/self_state.json b/cortex/autonomy/self/self_state.json
similarity index 100%
rename from autonomy/self/self_state.json
rename to cortex/autonomy/self/self_state.json
diff --git a/cortex/autonomy/self/state.py b/cortex/autonomy/self/state.py
new file mode 100644
index 0000000..ab9c626
--- /dev/null
+++ b/cortex/autonomy/self/state.py
@@ -0,0 +1,11 @@
+"""
+Stub for self state management.
+"""
+
+def load_self_state():
+    """Load self state - stub implementation"""
+    return {
+        "mood": "neutral",
+        "energy": 0.8,
+        "focus": "user_request"
+    }
diff --git a/cortex/intake/intake.py b/cortex/intake/intake.py
index f5d9cba..ce0e592 100644
--- a/cortex/intake/intake.py
+++ b/cortex/intake/intake.py
@@ -234,25 +234,27 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None:
 async def summarize_context(session_id: str, exchanges: list[dict]):
     """
     Internal summarizer that uses Cortex's LLM router.
-    Produces L1 / L5 / L10 / L20 / L30 summaries.
+    Produces cascading summaries based on exchange count:
+    - L1: Always (most recent activity)
+    - L2: After 2+ exchanges
+    - L5: After 5+ exchanges
+    - L10: After 10+ exchanges
+    - L20: After 20+ exchanges
+    - L30: After 30+ exchanges
 
     Args:
         session_id: The conversation/session ID
         exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}
     """
 
-    # Build raw conversation text
-    convo_lines = []
-    for ex in exchanges:
-        convo_lines.append(f"User: {ex.get('user_msg','')}")
-        convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}")
-    convo_text = "\n".join(convo_lines)
+    exchange_count = len(exchanges)
 
-    if not convo_text.strip():
+    if exchange_count == 0:
         return {
             "session_id": session_id,
             "exchange_count": 0,
             "L1": "",
+            "L2": "",
             "L5": "",
             "L10": "",
             "L20": "",
@@ -260,63 +262,54 @@ async def summarize_context(session_id: str, exchanges: list[dict]):
             "last_updated": datetime.now().isoformat()
         }
 
-    # Prompt the LLM (internal — no HTTP)
-    prompt = f"""
-Summarize the conversation below into multiple compression levels.
-
-Conversation:
-----------------
-{convo_text}
-----------------
-
-Output strictly in JSON with keys:
-L1  → ultra short summary (1–2 sentences max)
-L5  → short summary
-L10 → medium summary
-L20 → detailed overview
-L30 → full detailed summary
-
-JSON only. No text outside JSON.
-"""
+    result = {
+        "session_id": session_id,
+        "exchange_count": exchange_count,
+        "L1": "",
+        "L2": "",
+        "L5": "",
+        "L10": "",
+        "L20": "",
+        "L30": "",
+        "last_updated": datetime.now().isoformat()
+    }
 
     try:
-        llm_response = await call_llm(
-            prompt,
-            backend=INTAKE_LLM,
-            temperature=0.2
-        )
+        # L1: Always generate (most recent exchanges)
+        result["L1"] = await summarize_simple(exchanges[-5:])
+        print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)")
 
-        print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}")
-        print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}")
+        # L2: After 2+ exchanges
+        if exchange_count >= 2:
+            result["L2"] = await summarize_simple(exchanges[-2:])
+            print(f"[Intake] Generated L2 for {session_id}")
 
-        # LLM should return JSON, parse it
-        if not llm_response or not llm_response.strip():
-            raise ValueError("Empty response from LLM")
+        # L5: After 5+ exchanges
+        if exchange_count >= 5:
+            result["L5"] = await summarize_simple(exchanges[-10:])
+            print(f"[Intake] Generated L5 for {session_id}")
 
-        summary = json.loads(llm_response)
+        # L10: After 10+ exchanges (Reality Check)
+        if exchange_count >= 10:
+            result["L10"] = await summarize_L10(session_id, exchanges)
+            print(f"[Intake] Generated L10 for {session_id}")
 
-        return {
-            "session_id": session_id,
-            "exchange_count": len(exchanges),
-            "L1": summary.get("L1", ""),
-            "L5": summary.get("L5", ""),
-            "L10": summary.get("L10", ""),
-            "L20": summary.get("L20", ""),
-            "L30": summary.get("L30", ""),
-            "last_updated": datetime.now().isoformat()
-        }
+        # L20: After 20+ exchanges (Session Overview - merges L10s)
+        if exchange_count >= 20 and exchange_count % 10 == 0:
+            result["L20"] = await summarize_L20(session_id)
+            print(f"[Intake] Generated L20 for {session_id}")
+
+        # L30: After 30+ exchanges (Continuity Report - merges L20s)
+        if exchange_count >= 30 and exchange_count % 10 == 0:
+            result["L30"] = await summarize_L30(session_id)
+            print(f"[Intake] Generated L30 for {session_id}")
+
+        return result
 
     except Exception as e:
-        return {
-            "session_id": session_id,
-            "exchange_count": len(exchanges),
-            "L1": f"[Error summarizing: {str(e)}]",
-            "L5": "",
-            "L10": "",
-            "L20": "",
-            "L30": "",
-            "last_updated": datetime.now().isoformat()
-        }
+        print(f"[Intake] Error during summarization: {e}")
+        result["L1"] = f"[Error summarizing: {str(e)}]"
+        return result
 
 # ─────────────────────────────────
 # Background summarization stub

From ae41b51888ab3f44220abb896e393e0e3c1da0b3 Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Sun, 14 Dec 2025 01:44:05 -0500
Subject: [PATCH 03/10] autonomy build, phase 1

---
 cortex/autonomy/executive/__init__.py |   1 +
 cortex/autonomy/executive/planner.py  | 121 ++++++++++++++++
 cortex/autonomy/self/analyzer.py      |  74 ++++++++++
 cortex/autonomy/self/state.py         | 192 ++++++++++++++++++++++++-
 cortex/data/self_state.json           |  20 +++
 cortex/persona/speak.py               |  40 +++++-
 cortex/reasoning/reasoning.py         |  54 ++++++-
 cortex/router.py                      |  49 ++++++-
 cortex/tests/__init__.py              |   1 +
 cortex/tests/test_autonomy_phase1.py  | 197 ++++++++++++++++++++++++++
 10 files changed, 735 insertions(+), 14 deletions(-)
 create mode 100644 cortex/autonomy/executive/__init__.py
 create mode 100644 cortex/autonomy/executive/planner.py
 create mode 100644 cortex/autonomy/self/analyzer.py
 create mode 100644 cortex/data/self_state.json
 create mode 100644 cortex/tests/__init__.py
 create mode 100644 cortex/tests/test_autonomy_phase1.py

diff --git a/cortex/autonomy/executive/__init__.py b/cortex/autonomy/executive/__init__.py
new file mode 100644
index 0000000..1259881
--- /dev/null
+++ b/cortex/autonomy/executive/__init__.py
@@ -0,0 +1 @@
+"""Executive planning and decision-making module."""
diff --git a/cortex/autonomy/executive/planner.py b/cortex/autonomy/executive/planner.py
new file mode 100644
index 0000000..b6a0639
--- /dev/null
+++ b/cortex/autonomy/executive/planner.py
@@ -0,0 +1,121 @@
+"""
+Executive planner - generates execution plans for complex requests.
+Activated when inner monologue sets consult_executive=true.
+"""
+
+import os
+import logging
+from typing import Dict, Any, Optional
+from llm.llm_router import call_llm
+
+EXECUTIVE_LLM = os.getenv("EXECUTIVE_LLM", "CLOUD").upper()
+VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
+
+logger = logging.getLogger(__name__)
+
+if VERBOSE_DEBUG:
+    logger.setLevel(logging.DEBUG)
+
+
+EXECUTIVE_SYSTEM_PROMPT = """
+You are Lyra's executive planning system.
+You create structured execution plans for complex tasks.
+You do NOT generate the final response - only the plan.
+
+Your plan should include:
+1. Task decomposition (break into steps)
+2. Required tools/resources
+3. Reasoning strategy
+4. Success criteria
+
+Return a concise plan in natural language.
+"""
+
+
+async def plan_execution(
+    user_prompt: str,
+    intent: str,
+    context_state: Dict[str, Any],
+    identity_block: Dict[str, Any]
+) -> Dict[str, Any]:
+    """
+    Generate execution plan for complex request.
+
+    Args:
+        user_prompt: User's message
+        intent: Detected intent from inner monologue (None/non-string is tolerated)
+        context_state: Full context; reads tools_available, message_count, minutes_since_last_msg, active_project
+        identity_block: Lyra's identity (accepted for interface symmetry; not read here)
+
+    Returns:
+        Plan dictionary with structure:
+        {
+            "summary": "One-line plan summary",
+            "plan_text": "Detailed plan",
+            "steps": ["step1", "step2", ...],
+            "tools_needed": ["RAG", "WEB", ...],
+            "estimated_complexity": "low | medium | high"
+        }
+    """
+
+    # Build planning prompt
+    tools_available = context_state.get("tools_available", [])
+
+    prompt = f"""{EXECUTIVE_SYSTEM_PROMPT}
+
+User request: {user_prompt}
+
+Detected intent: {intent}
+
+Available tools: {", ".join(tools_available) if tools_available else "None"}
+
+Session context:
+- Message count: {context_state.get('message_count', 0)}
+- Time since last message: {(context_state.get('minutes_since_last_msg') or 0):.1f} minutes
+- Active project: {context_state.get('active_project', 'None')}
+
+Generate a structured execution plan.
+"""
+
+    if VERBOSE_DEBUG:
+        logger.debug(f"[EXECUTIVE] Planning prompt:\n{prompt}")
+
+    # Call executive LLM
+    plan_text = await call_llm(
+        prompt,
+        backend=EXECUTIVE_LLM,
+        temperature=0.3,  # Lower temperature for planning
+        max_tokens=500
+    ) or ""  # normalise a None/empty reply so the parsing below cannot raise
+
+    if VERBOSE_DEBUG:
+        logger.debug(f"[EXECUTIVE] Generated plan:\n{plan_text}")
+
+    # Parse plan (simple heuristic extraction for Phase 1)
+    steps = []
+    tools_needed = []
+
+    for line in plan_text.split('\n'):
+        line_lower = line.lower()
+        if any(marker in line_lower for marker in ['step', '1.', '2.', '3.', '-']):
+            steps.append(line.strip())
+
+        if tools_available:
+            for tool in tools_available:
+                if tool.lower() in line_lower and tool not in tools_needed:
+                    tools_needed.append(tool)
+
+    # Estimate complexity (simple heuristic)
+    complexity = "low"
+    if len(steps) > 3 or len(tools_needed) > 1:
+        complexity = "medium"
+    if len(steps) > 5 or any(k in str(intent or "").lower() for k in ("research", "analyze")):
+        complexity = "high"
+
+    return {
+        "summary": plan_text.split('\n')[0][:100] if plan_text else "Complex task execution plan",
+        "plan_text": plan_text,
+        "steps": steps[:10],  # Limit to 10 steps
+        "tools_needed": tools_needed,
+        "estimated_complexity": complexity
+    }
diff --git a/cortex/autonomy/self/analyzer.py b/cortex/autonomy/self/analyzer.py
new file mode 100644
index 0000000..4ee22e6
--- /dev/null
+++ b/cortex/autonomy/self/analyzer.py
@@ -0,0 +1,74 @@
+"""
+Analyze interactions and update self-state accordingly.
+"""
+
+import logging
+from typing import Dict, Any
+from .state import update_self_state
+
+logger = logging.getLogger(__name__)
+
+
+async def analyze_and_update_state(
+    monologue: Dict[str, Any],
+    user_prompt: str,
+    response: str,
+    context: Dict[str, Any]
+) -> None:
+    """
+    Analyze interaction and update self-state.
+
+    This runs after response generation to update Lyra's internal state
+    based on the interaction (keyword heuristics on the monologue intent).
+
+    Args:
+        monologue: Inner monologue output (may be empty; a null/non-string intent is tolerated)
+        user_prompt: User's message
+        response: Lyra's response
+        context: Full context state (not read by the current heuristics)
+    """
+
+    # Simple heuristics for state updates
+    # TODO: Replace with LLM-based sentiment analysis in Phase 2
+
+    mood_delta = 0.0
+    energy_delta = 0.0
+    confidence_delta = 0.0
+    curiosity_delta = 0.0
+    new_focus = None
+
+    # Analyze intent from monologue (LLM JSON may carry null or a non-string here)
+    intent = str(monologue.get("intent") or "").lower() if monologue else ""
+
+    if "technical" in intent or "complex" in intent:
+        energy_delta = -0.05  # Deep thinking is tiring
+        confidence_delta = 0.05 if len(response or "") > 200 else -0.05
+        new_focus = "technical_problem"
+
+    elif "creative" in intent or "brainstorm" in intent:
+        mood_delta = 0.1  # Creative work is engaging
+        curiosity_delta = 0.1
+        new_focus = "creative_exploration"
+
+    elif "clarification" in intent or "confused" in intent:
+        confidence_delta = -0.05
+        new_focus = "understanding_user"
+
+    elif "simple" in intent or "casual" in intent:
+        energy_delta = 0.05  # Light conversation is refreshing
+        new_focus = "conversation"
+
+    # Check for learning opportunities (questions in user prompt)
+    if "?" in user_prompt and any(word in user_prompt.lower() for word in ["how", "why", "what"]):
+        curiosity_delta += 0.05
+
+    # Update state
+    update_self_state(
+        mood_delta=mood_delta,
+        energy_delta=energy_delta,
+        new_focus=new_focus,
+        confidence_delta=confidence_delta,
+        curiosity_delta=curiosity_delta
+    )
+
+    logger.info(f"Self-state updated based on interaction: focus={new_focus}")
diff --git a/cortex/autonomy/self/state.py b/cortex/autonomy/self/state.py
index ab9c626..a8d9e46 100644
--- a/cortex/autonomy/self/state.py
+++ b/cortex/autonomy/self/state.py
@@ -1,11 +1,189 @@
 """
-Stub for self state management.
+Self-state management for Project Lyra.
+Maintains persistent identity, mood, energy, and focus across sessions.
 """
 
-def load_self_state():
-    """Load self state - stub implementation"""
-    return {
-        "mood": "neutral",
-        "energy": 0.8,
-        "focus": "user_request"
+import json
+import logging
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Any, Optional
+
+# Configuration
+STATE_FILE = Path(os.getenv("SELF_STATE_FILE", "/app/data/self_state.json"))
+VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
+
+logger = logging.getLogger(__name__)
+
+if VERBOSE_DEBUG:
+    logger.setLevel(logging.DEBUG)
+
+# Default state structure
+DEFAULT_STATE = {
+    "mood": "neutral",
+    "energy": 0.8,
+    "focus": "user_request",
+    "confidence": 0.7,
+    "curiosity": 0.5,
+    "last_updated": None,
+    "interaction_count": 0,
+    "learning_queue": [],  # Topics Lyra wants to explore
+    "active_goals": [],  # Self-directed goals
+    "preferences": {
+        "verbosity": "medium",
+        "formality": "casual",
+        "proactivity": 0.3  # How likely to suggest things unprompted
+    },
+    "metadata": {
+        "version": "1.0",
+        "created_at": None
     }
+}
+
+
+class SelfState:
+    """Manages Lyra's persistent self-state (a JSON-backed dict stored at STATE_FILE)."""
+
+    def __init__(self):
+        self._state = self._load_state()
+
+    def _load_state(self) -> Dict[str, Any]:
+        """Load state from disk or create default."""
+        if STATE_FILE.exists():
+            try:
+                with open(STATE_FILE, 'r') as f:
+                    state = json.load(f)
+                    logger.info(f"Loaded self-state from {STATE_FILE}")
+                    return state
+            except Exception as e:
+                logger.error(f"Failed to load self-state: {e}")
+                return self._create_default_state()  # NOTE: this rewrites the unreadable file with defaults
+        else:
+            return self._create_default_state()
+
+    def _create_default_state(self) -> Dict[str, Any]:
+        """Create and save default state (deep-copied so DEFAULT_STATE is never mutated)."""
+        import copy  # local import keeps this fix self-contained within the class
+        state = copy.deepcopy(DEFAULT_STATE)  # .copy() would share the nested dicts/lists
+        state["metadata"]["created_at"] = state["last_updated"] = datetime.now().isoformat()
+        self._save_state(state)
+        logger.info("Created new default self-state")
+        return state
+
+    def _save_state(self, state: Dict[str, Any]) -> None:
+        """Persist state to disk; failures are logged, not raised."""
+        try:
+            STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
+            with open(STATE_FILE, 'w') as f:
+                json.dump(state, f, indent=2)
+            if VERBOSE_DEBUG:
+                logger.debug(f"Saved self-state to {STATE_FILE}")
+        except Exception as e:
+            logger.error(f"Failed to save self-state: {e}")
+
+    def get_state(self) -> Dict[str, Any]:
+        """Get current state snapshot (shallow copy: nested dicts/lists are shared)."""
+        return self._state.copy()
+
+    def update_from_interaction(
+        self,
+        mood_delta: float = 0.0,
+        energy_delta: float = 0.0,
+        new_focus: Optional[str] = None,
+        confidence_delta: float = 0.0,
+        curiosity_delta: float = 0.0
+    ) -> None:
+        """
+        Update state based on interaction, then persist it.
+
+        Args:
+            mood_delta: Change in mood (-1.0 to 1.0); doubled and rounded to a step on the mood scale
+            energy_delta: Change in energy (-1.0 to 1.0); result clamped to [0, 1]
+            new_focus: New focus area (ignored when falsy)
+            confidence_delta: Change in confidence; result clamped to [0, 1]
+            curiosity_delta: Change in curiosity; result clamped to [0, 1]
+        """
+        # Apply deltas with bounds checking
+        self._state["energy"] = max(0.0, min(1.0,
+            self._state.get("energy", 0.8) + energy_delta))
+
+        self._state["confidence"] = max(0.0, min(1.0,
+            self._state.get("confidence", 0.7) + confidence_delta))
+
+        self._state["curiosity"] = max(0.0, min(1.0,
+            self._state.get("curiosity", 0.5) + curiosity_delta))
+
+        # Update focus if provided
+        if new_focus:
+            self._state["focus"] = new_focus
+
+        # Update mood (simplified sentiment); round() keeps +/- deltas symmetric
+        if mood_delta != 0:
+            mood_map = ["frustrated", "neutral", "engaged", "excited"]
+            current_mood_idx = 1  # neutral default
+            if self._state.get("mood") in mood_map:
+                current_mood_idx = mood_map.index(self._state["mood"])
+
+            new_mood_idx = max(0, min(len(mood_map) - 1,
+                round(current_mood_idx + mood_delta * 2)))  # int() truncated toward zero
+            self._state["mood"] = mood_map[new_mood_idx]
+
+        # Increment interaction counter
+        self._state["interaction_count"] = self._state.get("interaction_count", 0) + 1
+        self._state["last_updated"] = datetime.now().isoformat()
+
+        # Persist changes
+        self._save_state(self._state)
+
+        if VERBOSE_DEBUG:
+            logger.debug(f"Updated self-state: mood={self._state['mood']}, "
+                        f"energy={self._state['energy']:.2f}, "
+                        f"confidence={self._state['confidence']:.2f}")
+
+    def add_learning_goal(self, topic: str) -> None:
+        """Add topic to learning queue (no-op if the topic is already queued)."""
+        queue = self._state.get("learning_queue", [])
+        if topic not in [item.get("topic") for item in queue]:
+            queue.append({
+                "topic": topic,
+                "added_at": datetime.now().isoformat(),
+                "priority": 0.5
+            })
+            self._state["learning_queue"] = queue
+            self._save_state(self._state)
+            logger.info(f"Added learning goal: {topic}")
+
+    def add_active_goal(self, goal: str, context: str = "") -> None:
+        """Add self-directed goal with status "active" and persist the state."""
+        goals = self._state.get("active_goals", [])
+        goals.append({
+            "goal": goal,
+            "context": context,
+            "created_at": datetime.now().isoformat(),
+            "status": "active"
+        })
+        self._state["active_goals"] = goals
+        self._save_state(self._state)
+        logger.info(f"Added active goal: {goal}")
+
+
+# Global instance
+_self_state_instance = None
+
+def get_self_state_instance() -> SelfState:
+    """Return the module-wide SelfState, constructing it (which loads or creates the state file) on first call."""
+    global _self_state_instance
+    if _self_state_instance is None:
+        _self_state_instance = SelfState()
+    return _self_state_instance
+
+
+def load_self_state() -> Dict[str, Any]:
+    """Return a shallow copy of the current self-state dict - public API kept for backwards compatibility."""
+    return get_self_state_instance().get_state()
+
+
+def update_self_state(**kwargs) -> None:
+    """Update self state - public API; kwargs are forwarded to SelfState.update_from_interaction."""
+    get_self_state_instance().update_from_interaction(**kwargs)
diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json
new file mode 100644
index 0000000..1f6871d
--- /dev/null
+++ b/cortex/data/self_state.json
@@ -0,0 +1,20 @@
+{
+  "mood": "neutral",
+  "energy": 0.8,
+  "focus": "user_request",
+  "confidence": 0.7,
+  "curiosity": 0.6000000000000001,
+  "last_updated": "2025-12-14T06:36:21.236816",
+  "interaction_count": 3,
+  "learning_queue": [],
+  "active_goals": [],
+  "preferences": {
+    "verbosity": "medium",
+    "formality": "casual",
+    "proactivity": 0.3
+  },
+  "metadata": {
+    "version": "1.0",
+    "created_at": "2025-12-14T03:28:49.364768"
+  }
+}
\ No newline at end of file
diff --git a/cortex/persona/speak.py b/cortex/persona/speak.py
index 0d5c00a..57f4919 100644
--- a/cortex/persona/speak.py
+++ b/cortex/persona/speak.py
@@ -59,17 +59,44 @@ Guidelines:
 # Build persona prompt
 # ============================================================
 
-def build_speak_prompt(final_answer: str) -> str:
+def build_speak_prompt(final_answer: str, tone: str = "neutral", depth: str = "medium") -> str:
     """
     Wrap Cortex's final neutral answer in the Lyra persona.
     Cortex → neutral reasoning
     Speak → stylistic transformation
-    
+
     The LLM sees the original answer and rewrites it in Lyra's voice.
+
+    Args:
+        final_answer: The neutral reasoning output
+        tone: Desired emotional tone (neutral | warm | focused | playful | direct)
+        depth: Response depth (short | medium | deep)
     """
+
+    # Tone-specific guidance
+    tone_guidance = {
+        "neutral": "balanced and professional",
+        "warm": "friendly and empathetic",
+        "focused": "precise and technical",
+        "playful": "light and engaging",
+        "direct": "concise and straightforward"
+    }
+
+    depth_guidance = {
+        "short": "Keep responses brief and to-the-point.",
+        "medium": "Provide balanced detail.",
+        "deep": "Elaborate thoroughly with nuance and examples."
+    }
+
+    tone_hint = tone_guidance.get(tone, "balanced and professional")
+    depth_hint = depth_guidance.get(depth, "Provide balanced detail.")
+
     return f"""
 {PERSONA_STYLE}
 
+Tone guidance: Your response should be {tone_hint}.
+Depth guidance: {depth_hint}
+
 Rewrite the following message into Lyra's natural voice.
 Preserve meaning exactly.
 
@@ -84,16 +111,21 @@ Preserve meaning exactly.
 # Public API — async wrapper
 # ============================================================
 
-async def speak(final_answer: str) -> str:
+async def speak(final_answer: str, tone: str = "neutral", depth: str = "medium") -> str:
     """
     Given the final refined answer from Cortex,
     apply Lyra persona styling using the designated backend.
+
+    Args:
+        final_answer: The polished answer from refinement stage
+        tone: Desired emotional tone (neutral | warm | focused | playful | direct)
+        depth: Response depth (short | medium | deep)
     """
 
     if not final_answer:
         return ""
 
-    prompt = build_speak_prompt(final_answer)
+    prompt = build_speak_prompt(final_answer, tone, depth)
 
     backend = SPEAK_BACKEND
 
diff --git a/cortex/reasoning/reasoning.py b/cortex/reasoning/reasoning.py
index 6c87ed0..a04aa10 100644
--- a/cortex/reasoning/reasoning.py
+++ b/cortex/reasoning/reasoning.py
@@ -45,7 +45,9 @@ async def reason_check(
     identity_block: dict | None,
     rag_block: dict | None,
     reflection_notes: list[str],
-    context: dict | None = None
+    context: dict | None = None,
+    monologue: dict | None = None,  # NEW: Inner monologue guidance
+    executive_plan: dict | None = None  # NEW: Executive plan for complex tasks
 ) -> str:
     """
     Build the *draft answer* for Lyra Cortex.
@@ -57,6 +59,8 @@ async def reason_check(
         rag_block: Relevant long-term memories from NeoMem
         reflection_notes: Meta-awareness notes from reflection stage
         context: Unified context state from context.py (session state, intake, rag, etc.)
+        monologue: Inner monologue analysis (intent, tone, depth, consult_executive)
+        executive_plan: Executive plan for complex queries (steps, tools, strategy)
     """
 
     # --------------------------------------------------------
@@ -79,6 +83,52 @@ async def reason_check(
         except Exception:
             identity_txt = f"Identity Rules:\n{str(identity_block)}\n\n"
 
+    # --------------------------------------------------------
+    # Inner Monologue guidance (NEW)
+    # --------------------------------------------------------
+    monologue_section = ""
+    if monologue:
+        intent = monologue.get("intent", "unknown")
+        tone_desired = monologue.get("tone", "neutral")
+        depth_desired = monologue.get("depth", "medium")
+
+        monologue_section = f"""
+=== INNER MONOLOGUE GUIDANCE ===
+User Intent Detected: {intent}
+Desired Tone: {tone_desired}
+Desired Response Depth: {depth_desired}
+
+Adjust your response accordingly:
+- Focus on addressing the {intent} intent
+- Aim for {depth_desired} depth (short/medium/deep)
+- The persona layer will handle {tone_desired} tone, focus on content
+
+"""
+
+    # --------------------------------------------------------
+    # Executive Plan (NEW)
+    # --------------------------------------------------------
+    plan_section = ""
+    if executive_plan:
+        plan_section = f"""
+=== EXECUTIVE PLAN ===
+Task Complexity: {executive_plan.get('estimated_complexity', 'unknown')}
+Plan Summary: {executive_plan.get('summary', 'No summary')}
+
+Detailed Plan:
+{executive_plan.get('plan_text', 'No detailed plan available')}
+
+Required Steps:
+"""
+        for idx, step in enumerate(executive_plan.get('steps', []), 1):
+            plan_section += f"{idx}. {step}\n"
+
+        tools_needed = executive_plan.get('tools_needed', [])
+        if tools_needed:
+            plan_section += f"\nTools to leverage: {', '.join(tools_needed)}\n"
+
+        plan_section += "\nFollow this plan while generating your response.\n\n"
+
     # --------------------------------------------------------
     # RAG block (optional factual grounding)
     # --------------------------------------------------------
@@ -164,6 +214,8 @@ async def reason_check(
     prompt = (
         f"{notes_section}"
         f"{identity_txt}"
+        f"{monologue_section}"  # NEW: Intent/tone/depth guidance
+        f"{plan_section}"  # NEW: Executive plan if generated
         f"{context_txt}"  # Context BEFORE RAG for better coherence
         f"{rag_txt}"
         f"User message:\n{user_prompt}\n\n"
diff --git a/cortex/router.py b/cortex/router.py
index 1e0484b..48bb790 100644
--- a/cortex/router.py
+++ b/cortex/router.py
@@ -99,6 +99,7 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 0.6] Running inner monologue...")
 
+    inner_result = None
     try:
         self_state = load_self_state()
 
@@ -112,9 +113,33 @@ async def run_reason(req: ReasonRequest):
         inner_result = await inner_monologue.process(mono_context)
         logger.info(f"[INNER_MONOLOGUE] {inner_result}")
 
+        # Store in context for downstream use
+        context_state["monologue"] = inner_result
+
     except Exception as e:
         logger.warning(f"[INNER_MONOLOGUE] failed: {e}")
 
+    # ----------------------------------------------------------------
+    # STAGE 0.7 — Executive Planning (conditional)
+    # ----------------------------------------------------------------
+    executive_plan = None
+    if inner_result and inner_result.get("consult_executive"):
+        if VERBOSE_DEBUG:
+            logger.debug("[STAGE 0.7] Executive consultation requested...")
+
+        try:
+            from autonomy.executive.planner import plan_execution
+            executive_plan = await plan_execution(
+                user_prompt=req.user_prompt,
+                intent=inner_result.get("intent", "unknown"),
+                context_state=context_state,
+                identity_block=identity_block
+            )
+            logger.info(f"[EXECUTIVE] Generated plan: {executive_plan.get('summary', 'N/A')}")
+        except Exception as e:
+            logger.warning(f"[EXECUTIVE] Planning failed: {e}")
+            executive_plan = None
+
     # ----------------------------------------------------------------
     # STAGE 1 — Intake summary
     # ----------------------------------------------------------------
@@ -154,7 +179,9 @@ async def run_reason(req: ReasonRequest):
         identity_block=identity_block,
         rag_block=context_state.get("rag", []),
         reflection_notes=reflection_notes,
-        context=context_state
+        context=context_state,
+        monologue=inner_result,  # NEW: Pass monologue guidance
+        executive_plan=executive_plan  # NEW: Pass executive plan
     )
 
     # ----------------------------------------------------------------
@@ -178,13 +205,31 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 5] Applying persona layer...")
 
-    persona_answer = await speak(final_neutral)
+    # Extract tone and depth from monologue for persona guidance
+    tone = inner_result.get("tone", "neutral") if inner_result else "neutral"
+    depth = inner_result.get("depth", "medium") if inner_result else "medium"
+
+    persona_answer = await speak(final_neutral, tone=tone, depth=depth)
 
     # ----------------------------------------------------------------
     # STAGE 6 — Session update
     # ----------------------------------------------------------------
     update_last_assistant_message(req.session_id, persona_answer)
 
+    # ----------------------------------------------------------------
+    # STAGE 6.5 — Self-state update
+    # ----------------------------------------------------------------
+    try:
+        from autonomy.self.analyzer import analyze_and_update_state
+        await analyze_and_update_state(
+            monologue=inner_result or {},
+            user_prompt=req.user_prompt,
+            response=persona_answer,
+            context=context_state
+        )
+    except Exception as e:
+        logger.warning(f"[SELF_STATE] Update failed: {e}")
+
     if VERBOSE_DEBUG:
         logger.debug(f"\n{'='*80}")
         logger.debug(f"[PIPELINE COMPLETE] Session: {req.session_id}")
diff --git a/cortex/tests/__init__.py b/cortex/tests/__init__.py
new file mode 100644
index 0000000..f5afebe
--- /dev/null
+++ b/cortex/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for Project Lyra Cortex."""
diff --git a/cortex/tests/test_autonomy_phase1.py b/cortex/tests/test_autonomy_phase1.py
new file mode 100644
index 0000000..4da933e
--- /dev/null
+++ b/cortex/tests/test_autonomy_phase1.py
@@ -0,0 +1,197 @@
+"""
+Integration tests for Phase 1 autonomy features.
+Tests monologue integration, executive planning, and self-state persistence.
+"""
+
+import asyncio
+import json
+import sys
+import os
+
+# Add parent directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from autonomy.monologue.monologue import InnerMonologue
+from autonomy.self.state import load_self_state, update_self_state, get_self_state_instance
+from autonomy.executive.planner import plan_execution
+
+
+async def test_monologue_integration():
+    """Check that InnerMonologue.process() returns all four guidance keys."""
+    print("\n" + "="*60)
+    print("TEST 1: Monologue Integration")
+    print("="*60)
+
+    mono = InnerMonologue()
+
+    context = {
+        "user_message": "Explain quantum computing to me like I'm 5",
+        "session_id": "test_001",
+        "self_state": load_self_state(),
+        "context_summary": {"message_count": 5}
+    }
+
+    result = await mono.process(context)
+
+    assert "intent" in result, "Missing intent field"
+    assert "tone" in result, "Missing tone field"
+    assert "depth" in result, "Missing depth field"
+    assert "consult_executive" in result, "Missing consult_executive field"
+
+    print("✓ Monologue integration test passed")
+    print(f"  Result: {json.dumps(result, indent=2)}")
+
+    return result
+
+
+async def test_executive_planning():
+    """Check plan_execution() returns summary, plan_text and a non-empty steps list."""
+    print("\n" + "="*60)
+    print("TEST 2: Executive Planning")
+    print("="*60)
+
+    plan = await plan_execution(
+        user_prompt="Help me build a distributed system with microservices architecture",
+        intent="technical_implementation",
+        context_state={
+            "tools_available": ["RAG", "WEB", "CODEBRAIN"],
+            "message_count": 3,
+            "minutes_since_last_msg": 2.5,
+            "active_project": None
+        },
+        identity_block={}
+    )
+
+    assert "summary" in plan, "Missing summary field"
+    assert "plan_text" in plan, "Missing plan_text field"
+    assert "steps" in plan, "Missing steps field"
+    assert len(plan["steps"]) > 0, "No steps generated"
+
+    print("✓ Executive planning test passed")
+    print(f"  Plan summary: {plan['summary']}")
+    print(f"  Steps: {len(plan['steps'])}")
+    print(f"  Complexity: {plan.get('estimated_complexity', 'unknown')}")
+
+    return plan
+
+
+def test_self_state_persistence():
+    """Check update_self_state() bumps interaction_count and sets focus, as seen by a reload."""
+    print("\n" + "="*60)
+    print("TEST 3: Self-State Persistence")
+    print("="*60)
+
+    state1 = load_self_state()
+    assert "mood" in state1, "Missing mood field"
+    assert "energy" in state1, "Missing energy field"
+    assert "interaction_count" in state1, "Missing interaction_count"
+
+    initial_count = state1.get("interaction_count", 0)
+    print(f"  Initial interaction count: {initial_count}")
+
+    update_self_state(
+        mood_delta=0.1,
+        energy_delta=-0.05,
+        new_focus="testing"
+    )
+
+    state2 = load_self_state()
+    assert state2["interaction_count"] == initial_count + 1, "Interaction count not incremented"
+    assert state2["focus"] == "testing", "Focus not updated"
+
+    print("✓ Self-state persistence test passed")
+    print(f"  New interaction count: {state2['interaction_count']}")
+    print(f"  New focus: {state2['focus']}")
+    print(f"  New energy: {state2['energy']:.2f}")
+
+    return state2
+
+
+async def test_end_to_end_flow():
+    """Run monologue, then the planner only if consult_executive is truthy, then a state update."""
+    print("\n" + "="*60)
+    print("TEST 4: End-to-End Flow")
+    print("="*60)
+
+    # Step 1: Run the monologue on a complex query
+    mono = InnerMonologue()
+    mono_result = await mono.process({
+        "user_message": "Design a scalable ML pipeline with CI/CD integration",
+        "session_id": "test_e2e",
+        "self_state": load_self_state(),
+        "context_summary": {}
+    })
+
+    print(f"  Monologue intent: {mono_result.get('intent')}")
+    print(f"  Consult executive: {mono_result.get('consult_executive')}")
+
+    # Step 2: If executive requested, generate plan (skipped silently otherwise)
+    if mono_result.get("consult_executive"):
+        plan = await plan_execution(
+            user_prompt="Design a scalable ML pipeline with CI/CD integration",
+            intent=mono_result.get("intent", "unknown"),
+            context_state={"tools_available": ["CODEBRAIN", "WEB"]},
+            identity_block={}
+        )
+
+        assert plan is not None, "Plan should be generated"
+        print(f"  Executive plan generated: {len(plan.get('steps', []))} steps")
+
+    # Step 3: Update self-state
+    update_self_state(
+        energy_delta=-0.1,  # Complex task is tiring
+        new_focus="ml_pipeline_design",
+        confidence_delta=0.05
+    )
+
+    state = load_self_state()
+    assert state["focus"] == "ml_pipeline_design", "Focus should be updated"
+
+    print("✓ End-to-end flow test passed")
+    print(f"  Final state: {state['mood']}, energy={state['energy']:.2f}")
+
+    return True
+
+
+async def run_all_tests():
+    """Run the four Phase 1 tests in order; return True if all pass, False at the first failure."""
+    print("\n" + "="*60)
+    print("PHASE 1 AUTONOMY TESTS")
+    print("="*60)
+
+    try:
+        # Test 1: Monologue
+        mono_result = await test_monologue_integration()
+
+        # Test 2: Executive Planning
+        plan_result = await test_executive_planning()
+
+        # Test 3: Self-State
+        state_result = test_self_state_persistence()
+
+        # Test 4: End-to-End
+        await test_end_to_end_flow()
+
+        print("\n" + "="*60)
+        print("ALL TESTS PASSED ✓")
+        print("="*60)
+
+        print("\nSummary:")
+        print(f"  - Monologue: {mono_result.get('intent')} ({mono_result.get('tone')})")
+        print(f"  - Executive: {plan_result.get('estimated_complexity')} complexity")
+        print(f"  - Self-state: {state_result.get('interaction_count')} interactions")
+
+        return True
+
+    except Exception as e:
+        print("\n" + "="*60)
+        print(f"TEST FAILED: {e}")
+        print("="*60)
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+if __name__ == "__main__":
+    success = asyncio.run(run_all_tests())
+    sys.exit(0 if success else 1)

From e2e55a0fda4ad37573927a5f352ac1b5a205d7f2 Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Sun, 14 Dec 2025 14:43:08 -0500
Subject: [PATCH 04/10] autonomy phase 2

---
 cortex/autonomy/actions/__init__.py           |   1 +
 cortex/autonomy/actions/autonomous_actions.py | 480 +++++++++++++++++
 cortex/autonomy/learning/__init__.py          |   1 +
 cortex/autonomy/learning/pattern_learner.py   | 383 ++++++++++++++
 cortex/autonomy/proactive/__init__.py         |   1 +
 cortex/autonomy/proactive/monitor.py          | 321 ++++++++++++
 cortex/autonomy/tools/__init__.py             |   1 +
 cortex/autonomy/tools/decision_engine.py      | 124 +++++
 cortex/autonomy/tools/orchestrator.py         | 354 +++++++++++++
 cortex/data/self_state.json                   |   6 +-
 cortex/router.py                              |  95 +++-
 cortex/tests/test_autonomy_phase2.py          | 495 ++++++++++++++++++
 12 files changed, 2258 insertions(+), 4 deletions(-)
 create mode 100644 cortex/autonomy/actions/__init__.py
 create mode 100644 cortex/autonomy/actions/autonomous_actions.py
 create mode 100644 cortex/autonomy/learning/__init__.py
 create mode 100644 cortex/autonomy/learning/pattern_learner.py
 create mode 100644 cortex/autonomy/proactive/__init__.py
 create mode 100644 cortex/autonomy/proactive/monitor.py
 create mode 100644 cortex/autonomy/tools/__init__.py
 create mode 100644 cortex/autonomy/tools/decision_engine.py
 create mode 100644 cortex/autonomy/tools/orchestrator.py
 create mode 100644 cortex/tests/test_autonomy_phase2.py

diff --git a/cortex/autonomy/actions/__init__.py b/cortex/autonomy/actions/__init__.py
new file mode 100644
index 0000000..f7f9355
--- /dev/null
+++ b/cortex/autonomy/actions/__init__.py
@@ -0,0 +1 @@
+"""Autonomous action execution system."""
diff --git a/cortex/autonomy/actions/autonomous_actions.py b/cortex/autonomy/actions/autonomous_actions.py
new file mode 100644
index 0000000..98d573e
--- /dev/null
+++ b/cortex/autonomy/actions/autonomous_actions.py
@@ -0,0 +1,480 @@
+"""
+Autonomous Action Manager - executes safe, self-initiated actions.
+"""
+
+import logging
+import json
+from typing import Dict, List, Any, Optional
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+
+class AutonomousActionManager:
+    """
+    Manages safe autonomous actions that Lyra can take without explicit user prompting.
+
+    Whitelist of allowed actions:
+    - create_memory: Store information in NeoMem
+    - update_goal: Modify goal status
+    - schedule_reminder: Create future reminder
+    - summarize_session: Generate conversation summary
+    - learn_topic: Add topic to learning queue
+    - update_focus: Change current focus area
+    """
+
+    def __init__(self):
+        """Build the action-name -> handler whitelist and an empty in-memory action log."""
+        self.allowed_actions = {
+            "create_memory": self._create_memory,
+            "update_goal": self._update_goal,
+            "schedule_reminder": self._schedule_reminder,
+            "summarize_session": self._summarize_session,
+            "learn_topic": self._learn_topic,
+            "update_focus": self._update_focus
+        }
+
+        self.action_log = []  # Records appended by execute_action (whitelisted attempts only)
+
+    async def execute_action(
+        self,
+        action_type: str,
+        parameters: Dict[str, Any],
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Execute a single autonomous action; handler exceptions become failure records.
+
+        Args:
+            action_type: Type of action (must be in whitelist)
+            parameters: Action-specific parameters
+            context: Current context state
+
+        Returns:
+            {
+                "success": bool,
+                "action": action_type,
+                "result" (on success) or "error" (on failure),
+                "timestamp": ISO timestamp,
+                "parameters": parameters (omitted for non-whitelisted actions)
+            }
+        """
+        # Safety check: reject unknown actions (this rejection is not added to action_log)
+        if action_type not in self.allowed_actions:
+            logger.error(f"[ACTIONS] Attempted to execute non-whitelisted action: {action_type}")
+            return {
+                "success": False,
+                "action": action_type,
+                "error": f"Action '{action_type}' not in whitelist",
+                "timestamp": datetime.utcnow().isoformat()
+            }
+
+        try:
+            logger.info(f"[ACTIONS] Executing autonomous action: {action_type}")
+
+            # Handlers are coroutines called as handler(parameters, context)
+            action_func = self.allowed_actions[action_type]
+            result = await action_func(parameters, context)
+
+            # Log successful action
+            action_record = {
+                "success": True,
+                "action": action_type,
+                "result": result,
+                "timestamp": datetime.utcnow().isoformat(),
+                "parameters": parameters
+            }
+
+            self.action_log.append(action_record)
+            logger.info(f"[ACTIONS] Action {action_type} completed successfully")
+
+            return action_record
+
+        except Exception as e:
+            logger.error(f"[ACTIONS] Action {action_type} failed: {e}")
+
+            error_record = {
+                "success": False,
+                "action": action_type,
+                "error": str(e),
+                "timestamp": datetime.utcnow().isoformat(),
+                "parameters": parameters
+            }
+
+            self.action_log.append(error_record)
+            return error_record
+
+    async def execute_batch(
+        self,
+        actions: List[Dict[str, Any]],
+        context: Dict[str, Any]
+    ) -> List[Dict[str, Any]]:
+        """
+        Execute multiple actions sequentially, one execute_action call per spec.
+
+        Args:
+            actions: List of {"action": str, "parameters": dict, "critical": bool (optional)}
+            context: Current context state
+
+        Returns:
+            List of action results (ends early after a failed "critical" action)
+        """
+        results = []
+
+        for action_spec in actions:
+            action_type = action_spec.get("action")
+            parameters = action_spec.get("parameters", {})
+
+            result = await self.execute_action(action_type, parameters, context)
+            results.append(result)
+
+            # Non-critical failures are recorded and the batch continues
+            if not result["success"] and action_spec.get("critical", False):
+                logger.warning(f"[ACTIONS] Critical action {action_type} failed, stopping batch")
+                break
+
+        return results
+
+    # ========================================
+    # Whitelisted Action Implementations
+    # ========================================
+
+    async def _create_memory(
+        self,
+        parameters: Dict[str, Any],
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Store a memory through NeoMem, or return a simulated result if it is not importable.
+
+        Parameters:
+            - text: Memory content (required; ValueError if missing or empty)
+            - tags: Optional tags for memory (default [])
+            - importance: 0.0-1.0 importance score (default 0.5; range not checked here)
+        """
+        text = parameters.get("text")
+        if not text:
+            raise ValueError("Memory text required")
+
+        tags = parameters.get("tags", [])
+        importance = parameters.get("importance", 0.5)
+        session_id = context.get("session_id", "autonomous")
+
+        # Imported lazily so a missing NeoMem client falls through to the simulated path
+        try:
+            from memory.neomem_client import store_memory
+
+            result = await store_memory(
+                text=text,
+                session_id=session_id,
+                tags=tags,
+                importance=importance
+            )
+
+            return {
+                "memory_id": result.get("id"),
+                "text": text[:50] + "..." if len(text) > 50 else text
+            }
+
+        except ImportError:
+            logger.warning("[ACTIONS] NeoMem client not available, simulating memory storage")
+            return {
+                "memory_id": "simulated",
+                "text": text[:50] + "..." if len(text) > 50 else text,
+                "note": "NeoMem not available, memory not persisted"
+            }
+
+    async def _update_goal(
+        self,
+        parameters: Dict[str, Any],
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Update the first dict in self-state "active_goals" whose "id" matches goal_id.
+
+        Parameters:
+            - goal_id: Goal identifier (required; ValueError if missing or empty)
+            - status: New status (pending/in_progress/completed), default in_progress
+            - progress: Optional progress note (stored only when truthy)
+        """
+        goal_id = parameters.get("goal_id")
+        if not goal_id:
+            raise ValueError("goal_id required")
+
+        status = parameters.get("status", "in_progress")
+        progress = parameters.get("progress")
+
+        # Import self-state manager
+        from autonomy.self.state import get_self_state_instance
+
+        state = get_self_state_instance()
+        active_goals = state._state.get("active_goals", [])
+
+        # Find and update goal (non-dict entries in the list are skipped)
+        updated = False
+        for goal in active_goals:
+            if isinstance(goal, dict) and goal.get("id") == goal_id:
+                goal["status"] = status
+                if progress:
+                    goal["progress"] = progress
+                goal["updated_at"] = datetime.utcnow().isoformat()
+                updated = True
+                break
+
+        if updated:
+            state._save_state(state._state)  # Pass state dict, as the other handlers do
+            return {
+                "goal_id": goal_id,
+                "status": status,
+                "updated": True
+            }
+        else:
+            return {
+                "goal_id": goal_id,
+                "updated": False,
+                "note": "Goal not found"
+            }
+
+    async def _schedule_reminder(
+        self,
+        parameters: Dict[str, Any],
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Record a reminder in self-state; nothing in this method triggers it later.
+
+        Parameters:
+            - message: Reminder text (required; ValueError if missing or empty)
+            - delay_minutes: Minutes until reminder (default 60)
+            - priority: 0.0-1.0 priority score (default 0.5)
+        """
+        message = parameters.get("message")
+        if not message:
+            raise ValueError("Reminder message required")
+
+        delay_minutes = parameters.get("delay_minutes", 60)
+        priority = parameters.get("priority", 0.5)
+
+        # For now, store under the self-state "reminders" key
+        # In future: integrate with scheduler/cron system
+        from autonomy.self.state import get_self_state_instance
+
+        state = get_self_state_instance()
+
+        reminder = {
+            "type": "reminder",
+            "message": message,
+            "scheduled_at": datetime.utcnow().isoformat(),
+            "trigger_at_minutes": delay_minutes,
+            "priority": priority
+        }
+
+        # Append to the "reminders" list, creating it on first use
+        state._state.setdefault("reminders", []).append(reminder)
+        state._save_state(state._state)  # Pass state dict as argument
+
+        logger.info(f"[ACTIONS] Reminder scheduled: {message} (in {delay_minutes}min)")
+
+        return {
+            "message": message,
+            "delay_minutes": delay_minutes,
+            "note": "Reminder stored in self-state (scheduler integration pending)"
+        }
+
+    async def _summarize_session(
+        self,
+        parameters: Dict[str, Any],
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Summarize the current session, with a one-line fallback if no summarizer imports.
+
+        Parameters:
+            - max_length: Max summary length in words (default 200)
+            - focus_topics: Documented, but not read by this implementation
+        """
+        max_length = parameters.get("max_length", 200)
+        session_id = context.get("session_id", "unknown")
+
+        # Import summarizer lazily; ImportError selects the fallback below
+        try:
+            from utils.deferred_summary import summarize_conversation
+
+            summary = await summarize_conversation(
+                session_id=session_id,
+                max_words=max_length
+            )
+
+            return {
+                "summary": summary,
+                "word_count": len(summary.split())
+            }
+
+        except ImportError:
+            # Fallback: simple summary built from context (max_length is not applied)
+            message_count = context.get("message_count", 0)
+            focus = context.get("monologue", {}).get("intent", "general")
+
+            summary = f"Session {session_id}: {message_count} messages exchanged, focused on {focus}."
+
+            return {
+                "summary": summary,
+                "word_count": len(summary.split()),
+                "note": "Simple summary (full summarizer not available)"
+            }
+
+    async def _learn_topic(
+        self,
+        parameters: Dict[str, Any],
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Add topic to learning queue via the self-state add_learning_goal(topic).
+
+        Parameters:
+            - topic: Topic name (required; ValueError if missing or empty)
+            - reason: Why this topic (only logged and echoed in the result)
+            - priority: 0.0-1.0 priority score (read but not used yet)
+        """
+        topic = parameters.get("topic")
+        if not topic:
+            raise ValueError("Topic required")
+
+        reason = parameters.get("reason", "autonomous learning")
+        priority = parameters.get("priority", 0.5)
+
+        # Import self-state manager
+        from autonomy.self.state import get_self_state_instance
+
+        state = get_self_state_instance()
+        state.add_learning_goal(topic)  # Only pass topic parameter
+
+        logger.info(f"[ACTIONS] Added to learning queue: {topic} (reason: {reason})")
+
+        return {
+            "topic": topic,
+            "reason": reason,
+            "queue_position": len(state._state.get("learning_queue", []))
+        }
+
+    async def _update_focus(
+        self,
+        parameters: Dict[str, Any],
+        context: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Set the self-state focus, record when and why it changed, and save the state.
+
+        Parameters:
+            - focus: New focus area (required; ValueError if missing or empty)
+            - reason: Why this focus (default "autonomous update")
+        """
+        focus = parameters.get("focus")
+        if not focus:
+            raise ValueError("Focus required")
+
+        reason = parameters.get("reason", "autonomous update")
+
+        # Import self-state manager
+        from autonomy.self.state import get_self_state_instance
+
+        state = get_self_state_instance()
+        old_focus = state._state.get("focus", "none")
+
+        state._state["focus"] = focus
+        state._state["focus_updated_at"] = datetime.utcnow().isoformat()
+        state._state["focus_reason"] = reason
+        state._save_state(state._state)  # Pass state dict as argument
+
+        logger.info(f"[ACTIONS] Focus updated: {old_focus} -> {focus}")
+
+        return {
+            "old_focus": old_focus,
+            "new_focus": focus,
+            "reason": reason
+        }
+
+    # ========================================
+    # Utility Methods
+    # ========================================
+
+    def get_allowed_actions(self) -> List[str]:
+        """Return the whitelisted action names as a new list."""
+        return list(self.allowed_actions.keys())
+
+    def get_action_log(self, limit: int = 50) -> List[Dict[str, Any]]:
+        """
+        Get the most recent action records, oldest first.
+
+        Args:
+            limit: Max number of entries to return; 0 or less returns []
+
+        Returns:
+            New list of action records (a slice of the in-memory log)
+        """
+        return self.action_log[-limit:] if limit > 0 else []
+
+    def clear_action_log(self) -> None:
+        """Rebind action_log to a new empty list and log that it was cleared."""
+        self.action_log = []
+        logger.info("[ACTIONS] Action log cleared")
+
+    def validate_action(self, action_type: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Validate an action without executing it; collects every problem found.
+
+        Args:
+            action_type: Type of action
+            parameters: Action parameters
+
+        Returns:
+            {
+                "valid": bool,
+                "action": action_type,
+                "errors": [error messages] or []
+            }
+        """
+        errors = []
+
+        # Check whitelist
+        if action_type not in self.allowed_actions:
+            errors.append(f"Action '{action_type}' not in whitelist")
+
+        # Required parameters must be present and truthy (summarize_session has none)
+        if action_type == "create_memory" and not parameters.get("text"):
+            errors.append("Memory 'text' parameter required")
+
+        if action_type == "update_goal" and not parameters.get("goal_id"):
+            errors.append("Goal 'goal_id' parameter required")
+
+        if action_type == "schedule_reminder" and not parameters.get("message"):
+            errors.append("Reminder 'message' parameter required")
+
+        if action_type == "learn_topic" and not parameters.get("topic"):
+            errors.append("Learning 'topic' parameter required")
+
+        if action_type == "update_focus" and not parameters.get("focus"):
+            errors.append("Focus 'focus' parameter required")
+
+        return {
+            "valid": len(errors) == 0,
+            "action": action_type,
+            "errors": errors
+        }
+
+
+# Singleton instance
+_action_manager_instance = None
+
+
+def get_action_manager() -> AutonomousActionManager:
+    """
+    Return the module-level action manager, creating it on the first call.
+
+    Returns:
+        The shared AutonomousActionManager instance
+    """
+    global _action_manager_instance
+    if _action_manager_instance is None:
+        _action_manager_instance = AutonomousActionManager()
+    return _action_manager_instance
diff --git a/cortex/autonomy/learning/__init__.py b/cortex/autonomy/learning/__init__.py
new file mode 100644
index 0000000..aa193cb
--- /dev/null
+++ b/cortex/autonomy/learning/__init__.py
@@ -0,0 +1 @@
+"""Pattern learning and adaptation system."""
diff --git a/cortex/autonomy/learning/pattern_learner.py b/cortex/autonomy/learning/pattern_learner.py
new file mode 100644
index 0000000..61dd74c
--- /dev/null
+++ b/cortex/autonomy/learning/pattern_learner.py
@@ -0,0 +1,383 @@
+"""
+Pattern Learning System - learns from interaction patterns to improve autonomy.
+"""
+
+import logging
+import json
+import os
+from typing import Dict, List, Any, Optional
+from datetime import datetime
+from collections import defaultdict
+
+logger = logging.getLogger(__name__)
+
+
+class PatternLearner:
+    """
+    Learns from interaction patterns to improve Lyra's autonomous behavior.
+
+    Tracks:
+    - Topic frequencies (what users talk about)
+    - Time-of-day patterns (when users interact)
+    - User preferences (how users like responses)
+    - Successful response strategies (what works well)
+    """
+
+    def __init__(self, patterns_file: str = "/app/data/learned_patterns.json"):
+        """
+        Remember the storage path and load patterns from it (or start empty).
+
+        Args:
+            patterns_file: Path to persistent patterns storage
+        """
+        self.patterns_file = patterns_file
+        self.patterns = self._load_patterns()
+
+    def _load_patterns(self) -> Dict[str, Any]:
+        """Load patterns from disk, filling in any missing top-level keys with defaults."""
+        patterns = {
+            "topic_frequencies": {},
+            "time_patterns": {},
+            "user_preferences": {},
+            "successful_strategies": {},
+            "interaction_count": 0,
+            "last_updated": datetime.utcnow().isoformat()
+        }
+        if os.path.exists(self.patterns_file):
+            try:
+                with open(self.patterns_file, 'r') as f:
+                    patterns.update(json.load(f))
+                    logger.info(f"[PATTERN_LEARNER] Loaded patterns from {self.patterns_file}")
+            except Exception as e:
+                logger.error(f"[PATTERN_LEARNER] Failed to load patterns: {e}")
+
+        # Defaults cover a missing or unreadable file and files saved with fewer keys
+        return patterns
+
+    def _save_patterns(self) -> None:
+        """Write patterns to patterns_file as JSON; failures are logged, not raised."""
+        try:
+            # Ensure directory exists; dirname is "" for a bare filename, so fall back to "."
+            os.makedirs(os.path.dirname(self.patterns_file) or ".", exist_ok=True)
+
+            self.patterns["last_updated"] = datetime.utcnow().isoformat()
+
+            with open(self.patterns_file, 'w') as f:
+                json.dump(self.patterns, f, indent=2)
+
+            logger.debug(f"[PATTERN_LEARNER] Saved patterns to {self.patterns_file}")
+
+        except Exception as e:
+            logger.error(f"[PATTERN_LEARNER] Failed to save patterns: {e}")
+
+    async def learn_from_interaction(
+        self,
+        user_prompt: str,
+        response: str,
+        monologue: Dict[str, Any],
+        context: Dict[str, Any]
+    ) -> None:
+        """
+        Update every pattern table from one interaction; written to disk every 10th call.
+
+        Args:
+            user_prompt: User's message
+            response: Lyra's response
+            monologue: Inner monologue analysis
+            context: Full context state
+        """
+        self.patterns["interaction_count"] += 1
+
+        # Learn topic frequencies
+        self._learn_topics(user_prompt, monologue)
+
+        # Learn time patterns
+        self._learn_time_patterns()
+
+        # Learn user preferences
+        self._learn_preferences(monologue, context)
+
+        # Learn successful strategies
+        self._learn_strategies(monologue, response, context)
+
+        # Save periodically (when interaction_count is a multiple of 10)
+        if self.patterns["interaction_count"] % 10 == 0:
+            self._save_patterns()
+
+    def _learn_topics(self, user_prompt: str, monologue: Dict[str, Any]) -> None:
+        """Count the monologue intent and the prompt's long words in topic_frequencies."""
+        intent = monologue.get("intent", "unknown")
+
+        # Increment topic counter
+        topic_freq = self.patterns["topic_frequencies"]
+        topic_freq[intent] = topic_freq.get(intent, 0) + 1
+
+        # Keywords: lowercased whitespace-split tokens over 5 chars (punctuation is kept)
+        keywords = [word.lower() for word in user_prompt.split() if len(word) > 5]
+
+        for keyword in keywords:
+            topic_freq[f"keyword:{keyword}"] = topic_freq.get(f"keyword:{keyword}", 0) + 1
+
+        logger.debug(f"[PATTERN_LEARNER] Topic learned: {intent}")
+
+    def _learn_time_patterns(self) -> None:
+        """Count this interaction under its current UTC hour and weekday name."""
+        now = datetime.utcnow()
+        hour = now.hour
+
+        # Track interactions by hour (keys "hour_00" .. "hour_23")
+        time_patterns = self.patterns["time_patterns"]
+        hour_key = f"hour_{hour:02d}"
+        time_patterns[hour_key] = time_patterns.get(hour_key, 0) + 1
+
+        # Track day of week (key is "day_" + lowercased strftime('%A') name)
+        day_key = f"day_{now.strftime('%A').lower()}"
+        time_patterns[day_key] = time_patterns.get(day_key, 0) + 1
+
+    def _learn_preferences(self, monologue: Dict[str, Any], context: Dict[str, Any]) -> None:
+        """Tally the monologue's tone and depth values; `context` is currently unused."""
+        tone = monologue.get("tone", "neutral")
+        depth = monologue.get("depth", "medium")
+
+        prefs = self.patterns["user_preferences"]
+
+        # Track preferred tone
+        prefs.setdefault("tone_counts", {})
+        prefs["tone_counts"][tone] = prefs["tone_counts"].get(tone, 0) + 1
+
+        # Track preferred depth
+        prefs.setdefault("depth_counts", {})
+        prefs["depth_counts"][depth] = prefs["depth_counts"].get(depth, 0) + 1
+
+    def _learn_strategies(
+        self,
+        monologue: Dict[str, Any],
+        response: str,
+        context: Dict[str, Any]
+    ) -> None:
+        """
+        Record strategy statistics keyed by intent and by depth.
+
+        What is actually tracked:
+        - "<intent>:executive_used" count when context has an executive_plan
+        - "<depth>:avg_words" smoothed response word count
+        - Tone is not tracked here; strategies[intent] is only created as an empty dict
+        """
+        intent = monologue.get("intent", "unknown")
+        executive_used = context.get("executive_plan") is not None
+
+        strategies = self.patterns["successful_strategies"]
+        strategies.setdefault(intent, {})
+
+        # Track executive usage for this intent
+        if executive_used:
+            key = f"{intent}:executive_used"
+            strategies.setdefault(key, 0)
+            strategies[key] += 1
+
+        # Track response length patterns
+        response_length = len(response.split())
+        depth = monologue.get("depth", "medium")
+
+        length_key = f"{depth}:avg_words"
+        if length_key not in strategies:
+            strategies[length_key] = response_length
+        else:
+            # Midpoint of stored value and new sample: not a true mean, recent samples dominate
+            strategies[length_key] = (strategies[length_key] + response_length) / 2
+
+    # ========================================
+    # Pattern Analysis and Recommendations
+    # ========================================
+
+    def get_top_topics(self, limit: int = 10) -> List[tuple]:
+        """
+        Get the highest-count entries of topic_frequencies (intents and "keyword:" keys).
+
+        Args:
+            limit: Max number of topics to return
+
+        Returns:
+            List of (topic, count) tuples, sorted by count, highest first
+        """
+        topics = self.patterns["topic_frequencies"]
+        sorted_topics = sorted(topics.items(), key=lambda x: x[1], reverse=True)
+        return sorted_topics[:limit]
+
+    def get_preferred_tone(self) -> str:
+        """
+        Return the tone recorded most often.
+
+        Returns:
+            The highest-count tone, or "neutral" when none are recorded
+        """
+        counts = self.patterns["user_preferences"].get("tone_counts", {})
+
+        # max() keeps the first of several equally common tones.
+        if counts:
+            return max(counts, key=counts.get)
+
+        return "neutral"
+
+    def get_preferred_depth(self) -> str:
+        """
+        Return the response depth recorded most often.
+
+        Returns:
+            The highest-count depth, or "medium" when none are recorded
+        """
+        counts = self.patterns["user_preferences"].get("depth_counts", {})
+
+        # max() keeps the first of several equally common depths.
+        if counts:
+            return max(counts, key=counts.get)
+
+        return "medium"
+
+    def get_peak_hours(self, limit: int = 3) -> List[int]:
+        """
+        Return the hours of day with the most recorded interactions.
+
+        Args:
+            limit: Maximum number of hours to return
+
+        Returns:
+            Hour numbers parsed from "hour_<n>" keys, busiest first
+        """
+        hourly = [
+            (name, count)
+            for name, count in self.patterns["time_patterns"].items()
+            if name.startswith("hour_")
+        ]
+        if not hourly:
+            return []
+
+        # Stable sort: hours with equal counts keep their stored order.
+        hourly.sort(key=lambda entry: entry[1], reverse=True)
+        return [int(name.split("_")[1]) for name, _ in hourly[:limit]]
+
+    def should_use_executive(self, intent: str) -> bool:
+        """
+        Decide whether past usage suggests consulting the executive.
+
+        Args:
+            intent: Intent type to look up
+
+        Returns:
+            True once "<intent>:executive_used" has reached 3 or more
+        """
+        usage_threshold = 3
+        times_used = self.patterns["successful_strategies"].get(
+            f"{intent}:executive_used", 0
+        )
+        return times_used >= usage_threshold
+
+    def get_recommended_response_length(self, depth: str) -> int:
+        """
+        Suggest a response length, in words, for a depth level.
+
+        Args:
+            depth: Depth level (short/medium/deep)
+
+        Returns:
+            The learned "<depth>:avg_words" value truncated to int when it is
+            present and non-zero; otherwise 50/150/300 for short/medium/deep
+            (150 for any other depth)
+        """
+        strategies = self.patterns["successful_strategies"]
+        learned = strategies.get(f"{depth}:avg_words")
+
+        # A missing or zero average falls through to the static defaults.
+        if not learned:
+            fallback_words = {
+                "short": 50,
+                "medium": 150,
+                "deep": 300
+            }
+            return fallback_words.get(depth, 150)
+
+        # int() truncates any fractional part of the stored average.
+        return int(learned)
+
+    def get_insights(self) -> Dict[str, Any]:
+        """
+        Summarise the learned patterns as a single report.
+
+        Returns:
+            {
+                "total_interactions": int,
+                "top_topics": [(topic, count), ...],
+                "preferred_tone": str,
+                "preferred_depth": str,
+                "peak_hours": [hours],
+                "learning_recommendations": [str]
+            }
+        """
+        preferred_tone = self.get_preferred_tone()
+        preferred_depth = self.get_preferred_depth()
+
+        # Only non-default preferences are surfaced as recommendations.
+        notes = []
+        if preferred_tone != "neutral":
+            notes.append(f"User prefers {preferred_tone} tone")
+        if preferred_depth != "medium":
+            notes.append(f"User prefers {preferred_depth} depth responses")
+
+        # The most frequent topic, if any, becomes a learning candidate.
+        leading = self.get_top_topics(limit=3)
+        if leading:
+            top_topic = leading[0][0]
+            notes.append(f"Consider adding '{top_topic}' to learning queue")
+
+        # Fill the report in the documented key order.
+        report = {}
+        report["total_interactions"] = self.patterns["interaction_count"]
+        report["top_topics"] = self.get_top_topics(limit=5)
+        report["preferred_tone"] = preferred_tone
+        report["preferred_depth"] = preferred_depth
+        report["peak_hours"] = self.get_peak_hours(limit=3)
+        report["learning_recommendations"] = notes
+
+        return report
+
+    def reset_patterns(self) -> None:
+        """Reset all learned patterns, then call _save_patterns() (use with caution)."""
+        category_names = ("topic_frequencies", "time_patterns",
+                          "user_preferences", "successful_strategies")
+        fresh = {name: {} for name in category_names}
+        fresh["interaction_count"] = 0
+        # Naive UTC timestamp in ISO-8601 form.
+        fresh["last_updated"] = datetime.utcnow().isoformat()
+
+        self.patterns = fresh
+        self._save_patterns()
+        logger.warning("[PATTERN_LEARNER] Patterns reset")
+
+    def export_patterns(self) -> Dict[str, Any]:
+        """
+        Export an independent deep copy of all patterns for analysis.
+
+        Nested dicts are copied, so mutating the result cannot alter self.patterns.
+        """
+        import copy
+        return copy.deepcopy(self.patterns)
+
+
+# Singleton instance
+_learner_instance = None
+
+
+def get_pattern_learner(patterns_file: str = "/app/data/learned_patterns.json") -> PatternLearner:
+    """
+    Return the shared PatternLearner, creating it on the first call.
+
+    Args:
+        patterns_file: Path to patterns file (only used when the
+            instance is first created)
+    """
+    global _learner_instance
+    if _learner_instance is not None:
+        return _learner_instance
+
+    _learner_instance = PatternLearner(patterns_file=patterns_file)
+    return _learner_instance
diff --git a/cortex/autonomy/proactive/__init__.py b/cortex/autonomy/proactive/__init__.py
new file mode 100644
index 0000000..056c046
--- /dev/null
+++ b/cortex/autonomy/proactive/__init__.py
@@ -0,0 +1 @@
+"""Proactive monitoring and suggestion system."""
diff --git a/cortex/autonomy/proactive/monitor.py b/cortex/autonomy/proactive/monitor.py
new file mode 100644
index 0000000..c324709
--- /dev/null
+++ b/cortex/autonomy/proactive/monitor.py
@@ -0,0 +1,321 @@
+"""
+Proactive Context Monitor - detects opportunities for autonomous suggestions.
+"""
+
+import logging
+import time
+from typing import Dict, List, Any, Optional
+from datetime import datetime, timedelta
+
+logger = logging.getLogger(__name__)
+
+
+class ProactiveMonitor:
+    """
+    Monitors conversation context and detects opportunities for proactive suggestions.
+
+    Triggers:
+    - Long silence → Check-in
+    - Learning queue + high curiosity → Suggest exploration
+    - Active goals → Progress reminders
+    - Conversation milestones → Offer summary
+    - Pattern detection → Helpful suggestions
+    """
+
+    def __init__(self, min_priority: float = 0.6):
+        """
+        Set up the monitor with an empty per-session cooldown table.
+
+        Args:
+            min_priority: Minimum priority for suggestions (0.0-1.0)
+        """
+        self.cooldown_seconds = 300  # 5 minutes between proactive suggestions
+        self.last_suggestion_time = {}  # session_id -> timestamp
+        self.min_priority = min_priority
+
+    async def analyze_session(
+        self,
+        session_id: str,
+        context_state: Dict[str, Any],
+        self_state: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Pick the best proactive suggestion for this turn, if any.
+
+        All five detectors are run, candidates below self.min_priority are
+        dropped, and the highest-priority survivor is returned. Returning
+        a suggestion also restarts the session's cooldown timer. Ties go
+        to the detector that runs first.
+
+        Args:
+            session_id: Current session ID (keys the cooldown table)
+            context_state: Conversation context handed to the detectors
+            self_state: Lyra's current self-state handed to the detectors
+
+        Returns:
+            {
+                "suggestion": "text to append to response",
+                "priority": 0.0-1.0,
+                "reason": "why this suggestion",
+                "type": "check_in | learning | goal_reminder | summary | pattern"
+            }
+            or None when the session is cooling down or nothing qualifies
+        """
+        # A session still inside its cooldown window gets no suggestion.
+        if not self._check_cooldown(session_id):
+            logger.debug(f"[PROACTIVE] Session {session_id} in cooldown, skipping")
+            return None
+
+        # Every detector runs on every call; each yields a dict or None.
+        candidates = [
+            # 1. Long silence detection
+            self._check_long_silence(context_state),
+            # 2. Learning queue + high curiosity
+            self._check_learning_opportunity(self_state),
+            # 3. Active goals reminder
+            self._check_active_goals(self_state, context_state),
+            # 4. Conversation milestones
+            self._check_conversation_milestone(context_state),
+            # 5. Pattern-based suggestions
+            self._check_patterns(context_state, self_state),
+        ]
+
+        # Drop empty results, then apply the priority threshold.
+        eligible = [
+            found
+            for found in candidates
+            if found and found["priority"] >= self.min_priority
+        ]
+
+        if not eligible:
+            return None
+
+        # Manual arg-max: the strict ">" keeps the earliest detector's
+        # suggestion when several share the top priority.
+        best_suggestion = eligible[0]
+        for found in eligible[1:]:
+            if found["priority"] > best_suggestion["priority"]:
+                best_suggestion = found
+
+        # The cooldown starts only once a suggestion is actually returned.
+        self._update_cooldown(session_id)
+
+        logger.info(f"[PROACTIVE] Suggestion generated: {best_suggestion['type']} (priority: {best_suggestion['priority']:.2f})")
+
+        return best_suggestion
+
+    def _check_cooldown(self, session_id: str) -> bool:
+        """Return True when the session may receive a new suggestion."""
+        try:
+            last_sent = self.last_suggestion_time[session_id]
+        except KeyError:
+            return True  # no suggestion recorded for this session yet
+        return time.time() - last_sent >= self.cooldown_seconds
+
+    def _update_cooldown(self, session_id: str) -> None:
+        """Stamp the session with the current time, restarting its cooldown."""
+        self.last_suggestion_time.update({session_id: time.time()})
+
+    def _check_long_silence(self, context_state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        """
+        Offer a check-in when the last message is more than 30 minutes old.
+        """
+        silence_limit_minutes = 30
+        minutes_since_last = context_state.get("minutes_since_last_msg", 0)
+        # Exactly 30 minutes does not trigger; the comparison is strict.
+        if not minutes_since_last > silence_limit_minutes:
+            return None
+
+        return {
+            "suggestion": "\n\n[Aside: I'm still here if you need anything!]",
+            "priority": 0.7,
+            "reason": f"User silent for {minutes_since_last:.0f} minutes",
+            "type": "check_in"
+        }
+
+    def _check_learning_opportunity(self, self_state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        """
+        Suggest exploring the first queued topic when curiosity exceeds 0.7.
+        Returns None when the learning queue is empty or curiosity is lower.
+        """
+        learning_queue = self_state.get("learning_queue", [])
+        curiosity = self_state.get("curiosity", 0.5)
+
+        if not (curiosity > 0.7 and learning_queue):
+            return None
+        # The queue is non-empty here, so the old "new topics" fallback was dead code.
+        topic = learning_queue[0]
+        return {
+            "suggestion": f"\n\n[Aside: I've been curious about {topic} lately. Would you like to explore it together?]",
+            "priority": 0.65,
+            "reason": f"High curiosity ({curiosity:.2f}) and learning queue present",
+            "type": "learning"
+        }
+
+    def _check_active_goals(
+        self,
+        self_state: Dict[str, Any],
+        context_state: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Remind about the first active goal on every 10th message.
+        """
+        goals = self_state.get("active_goals", [])
+        if not goals:
+            return None
+
+        message_count = context_state.get("message_count", 0)
+        on_tenth_message = message_count % 10 == 0 and message_count > 0
+        if not on_tenth_message:
+            return None
+
+        # Goals may be plain strings or dicts carrying a "name" field.
+        first_goal = goals[0]
+        if isinstance(first_goal, str):
+            goal_name = first_goal
+        else:
+            goal_name = first_goal.get("name", "your goal")
+        return {
+            "suggestion": f"\n\n[Aside: Still thinking about {goal_name}. Let me know if you want to work on it.]",
+            "priority": 0.6,
+            "reason": f"Active goal present, {message_count} messages since start",
+            "type": "goal_reminder"
+        }
+
+    def _check_conversation_milestone(self, context_state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        """
+        Offer a conversation summary on every 50th message (50, 100, ...).
+        """
+        milestone_interval = 50
+        message_count = context_state.get("message_count", 0)
+        # Zero is a multiple of 50 as well, hence the explicit > 0 test.
+        if not (message_count > 0 and message_count % milestone_interval == 0):
+            return None
+
+        return {
+            "suggestion": f"\n\n[Aside: We've exchanged {message_count} messages! Would you like a summary of our conversation?]",
+            "priority": 0.65,
+            "reason": f"Milestone: {message_count} messages",
+            "type": "summary"
+        }
+
+    def _check_patterns(
+        self,
+        context_state: Dict[str, Any],
+        self_state: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Derive a suggestion from the "focus" and "energy" self-state values.
+
+        A repeated-topic focus wins over low energy; context_state is not read.
+        """
+        focus = self_state.get("focus", "")
+        topic_keeps_recurring = bool(focus) and "repeated" in focus.lower()
+        if topic_keeps_recurring:
+            return {
+                "suggestion": "\n\n[Aside: I notice we keep coming back to this topic. Would it help to create a summary or action plan?]",
+                "priority": 0.7,
+                "reason": "Repeated topic detected",
+                "type": "pattern"
+            }
+
+        # Energy defaults to 0.8; only values below 0.3 prompt a break offer.
+        energy = self_state.get("energy", 0.8)
+        if not energy < 0.3:
+            return None
+
+        return {
+            "suggestion": "\n\n[Aside: We've been at this for a while. Need a break or want to keep going?]",
+            "priority": 0.65,
+            "reason": f"Low energy ({energy:.2f})",
+            "type": "pattern"
+        }
+
+    def format_suggestion(self, suggestion: Dict[str, Any]) -> str:
+        """
+        Extract the text to append to a response from a suggestion dict.
+
+        Args:
+            suggestion: Suggestion dict from analyze_session()
+
+        Returns:
+            The "suggestion" value, or "" when the key is absent
+        """
+        return suggestion["suggestion"] if "suggestion" in suggestion else ""
+
+    def set_cooldown_duration(self, seconds: int) -> None:
+        """
+        Change the minimum gap enforced between proactive suggestions.
+
+        Args:
+            seconds: New cooldown duration, in seconds
+        """
+        self.cooldown_seconds = seconds
+        logger.info(f"[PROACTIVE] Cooldown updated to {seconds}s")
+
+    def reset_cooldown(self, session_id: str) -> None:
+        """
+        Forget the last suggestion time of a session, ending its cooldown.
+        A session without a recorded suggestion is left untouched and not logged.
+        """
+        try:
+            del self.last_suggestion_time[session_id]
+        except KeyError:
+            return
+        logger.info(f"[PROACTIVE] Cooldown reset for session {session_id}")
+
+    def get_session_stats(self, session_id: str) -> Dict[str, Any]:
+        """
+        Report the cooldown status of one session.
+
+        Args:
+            session_id: Session to check
+
+        Returns:
+            {
+                "last_suggestion_time": timestamp or None,
+                "seconds_since_last": int,
+                "cooldown_active": bool,
+                "cooldown_remaining": int
+            }
+            A session with no recorded suggestion reports
+            None / 0 / False / 0 for these fields.
+        """
+        # Baseline for a session that has never received a suggestion.
+        stats = {
+            "last_suggestion_time": None,
+            "seconds_since_last": 0,
+            "cooldown_active": False,
+            "cooldown_remaining": 0
+        }
+
+        last_time = self.last_suggestion_time.get(session_id)
+        if last_time:
+            # Elapsed time is truncated to whole seconds.
+            seconds_since = int(time.time() - last_time)
+            stats["last_suggestion_time"] = last_time
+            stats["seconds_since_last"] = seconds_since
+            stats["cooldown_active"] = seconds_since < self.cooldown_seconds
+            stats["cooldown_remaining"] = max(0, self.cooldown_seconds - seconds_since)
+
+        return stats
+
+
+# Singleton instance
+_monitor_instance = None
+
+
+def get_proactive_monitor(min_priority: float = 0.6) -> ProactiveMonitor:
+    """
+    Return the shared ProactiveMonitor, creating it on the first call.
+
+    Args:
+        min_priority: Minimum priority threshold (only used when the
+            instance is first created)
+    """
+    global _monitor_instance
+    if _monitor_instance is not None:
+        return _monitor_instance
+
+    _monitor_instance = ProactiveMonitor(min_priority=min_priority)
+    return _monitor_instance
diff --git a/cortex/autonomy/tools/__init__.py b/cortex/autonomy/tools/__init__.py
new file mode 100644
index 0000000..510fad9
--- /dev/null
+++ b/cortex/autonomy/tools/__init__.py
@@ -0,0 +1 @@
+"""Autonomous tool invocation system."""
diff --git a/cortex/autonomy/tools/decision_engine.py b/cortex/autonomy/tools/decision_engine.py
new file mode 100644
index 0000000..3247436
--- /dev/null
+++ b/cortex/autonomy/tools/decision_engine.py
@@ -0,0 +1,124 @@
+"""
+Tool Decision Engine - decides which tools to invoke autonomously.
+"""
+
+import logging
+from typing import Dict, List, Any
+
+logger = logging.getLogger(__name__)
+
+
+class ToolDecisionEngine:
+    """
+    Decides which tools to invoke based on keyword analysis of the prompt.
+
+    Matching is plain case-insensitive substring search, so a trigger
+    such as "rain" also fires inside longer words (e.g. "training").
+    """
+
+    # (tool, reason, priority, trigger phrases), evaluated in this order.
+    _KEYWORD_RULES = (
+        (
+            "RAG", "User references past conversation", 0.9,
+            ("remember", "you said", "we discussed", "earlier", "before",
+             "last time", "previously", "what did"),
+        ),
+        (
+            "WEB", "Requires current information", 0.8,
+            ("current", "latest", "news", "today", "what's happening",
+             "look up", "search for", "find information", "recent"),
+        ),
+        (
+            "WEATHER", "Weather information requested", 0.95,
+            ("weather", "temperature", "forecast", "rain", "sunny", "climate"),
+        ),
+        (
+            "CODEBRAIN", "Code-related task", 0.85,
+            ("code", "function", "debug", "implement", "algorithm",
+             "programming", "script", "syntax"),
+        ),
+    )
+
+    # Tools proposed only when the caller lists them in available_tools.
+    _REQUIRES_AVAILABILITY = frozenset({"CODEBRAIN"})
+
+    async def analyze_tool_needs(
+        self,
+        user_prompt: str,
+        monologue: Dict[str, Any],
+        context_state: Dict[str, Any],
+        available_tools: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Analyze if tools should be invoked and which ones.
+
+        Args:
+            user_prompt: User's message
+            monologue: Inner monologue analysis (may be None or empty)
+            context_state: Full context (not read by the current rules)
+            available_tools: Available tool names; only CODEBRAIN is gated on it
+
+        Returns:
+            {
+                "should_invoke_tools": bool,
+                "tools_to_invoke": [
+                    {
+                        "tool": "RAG | WEB | WEATHER | etc",
+                        "query": "search query",
+                        "reason": "why this tool",
+                        "priority": 0.0-1.0
+                    },
+                    ...
+                ],
+                "confidence": 0.0-1.0
+            }
+            tools_to_invoke is sorted by descending priority; confidence is
+            the highest priority present, or 0.0 when no tool is proposed.
+        """
+        # Lower-case once instead of once per keyword group.
+        prompt_lower = user_prompt.lower()
+
+        tools_to_invoke = []
+        for tool_name, reason, priority, triggers in self._KEYWORD_RULES:
+            if not any(trigger in prompt_lower for trigger in triggers):
+                continue
+
+            # Availability is consulted only after a keyword hit.
+            if tool_name in self._REQUIRES_AVAILABILITY and tool_name not in available_tools:
+                continue
+
+            tools_to_invoke.append({
+                "tool": tool_name,
+                "query": user_prompt,
+                "reason": reason,
+                "priority": priority
+            })
+
+        # Proactive RAG for complex queries (based on monologue), unless a
+        # keyword rule has already queued RAG.
+        if monologue and monologue.get("consult_executive"):
+            if not any(t["tool"] == "RAG" for t in tools_to_invoke):
+                tools_to_invoke.append({
+                    "tool": "RAG",
+                    "query": user_prompt,
+                    "reason": "Complex query benefits from context",
+                    "priority": 0.6
+                })
+
+        # Sort by priority; the head of the list is then the maximum.
+        tools_to_invoke.sort(key=lambda x: x["priority"], reverse=True)
+
+        max_priority = tools_to_invoke[0]["priority"] if tools_to_invoke else 0.0
+
+        result = {
+            "should_invoke_tools": len(tools_to_invoke) > 0,
+            "tools_to_invoke": tools_to_invoke,
+            "confidence": max_priority
+        }
+
+        if tools_to_invoke:
+            logger.info(f"[TOOL_DECISION] Autonomous tool invocation recommended: {len(tools_to_invoke)} tools")
+            for tool in tools_to_invoke:
+                logger.info(f"  - {tool['tool']} (priority: {tool['priority']:.2f}): {tool['reason']}")
+
+        return result
diff --git a/cortex/autonomy/tools/orchestrator.py b/cortex/autonomy/tools/orchestrator.py
new file mode 100644
index 0000000..9658721
--- /dev/null
+++ b/cortex/autonomy/tools/orchestrator.py
@@ -0,0 +1,354 @@
+"""
+Tool Orchestrator - executes autonomous tool invocations asynchronously.
+"""
+
+import asyncio
+import logging
+from typing import Dict, List, Any, Optional
+import os
+
+logger = logging.getLogger(__name__)
+
+
+class ToolOrchestrator:
+    """Orchestrates async tool execution and result aggregation."""
+
+    def __init__(self, tool_timeout: int = 30) -> None:
+        """
+        Initialize orchestrator and build its tool registry.
+
+        Args:
+            tool_timeout: Timeout in seconds used by execute_tools() (default 30)
+        """
+        self.tool_timeout = tool_timeout
+        self.available_tools = self._discover_tools()  # tool name -> callable
+
+    def _discover_tools(self) -> Dict[str, Any]:
+        """Build the name -> callable registry from whichever tool modules import."""
+        tools = {}
+
+        # A module that raises ImportError is logged at debug level and left out
+        try:
+            from memory.neomem_client import search_neomem
+            tools["RAG"] = search_neomem
+            logger.debug("[ORCHESTRATOR] RAG tool available")
+        except ImportError:
+            logger.debug("[ORCHESTRATOR] RAG tool not available")
+
+        try:
+            from integrations.web_search import web_search
+            tools["WEB"] = web_search
+            logger.debug("[ORCHESTRATOR] WEB tool available")
+        except ImportError:
+            logger.debug("[ORCHESTRATOR] WEB tool not available")
+
+        try:
+            from integrations.weather import get_weather
+            tools["WEATHER"] = get_weather
+            logger.debug("[ORCHESTRATOR] WEATHER tool available")
+        except ImportError:
+            logger.debug("[ORCHESTRATOR] WEATHER tool not available")
+
+        try:
+            from integrations.codebrain import query_codebrain
+            tools["CODEBRAIN"] = query_codebrain
+            logger.debug("[ORCHESTRATOR] CODEBRAIN tool available")
+        except ImportError:
+            logger.debug("[ORCHESTRATOR] CODEBRAIN tool not available")
+
+        return tools
+
+    async def execute_tools(
+        self,
+        tools_to_invoke: List[Dict[str, Any]],
+        context_state: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Execute multiple tools concurrently.
+
+        Each tool runs under its own self.tool_timeout deadline, so a slow
+        tool is reported as a timeout without discarding the results of the
+        tools that finished in time.
+
+        Args:
+            tools_to_invoke: List of tool specs from decision engine
+                [{"tool": "RAG", "query": "...", "reason": "...", "priority": 0.9}, ...]
+            context_state: Full context for tool execution
+
+        Returns:
+            {
+                "results": {
+                    "RAG": {...},
+                    "WEB": {...},
+                    ...
+                },
+                "execution_summary": {
+                    "tools_invoked": ["RAG", "WEB"],
+                    "successful": ["RAG"],
+                    "failed": ["WEB"],
+                    "total_time_ms": 1234
+                }
+            }
+            Failed entries have the form {"error": str, "success": False};
+            a timed-out tool reports "timeout" as its error.
+        """
+        import time
+        start_time = time.monotonic()
+
+        logger.info(f"[ORCHESTRATOR] Executing {len(tools_to_invoke)} tools asynchronously")
+
+        # Queue one deadline-wrapped coroutine per available tool.
+        tasks = []
+        tool_names = []
+
+        for tool_spec in tools_to_invoke:
+            tool_name = tool_spec["tool"]
+            query = tool_spec["query"]
+
+            if tool_name not in self.available_tools:
+                logger.warning(f"[ORCHESTRATOR] Tool {tool_name} not available, skipping")
+                continue
+
+            tasks.append(asyncio.wait_for(
+                self._execute_single_tool(tool_name, query, context_state),
+                timeout=self.tool_timeout
+            ))
+            tool_names.append(tool_name)
+            logger.debug(f"[ORCHESTRATOR] Queued {tool_name}: {query[:50]}...")
+
+        results = {}
+        successful = []
+        failed = []
+
+        # return_exceptions=True keeps one failure or timeout from affecting the rest.
+        completed = []
+        if tasks:
+            completed = await asyncio.gather(*tasks, return_exceptions=True)
+
+        for tool_name, result in zip(tool_names, completed):
+            if isinstance(result, asyncio.TimeoutError):
+                logger.error(f"[ORCHESTRATOR] {tool_name} timed out after {self.tool_timeout}s")
+                results[tool_name] = {"error": "timeout", "success": False}
+                failed.append(tool_name)
+            elif isinstance(result, Exception):
+                logger.error(f"[ORCHESTRATOR] {tool_name} failed: {result}")
+                results[tool_name] = {"error": str(result), "success": False}
+                failed.append(tool_name)
+            else:
+                logger.info(f"[ORCHESTRATOR] {tool_name} completed successfully")
+                results[tool_name] = result
+                successful.append(tool_name)
+
+        total_time_ms = int((time.monotonic() - start_time) * 1000)
+
+        execution_summary = {
+            "tools_invoked": tool_names,
+            "successful": successful,
+            "failed": failed,
+            "total_time_ms": total_time_ms
+        }
+
+        logger.info(f"[ORCHESTRATOR] Execution complete: {len(successful)}/{len(tool_names)} successful in {total_time_ms}ms")
+
+        return {
+            "results": results,
+            "execution_summary": execution_summary
+        }
+
+    async def _execute_single_tool(
+        self,
+        tool_name: str,
+        query: str,
+        context_state: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Run one tool and wrap its output in a success envelope.
+
+        Args:
+            tool_name: Name of tool (RAG, WEB, etc.)
+            query: Query string for the tool
+            context_state: Context forwarded to the RAG and CODEBRAIN adapters
+
+        Returns:
+            {"success": True, "tool": tool_name, "query": query, "data": output}
+
+        Raises:
+            ValueError: If available_tools has no usable entry for tool_name
+            Exception: Whatever the tool call raises, re-raised after logging
+        """
+        tool_func = self.available_tools.get(tool_name)
+        if not tool_func:
+            raise ValueError(f"Tool {tool_name} not available")
+
+        try:
+            logger.debug(f"[ORCHESTRATOR] Invoking {tool_name}...")
+
+            # Known tools get a signature adapter; other names get just the query.
+            adapters = {
+                "RAG": lambda: self._invoke_rag(tool_func, query, context_state),
+                "WEB": lambda: self._invoke_web(tool_func, query),
+                "WEATHER": lambda: self._invoke_weather(tool_func, query),
+                "CODEBRAIN": lambda: self._invoke_codebrain(tool_func, query, context_state),
+            }
+            invoke = adapters.get(tool_name, lambda: tool_func(query))
+            result = await invoke()
+        except Exception as e:
+            logger.error(f"[ORCHESTRATOR] {tool_name} execution failed: {e}")
+            raise
+
+        return {
+            "success": True,
+            "tool": tool_name,
+            "query": query,
+            "data": result
+        }
+
+    async def _invoke_rag(self, func, query: str, context: Dict[str, Any]) -> Any:
+        """Invoke RAG tool (NeoMem search) for up to 5 hits; any failure yields []."""
+        # Forwarded to the tool as session_id; "unknown" when the context has none.
+        session_id = context.get("session_id", "unknown")
+        try:
+            return await func(query, limit=5, session_id=session_id)
+        except Exception as e:
+            logger.warning(f"[ORCHESTRATOR] RAG invocation failed, returning empty: {e}")
+            # Best-effort lookup: the error is logged, not propagated.
+            return []
+
+    async def _invoke_web(self, func, query: str) -> Any:
+        """Invoke web search tool with max_results=5; failures become an error dict."""
+        try:
+            return await func(query, max_results=5)
+        except Exception as e:
+            logger.warning(f"[ORCHESTRATOR] WEB invocation failed: {e}")
+            # Failure is reported in-band with an empty result list.
+            return {"error": str(e), "results": []}
+
+    async def _invoke_weather(self, func, query: str) -> Any:
+        """
+        Invoke weather tool for the location extracted from the query.
+        """
+        # Simple heuristic for now; in future: use LLM to extract location
+        try:
+            return await func(self._extract_location(query))
+        except Exception as e:
+            logger.warning(f"[ORCHESTRATOR] WEATHER invocation failed: {e}")
+            # Failure is reported in-band instead of being raised.
+            return {"error": str(e)}
+
+    async def _invoke_codebrain(self, func, query: str, context: Dict[str, Any]) -> Any:
+        """Call the codebrain tool with the query and the full context dict."""
+        try:
+            return await func(query, context=context)
+        except Exception as exc:
+            logger.warning(f"[ORCHESTRATOR] CODEBRAIN invocation failed: {exc}")
+            # Report the failure as data so the result formatter can print it.
+            return {"error": str(exc)}
+
+    def _extract_location(self, query: str) -> str:
+        """
+        Extract location from weather query.
+        Simple heuristic - in future use LLM.
+
+        Returns the first word after a standalone "in", "at" or "for" (tried in
+        that order), lowercased and stripped of punctuation, else a fallback.
+        """
+        import re  # imported locally so this method is self-contained
+
+        lowered = query.lower()
+        for word in ("in", "at", "for"):
+            # \b stops matches inside words: the old substring test read
+            # "what is the weather" as location "is" via the "at " in "what ".
+            found = re.search(rf"\b{word}\s+(\S+)", lowered)
+            location = found.group(1).strip(".,;:!?\"'()") if found else ""
+            if location:
+                return location
+        return "current location"  # also covers an indicator with nothing after it
+
+    def format_results_for_context(self, orchestrator_result: Dict[str, Any]) -> str:
+        """
+        Render tool results as a text section for inclusion in context/prompt.
+
+        Args:
+            orchestrator_result: Output from execute_tools()
+
+        Returns:
+            Formatted string for prompt injection ("" when there are no results)
+        """
+        tool_outputs = orchestrator_result.get("results", {})
+        if not tool_outputs:
+            return ""
+
+        # Tools with a dedicated formatter; any other tool gets its raw data.
+        formatters = {
+            "RAG": self._format_rag_results,
+            "WEB": self._format_web_results,
+            "WEATHER": self._format_weather_results,
+            "CODEBRAIN": self._format_codebrain_results,
+        }
+        pieces = ["\n=== AUTONOMOUS TOOL RESULTS ===\n"]
+
+        for tool_name, outcome in tool_outputs.items():
+            if not outcome.get("success", False):
+                # Failed tools are still listed, with their error text.
+                pieces.append(f"\n[{tool_name}] - Failed: {outcome.get('error', 'unknown')}\n")
+                continue
+
+            payload = outcome.get("data", {})
+            pieces.append(f"\n[{tool_name}]\n")
+            if tool_name in formatters:
+                pieces.append(formatters[tool_name](payload))
+            else:
+                pieces.append(f"{payload}\n")
+
+        elapsed_ms = orchestrator_result.get("execution_summary", {}).get("total_time_ms", 0)
+        pieces.append(f"\n(Tools executed in {elapsed_ms}ms)\n")
+        pieces.append("=" * 40 + "\n")
+
+        return "".join(pieces)
+
+    def _format_rag_results(self, data: Any) -> str:
+        """Format top 3 memory hits from a list or {"results": [...]} dict; hits may be non-dicts."""
+        hits = data.get("results", []) if isinstance(data, dict) else data  # a dict cannot be sliced
+        if not hits:
+            return "No relevant memories found.\n"
+        lines = ["Relevant memories:\n"]
+        for i, item in enumerate(hits[:3], 1):  # Top 3
+            raw = item.get("text", item.get("content", item)) if isinstance(item, dict) else item
+            lines.append(f"  {i}. {str(raw)[:100]}...\n")  # str(): text may not be a string
+        return "".join(lines)
+
+    def _format_web_results(self, data: Any) -> str:
+        """Render up to 3 web hits as numbered title + 100-char snippet lines."""
+        if isinstance(data, dict) and data.get("error"):
+            return f"Web search failed: {data['error']}\n"
+
+        hits = data.get("results", []) if isinstance(data, dict) else data
+        if not hits:
+            return "No web results found.\n"
+
+        lines = ["Web search results:\n"]
+        for rank, hit in enumerate(hits[:3], start=1):
+            # Snippet falls back to "description", then to an empty string.
+            blurb = hit.get("snippet", hit.get("description", ""))
+            lines.append(f"  {rank}. {hit.get('title', 'No title')}\n     {blurb[:100]}...\n")
+        return "".join(lines)
+
+    def _format_weather_results(self, data: Any) -> str:
+        """Format weather data as one line; non-dict payloads are shown raw."""
+        if not isinstance(data, dict):
+            return f"{data}\n"  # str/None/list have no .get(); render raw like codebrain
+        if data.get("error"):
+            return f"Weather lookup failed: {data['error']}\n"
+        # Assuming weather API returns temp, conditions, etc.
+        temp = data.get("temperature", "unknown")
+        conditions = data.get("conditions", "unknown")
+        location = data.get("location", "requested location")
+        return f"Weather for {location}: {temp}, {conditions}\n"
+
+    def _format_codebrain_results(self, data: Any) -> str:
+        """Render codebrain output: an error line if one is reported, else the raw data."""
+        failed = isinstance(data, dict) and data.get("error")
+        if failed:
+            return f"Codebrain failed: {data['error']}\n"
+
+        return f"{data}\n"
diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json
index 1f6871d..b9fc83f 100644
--- a/cortex/data/self_state.json
+++ b/cortex/data/self_state.json
@@ -3,9 +3,9 @@
   "energy": 0.8,
   "focus": "user_request",
   "confidence": 0.7,
-  "curiosity": 0.6000000000000001,
-  "last_updated": "2025-12-14T06:36:21.236816",
-  "interaction_count": 3,
+  "curiosity": 0.7000000000000002,
+  "last_updated": "2025-12-14T19:29:49.051207",
+  "interaction_count": 5,
   "learning_queue": [],
   "active_goals": [],
   "preferences": {
diff --git a/cortex/router.py b/cortex/router.py
index 48bb790..75d514d 100644
--- a/cortex/router.py
+++ b/cortex/router.py
@@ -140,6 +140,55 @@ async def run_reason(req: ReasonRequest):
             logger.warning(f"[EXECUTIVE] Planning failed: {e}")
             executive_plan = None
 
+    # ----------------------------------------------------------------
+    # STAGE 0.8 — Autonomous Tool Invocation
+    # ----------------------------------------------------------------
+    tool_results = None
+    autonomous_enabled = os.getenv("ENABLE_AUTONOMOUS_TOOLS", "true").lower() == "true"
+    tool_confidence_threshold = float(os.getenv("AUTONOMOUS_TOOL_CONFIDENCE_THRESHOLD", "0.6"))
+
+    if autonomous_enabled and inner_result:
+        if VERBOSE_DEBUG:
+            logger.debug("[STAGE 0.8] Analyzing autonomous tool needs...")
+
+        try:
+            from autonomy.tools.decision_engine import ToolDecisionEngine
+            from autonomy.tools.orchestrator import ToolOrchestrator
+
+            # Analyze which tools to invoke
+            decision_engine = ToolDecisionEngine()
+            tool_decision = await decision_engine.analyze_tool_needs(
+                user_prompt=req.user_prompt,
+                monologue=inner_result,
+                context_state=context_state,
+                available_tools=["RAG", "WEB", "WEATHER", "CODEBRAIN"]
+            )
+
+            # Execute tools if confidence threshold met
+            if tool_decision["should_invoke_tools"] and tool_decision["confidence"] >= tool_confidence_threshold:
+                orchestrator = ToolOrchestrator(tool_timeout=30)
+                tool_results = await orchestrator.execute_tools(
+                    tools_to_invoke=tool_decision["tools_to_invoke"],
+                    context_state=context_state
+                )
+
+                # Format results for context injection
+                tool_context = orchestrator.format_results_for_context(tool_results)
+                context_state["autonomous_tool_results"] = tool_context
+
+                if VERBOSE_DEBUG:
+                    summary = tool_results.get("execution_summary", {})
+                    logger.debug(f"[STAGE 0.8] Tools executed: {summary.get('successful', [])} succeeded")
+            else:
+                if VERBOSE_DEBUG:
+                    logger.debug(f"[STAGE 0.8] No tools invoked (confidence: {tool_decision.get('confidence', 0):.2f})")
+
+        except Exception as e:
+            logger.warning(f"[STAGE 0.8] Autonomous tool invocation failed: {e}")
+            if VERBOSE_DEBUG:
+                import traceback
+                traceback.print_exc()
+
     # ----------------------------------------------------------------
     # STAGE 1 — Intake summary
     # ----------------------------------------------------------------
@@ -217,7 +266,7 @@ async def run_reason(req: ReasonRequest):
     update_last_assistant_message(req.session_id, persona_answer)
 
     # ----------------------------------------------------------------
-    # STAGE 6.5 — Self-state update
+    # STAGE 6.5 — Self-state update & Pattern Learning
     # ----------------------------------------------------------------
     try:
         from autonomy.self.analyzer import analyze_and_update_state
@@ -230,6 +279,50 @@ async def run_reason(req: ReasonRequest):
     except Exception as e:
         logger.warning(f"[SELF_STATE] Update failed: {e}")
 
+    # Pattern learning
+    try:
+        from autonomy.learning.pattern_learner import get_pattern_learner
+        learner = get_pattern_learner()
+        await learner.learn_from_interaction(
+            user_prompt=req.user_prompt,
+            response=persona_answer,
+            monologue=inner_result or {},
+            context=context_state
+        )
+    except Exception as e:
+        logger.warning(f"[PATTERN_LEARNER] Learning failed: {e}")
+
+    # ----------------------------------------------------------------
+    # STAGE 7 — Proactive Monitoring & Suggestions
+    # ----------------------------------------------------------------
+    proactive_enabled = os.getenv("ENABLE_PROACTIVE_MONITORING", "true").lower() == "true"
+    proactive_min_priority = float(os.getenv("PROACTIVE_SUGGESTION_MIN_PRIORITY", "0.6"))
+
+    if proactive_enabled:
+        try:
+            from autonomy.proactive.monitor import get_proactive_monitor
+            from autonomy.self.state import load_self_state
+
+            monitor = get_proactive_monitor(min_priority=proactive_min_priority)
+            self_state = load_self_state()
+
+            suggestion = await monitor.analyze_session(
+                session_id=req.session_id,
+                context_state=context_state,
+                self_state=self_state
+            )
+
+            # Append suggestion to response if exists
+            if suggestion:
+                suggestion_text = monitor.format_suggestion(suggestion)
+                persona_answer += suggestion_text
+
+                if VERBOSE_DEBUG:
+                    logger.debug(f"[STAGE 7] Proactive suggestion added: {suggestion['type']} (priority: {suggestion['priority']:.2f})")
+
+        except Exception as e:
+            logger.warning(f"[STAGE 7] Proactive monitoring failed: {e}")
+
     if VERBOSE_DEBUG:
         logger.debug(f"\n{'='*80}")
         logger.debug(f"[PIPELINE COMPLETE] Session: {req.session_id}")
diff --git a/cortex/tests/test_autonomy_phase2.py b/cortex/tests/test_autonomy_phase2.py
new file mode 100644
index 0000000..aa5956a
--- /dev/null
+++ b/cortex/tests/test_autonomy_phase2.py
@@ -0,0 +1,495 @@
+"""
+Integration tests for Phase 2 autonomy features.
+Tests autonomous tool invocation, proactive monitoring, actions, and pattern learning.
+"""
+
+import asyncio
+import json
+import sys
+import os
+
+# Add parent directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# Override self-state file path for testing
+os.environ["SELF_STATE_FILE"] = "/tmp/test_self_state.json"
+
+from autonomy.tools.decision_engine import ToolDecisionEngine
+from autonomy.tools.orchestrator import ToolOrchestrator
+from autonomy.proactive.monitor import ProactiveMonitor
+from autonomy.actions.autonomous_actions import AutonomousActionManager
+from autonomy.learning.pattern_learner import PatternLearner
+from autonomy.self.state import load_self_state, get_self_state_instance
+
+
+async def test_tool_decision_engine():
+    """Check ToolDecisionEngine.analyze_tool_needs on four prompts; returns the last result."""
+    print("\n" + "="*60)
+    print("TEST 1: Tool Decision Engine")
+    print("="*60)
+
+    engine = ToolDecisionEngine()
+
+    # Test 1a: a memory reference must select RAG with confidence above 0.8
+    result = await engine.analyze_tool_needs(
+        user_prompt="What did we discuss earlier about Python?",
+        monologue={"intent": "clarification", "consult_executive": False},
+        context_state={},
+        available_tools=["RAG", "WEB", "WEATHER"]
+    )
+
+    assert result["should_invoke_tools"], "Should invoke tools for memory reference"
+    assert any(t["tool"] == "RAG" for t in result["tools_to_invoke"]), "Should recommend RAG"
+    assert result["confidence"] > 0.8, f"Confidence should be high for clear memory reference: {result['confidence']}"
+
+    print(f"  ✓ Memory reference detection passed")
+    print(f"    Tools: {[t['tool'] for t in result['tools_to_invoke']]}")
+    print(f"    Confidence: {result['confidence']:.2f}")
+
+    # Test 1b: a "latest news" prompt must select WEB
+    result = await engine.analyze_tool_needs(
+        user_prompt="What's the latest news about AI developments?",
+        monologue={"intent": "information_seeking", "consult_executive": False},
+        context_state={},
+        available_tools=["RAG", "WEB", "WEATHER"]
+    )
+
+    assert result["should_invoke_tools"], "Should invoke tools for current info request"
+    assert any(t["tool"] == "WEB" for t in result["tools_to_invoke"]), "Should recommend WEB"
+
+    print(f"  ✓ Web search detection passed")
+    print(f"    Tools: {[t['tool'] for t in result['tools_to_invoke']]}")
+
+    # Test 1c: a weather prompt must select WEATHER
+    result = await engine.analyze_tool_needs(
+        user_prompt="What's the weather like today in Boston?",
+        monologue={"intent": "information_seeking", "consult_executive": False},
+        context_state={},
+        available_tools=["RAG", "WEB", "WEATHER"]
+    )
+
+    assert result["should_invoke_tools"], "Should invoke tools for weather query"
+    assert any(t["tool"] == "WEATHER" for t in result["tools_to_invoke"]), "Should recommend WEATHER"
+
+    print(f"  ✓ Weather detection passed")
+
+    # Test 1d: with consult_executive=True, RAG must be included proactively
+    result = await engine.analyze_tool_needs(
+        user_prompt="Design a microservices architecture",
+        monologue={"intent": "technical_implementation", "consult_executive": True},
+        context_state={},
+        available_tools=["RAG", "WEB", "CODEBRAIN"]
+    )
+
+    assert result["should_invoke_tools"], "Should proactively invoke tools for complex queries"
+    rag_tools = [t for t in result["tools_to_invoke"] if t["tool"] == "RAG"]
+    assert len(rag_tools) > 0, "Should include proactive RAG"
+
+    print(f"  ✓ Proactive RAG detection passed")
+    print(f"    Reason: {rag_tools[0]['reason']}")
+
+    print("\n✓ Tool Decision Engine tests passed\n")
+    return result
+
+
+async def test_tool_orchestrator():
+    """Check the keys of execute_tools() output and that formatting yields a string."""
+    print("\n" + "="*60)
+    print("TEST 2: Tool Orchestrator (Mock Mode)")
+    print("="*60)
+
+    orchestrator = ToolOrchestrator(tool_timeout=5)
+
+    # Since actual tools may not be available, only the result structure is checked
+    print(f"  Available tools: {list(orchestrator.available_tools.keys())}")
+
+    # A single RAG request (expected to fail gracefully if tools are unavailable)
+    tools_to_invoke = [
+        {"tool": "RAG", "query": "test query", "reason": "testing", "priority": 0.9}
+    ]
+
+    result = await orchestrator.execute_tools(
+        tools_to_invoke=tools_to_invoke,
+        context_state={"session_id": "test"}
+    )
+
+    assert "results" in result, "Should return results dict"
+    assert "execution_summary" in result, "Should return execution summary"
+
+    summary = result["execution_summary"]
+    assert "tools_invoked" in summary, "Summary should include tools_invoked"
+    assert "total_time_ms" in summary, "Summary should include timing"
+
+    print(f"  ✓ Orchestrator structure valid")
+    print(f"    Summary: {summary}")
+
+    # Formatting must return a string; its content is not checked
+    formatted = orchestrator.format_results_for_context(result)
+    assert isinstance(formatted, str), "Should format results as string"
+
+    print(f"  ✓ Result formatting works")
+    print(f"    Formatted length: {len(formatted)} chars")
+
+    print("\n✓ Tool Orchestrator tests passed\n")
+    return result
+
+
+async def test_proactive_monitor():
+    """Check ProactiveMonitor suggestion types, priority threshold and per-session cooldown."""
+    print("\n" + "="*60)
+    print("TEST 3: Proactive Monitor")
+    print("="*60)
+
+    monitor = ProactiveMonitor(min_priority=0.6)
+
+    # Test 3a: 35 minutes of silence must yield a check_in suggestion
+    context_state = {
+        "message_count": 5,
+        "minutes_since_last_msg": 35  # > 30 minutes
+    }
+
+    self_state = load_self_state()
+
+    suggestion = await monitor.analyze_session(
+        session_id="test_silence",
+        context_state=context_state,
+        self_state=self_state
+    )
+
+    assert suggestion is not None, "Should generate suggestion for long silence"
+    assert suggestion["type"] == "check_in", f"Should be check_in type: {suggestion['type']}"
+    assert suggestion["priority"] >= 0.6, "Priority should meet threshold"
+
+    print(f"  ✓ Long silence detection passed")
+    print(f"    Type: {suggestion['type']}, Priority: {suggestion['priority']:.2f}")
+    print(f"    Suggestion: {suggestion['suggestion'][:50]}...")
+
+    # Test 3b: curiosity 0.8 plus a non-empty learning queue must yield "learning"
+    self_state["curiosity"] = 0.8
+    self_state["learning_queue"] = ["quantum computing", "rust programming"]
+
+    # Reset cooldown for this test's session id
+    monitor.reset_cooldown("test_learning")
+
+    suggestion = await monitor.analyze_session(
+        session_id="test_learning",
+        context_state={"message_count": 3, "minutes_since_last_msg": 2},
+        self_state=self_state
+    )
+
+    assert suggestion is not None, "Should generate learning suggestion"
+    assert suggestion["type"] == "learning", f"Should be learning type: {suggestion['type']}"
+
+    print(f"  ✓ Learning opportunity detection passed")
+    print(f"    Suggestion: {suggestion['suggestion'][:70]}...")
+
+    # Test 3c: Conversation milestone (message_count of 50)
+    monitor.reset_cooldown("test_milestone")
+
+    # Reset curiosity to avoid learning suggestion taking precedence
+    self_state["curiosity"] = 0.5
+    self_state["learning_queue"] = []
+
+    suggestion = await monitor.analyze_session(
+        session_id="test_milestone",
+        context_state={"message_count": 50, "minutes_since_last_msg": 1},
+        self_state=self_state
+    )
+
+    assert suggestion is not None, "Should generate milestone suggestion"
+    # Note: any of the three types is accepted here, not only "summary"
+    assert suggestion["type"] in ["summary", "learning", "check_in"], f"Should be valid type: {suggestion['type']}"
+
+    print(f"  ✓ Conversation milestone detection passed (type: {suggestion['type']})")
+
+    # Test 3d: Cooldown mechanism
+    # Same session id as 3c, asked again immediately: must return None
+    suggestion2 = await monitor.analyze_session(
+        session_id="test_milestone",
+        context_state={"message_count": 51, "minutes_since_last_msg": 1},
+        self_state=self_state
+    )
+
+    assert suggestion2 is None, "Should not generate suggestion during cooldown"
+
+    print(f"  ✓ Cooldown mechanism working")
+
+    # Session stats must report the cooldown as active
+    stats = monitor.get_session_stats("test_milestone")
+    assert stats["cooldown_active"], "Cooldown should be active"
+    print(f"    Cooldown remaining: {stats['cooldown_remaining']}s")
+
+    print("\n✓ Proactive Monitor tests passed\n")
+    return suggestion
+
+
+async def test_autonomous_actions():
+    """Check AutonomousActionManager's whitelist, validation, execution and action log."""
+    print("\n" + "="*60)
+    print("TEST 4: Autonomous Actions")
+    print("="*60)
+
+    manager = AutonomousActionManager()
+
+    # Test 4a: the allowed list must contain the three actions checked below
+    allowed = manager.get_allowed_actions()
+    assert "create_memory" in allowed, "Should have create_memory action"
+    assert "update_goal" in allowed, "Should have update_goal action"
+    assert "learn_topic" in allowed, "Should have learn_topic action"
+
+    print(f"  ✓ Allowed actions: {allowed}")
+
+    # Test 4b: a well-formed create_memory request must validate
+    validation = manager.validate_action("create_memory", {"text": "test memory"})
+    assert validation["valid"], "Should validate correct action"
+
+    print(f"  ✓ Action validation passed")
+
+    # Test 4c: Execute learn_topic action
+    result = await manager.execute_action(
+        action_type="learn_topic",
+        parameters={"topic": "rust programming", "reason": "testing", "priority": 0.8},
+        context={"session_id": "test"}
+    )
+
+    assert result["success"], f"Action should succeed: {result.get('error', 'unknown')}"
+    assert "topic" in result["result"], "Should return topic info"
+
+    print(f"  ✓ learn_topic action executed")
+    print(f"    Topic: {result['result']['topic']}")
+    print(f"    Queue position: {result['result']['queue_position']}")
+
+    # Test 4d: Execute update_focus action
+    result = await manager.execute_action(
+        action_type="update_focus",
+        parameters={"focus": "autonomy_testing", "reason": "running tests"},
+        context={"session_id": "test"}
+    )
+
+    assert result["success"], "update_focus should succeed"
+
+    print(f"  ✓ update_focus action executed")
+    print(f"    New focus: {result['result']['new_focus']}")
+
+    # Test 4e: a non-whitelisted action must fail with a whitelist error
+    result = await manager.execute_action(
+        action_type="delete_all_files",  # NOT in whitelist
+        parameters={},
+        context={"session_id": "test"}
+    )
+
+    assert not result["success"], "Should reject non-whitelisted action"
+    assert "not in whitelist" in result["error"], "Should indicate whitelist violation"
+
+    print(f"  ✓ Non-whitelisted action rejected")
+
+    # Test 4f: the action log must hold at least two entries by now
+    log = manager.get_action_log(limit=10)
+    assert len(log) >= 2, f"Should have logged multiple actions (got {len(log)})"
+
+    print(f"  ✓ Action log contains {len(log)} entries")
+
+    print("\n✓ Autonomous Actions tests passed\n")
+    return result
+
+
+async def test_pattern_learner():
+    """Feed PatternLearner five same-intent interactions and check what it reports back."""
+    print("\n" + "="*60)
+    print("TEST 5: Pattern Learner")
+    print("="*60)
+
+    # Use temp file for testing (fixed path; only removed if every assert passes)
+    test_file = "/tmp/test_patterns.json"
+    learner = PatternLearner(patterns_file=test_file)
+
+    # Test 5a: five interactions sharing one intent, tone and depth
+    for i in range(5):
+        await learner.learn_from_interaction(
+            user_prompt=f"Help me with Python coding task {i}",
+            response=f"Here's help with task {i}...",
+            monologue={"intent": "coding_help", "tone": "focused", "depth": "medium"},
+            context={"session_id": "test", "executive_plan": None}
+        )
+
+    print(f"  ✓ Learned from 5 interactions")
+
+    # Test 5b: the shared intent must rank first among topics
+    top_topics = learner.get_top_topics(limit=5)
+    assert len(top_topics) > 0, "Should have learned topics"
+    assert "coding_help" == top_topics[0][0], "coding_help should be top topic"
+
+    print(f"  ✓ Top topics: {[t[0] for t in top_topics[:3]]}")
+
+    # Test 5c: Get preferred tone
+    preferred_tone = learner.get_preferred_tone()
+    assert preferred_tone == "focused", "Should detect focused as preferred tone"
+
+    print(f"  ✓ Preferred tone: {preferred_tone}")
+
+    # Test 5d: Get preferred depth
+    preferred_depth = learner.get_preferred_depth()
+    assert preferred_depth == "medium", "Should detect medium as preferred depth"
+
+    print(f"  ✓ Preferred depth: {preferred_depth}")
+
+    # Test 5e: insights must count exactly the 5 interactions fed above
+    insights = learner.get_insights()
+    assert insights["total_interactions"] == 5, "Should track interaction count"
+    assert insights["preferred_tone"] == "focused", "Insights should include tone"
+
+    print(f"  ✓ Insights generated:")
+    print(f"    Total interactions: {insights['total_interactions']}")
+    print(f"    Recommendations: {insights['learning_recommendations']}")
+
+    # Test 5f: Export patterns
+    exported = learner.export_patterns()
+    assert "topic_frequencies" in exported, "Should export all patterns"
+
+    print(f"  ✓ Patterns exported ({len(exported)} keys)")
+
+    # Cleanup of the patterns file, if one was written
+    if os.path.exists(test_file):
+        os.remove(test_file)
+
+    print("\n✓ Pattern Learner tests passed\n")
+    return insights
+
+
+async def test_end_to_end_autonomy():
+    """Run the decision engine, pattern learner, action manager and monitor on one scenario."""
+    print("\n" + "="*60)
+    print("TEST 6: End-to-End Autonomy Flow")
+    print("="*60)
+
+    # One prompt that mixes a memory reference with a request for current research
+    user_prompt = "Remember what we discussed about machine learning? I need current research on transformers."
+
+    monologue = {
+        "intent": "technical_research",
+        "tone": "focused",
+        "depth": "deep",
+        "consult_executive": True
+    }
+
+    context_state = {
+        "session_id": "e2e_test",
+        "message_count": 15,
+        "minutes_since_last_msg": 5
+    }
+
+    print(f"  User prompt: {user_prompt}")
+    print(f"  Monologue intent: {monologue['intent']}")
+
+    # Step 1: Tool decision engine (the only step with assertions)
+    engine = ToolDecisionEngine()
+    tool_decision = await engine.analyze_tool_needs(
+        user_prompt=user_prompt,
+        monologue=monologue,
+        context_state=context_state,
+        available_tools=["RAG", "WEB", "CODEBRAIN"]
+    )
+
+    print(f"\n  Step 1: Tool Decision")
+    print(f"    Should invoke: {tool_decision['should_invoke_tools']}")
+    print(f"    Tools: {[t['tool'] for t in tool_decision['tools_to_invoke']]}")
+    assert tool_decision["should_invoke_tools"], "Should invoke tools"
+    assert len(tool_decision["tools_to_invoke"]) >= 2, "Should recommend multiple tools (RAG + WEB)"
+
+    # Step 2: Pattern learning (result is printed, not asserted)
+    learner = PatternLearner(patterns_file="/tmp/e2e_test_patterns.json")
+    await learner.learn_from_interaction(
+        user_prompt=user_prompt,
+        response="Here's information about transformers...",
+        monologue=monologue,
+        context=context_state
+    )
+
+    print(f"\n  Step 2: Pattern Learning")
+    top_topics = learner.get_top_topics(limit=3)
+    print(f"    Learned topics: {[t[0] for t in top_topics]}")
+
+    # Step 3: Autonomous action (success flag is printed, not asserted)
+    action_manager = AutonomousActionManager()
+    action_result = await action_manager.execute_action(
+        action_type="learn_topic",
+        parameters={"topic": "transformer architectures", "reason": "user interest detected"},
+        context=context_state
+    )
+
+    print(f"\n  Step 3: Autonomous Action")
+    print(f"    Action: learn_topic")
+    print(f"    Success: {action_result['success']}")
+
+    # Step 4: Proactive monitoring (either outcome is accepted; nothing is asserted)
+    monitor = ProactiveMonitor(min_priority=0.6)
+    monitor.reset_cooldown("e2e_test")
+
+    suggestion = await monitor.analyze_session(
+        session_id="e2e_test",
+        context_state=context_state,
+        self_state=load_self_state()
+    )
+
+    print(f"\n  Step 4: Proactive Monitoring")
+    print(f"    Suggestion: {suggestion['type'] if suggestion else 'None (expected for low message count)'}")
+
+    # Cleanup of the patterns file, if one was written
+    if os.path.exists("/tmp/e2e_test_patterns.json"):
+        os.remove("/tmp/e2e_test_patterns.json")
+
+    print("\n✓ End-to-End Autonomy Flow tests passed\n")
+    return True
+
+
+async def run_all_tests():
+    """Run the six Phase 2 tests in order; return True if all pass, False on the first failure."""
+    print("\n" + "="*60)
+    print("PHASE 2 AUTONOMY TESTS")
+    print("="*60)
+
+    try:
+        # Test 1: Tool Decision Engine
+        await test_tool_decision_engine()
+
+        # Test 2: Tool Orchestrator
+        await test_tool_orchestrator()
+
+        # Test 3: Proactive Monitor
+        await test_proactive_monitor()
+
+        # Test 4: Autonomous Actions
+        await test_autonomous_actions()
+
+        # Test 5: Pattern Learner
+        await test_pattern_learner()
+
+        # Test 6: End-to-End
+        await test_end_to_end_autonomy()
+
+        print("\n" + "="*60)
+        print("ALL PHASE 2 TESTS PASSED ✓")
+        print("="*60)
+
+        print("\nPhase 2 Features Validated:")
+        print("  ✓ Autonomous tool decision making")
+        print("  ✓ Tool orchestration and execution")
+        print("  ✓ Proactive monitoring and suggestions")
+        print("  ✓ Safe autonomous actions")
+        print("  ✓ Pattern learning and adaptation")
+        print("  ✓ End-to-end autonomous flow")
+
+        return True
+
+    except Exception as e:
+        print("\n" + "="*60)
+        print(f"TEST FAILED: {e}")
+        print("="*60)
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+if __name__ == "__main__":
+    success = asyncio.run(run_all_tests())  # True only if every test passed
+    sys.exit(0 if success else 1)  # non-zero exit status when a test failed

From 0528d10081ad0b8e1793d153e62bcbe16ac1591f Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Mon, 15 Dec 2025 01:56:57 -0500
Subject: [PATCH 05/10] autonomy phase 2.5 - tightening up some stuff in the
 pipeline

---
 cortex/data/self_state.json           |  6 ++--
 cortex/router.py                      |  3 +-
 neomem/neomem/vector_stores/qdrant.py | 50 +++++++++++++++++++++++----
 3 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json
index b9fc83f..56d5ae5 100644
--- a/cortex/data/self_state.json
+++ b/cortex/data/self_state.json
@@ -3,9 +3,9 @@
   "energy": 0.8,
   "focus": "user_request",
   "confidence": 0.7,
-  "curiosity": 0.7000000000000002,
-  "last_updated": "2025-12-14T19:29:49.051207",
-  "interaction_count": 5,
+  "curiosity": 1.0,
+  "last_updated": "2025-12-15T05:38:06.084867",
+  "interaction_count": 14,
   "learning_queue": [],
   "active_goals": [],
   "preferences": {
diff --git a/cortex/router.py b/cortex/router.py
index 75d514d..8bbbc74 100644
--- a/cortex/router.py
+++ b/cortex/router.py
@@ -301,10 +301,9 @@ async def run_reason(req: ReasonRequest):
     if proactive_enabled:
         try:
             from autonomy.proactive.monitor import get_proactive_monitor
-            from autonomy.self.state import load_self_state
 
             monitor = get_proactive_monitor(min_priority=proactive_min_priority)
-            self_state = load_self_state()
+            self_state = load_self_state()  # Already imported at top of file
 
             suggestion = await monitor.analyze_session(
                 session_id=req.session_id,
diff --git a/neomem/neomem/vector_stores/qdrant.py b/neomem/neomem/vector_stores/qdrant.py
index 59ee9a9..456da2e 100644
--- a/neomem/neomem/vector_stores/qdrant.py
+++ b/neomem/neomem/vector_stores/qdrant.py
@@ -1,7 +1,9 @@
 import logging
 import os
 import shutil
+from typing import Optional
 
+from pydantic import BaseModel
 from qdrant_client import QdrantClient
 from qdrant_client.models import (
     Distance,
@@ -19,6 +21,13 @@ from mem0.vector_stores.base import VectorStoreBase
 logger = logging.getLogger(__name__)
 
 
+class OutputData(BaseModel):
+    """Standard output format for vector search results."""
+    id: Optional[str]  # point ID, stringified by the callers in this module
+    score: Optional[float]  # search score; None when fetched directly by ID
+    payload: Optional[dict]  # payload stored alongside the vector
+
+
 class Qdrant(VectorStoreBase):
     def __init__(
         self,
@@ -170,7 +179,7 @@ class Qdrant(VectorStoreBase):
             filters (dict, optional): Filters to apply to the search. Defaults to None.
 
         Returns:
-            list: Search results.
+            list: Search results wrapped in OutputData format.
         """
         query_filter = self._create_filter(filters) if filters else None
         hits = self.client.query_points(
@@ -179,7 +188,16 @@ class Qdrant(VectorStoreBase):
             query_filter=query_filter,
             limit=limit,
         )
-        return hits.points
+
+        # Wrap results in OutputData format to match other vector stores
+        return [
+            OutputData(
+                id=str(hit.id),
+                score=hit.score,
+                payload=hit.payload
+            )
+            for hit in hits.points
+        ]
 
     def delete(self, vector_id: int):
         """
@@ -207,7 +225,7 @@ class Qdrant(VectorStoreBase):
         point = PointStruct(id=vector_id, vector=vector, payload=payload)
         self.client.upsert(collection_name=self.collection_name, points=[point])
 
-    def get(self, vector_id: int) -> dict:
+    def get(self, vector_id: int) -> OutputData:
         """
         Retrieve a vector by ID.
 
@@ -215,10 +233,17 @@ class Qdrant(VectorStoreBase):
             vector_id (int): ID of the vector to retrieve.
 
         Returns:
-            dict: Retrieved vector.
+            OutputData: Retrieved vector wrapped in OutputData format.
         """
         result = self.client.retrieve(collection_name=self.collection_name, ids=[vector_id], with_payload=True)
-        return result[0] if result else None
+        if result:
+            hit = result[0]
+            return OutputData(
+                id=str(hit.id),
+                score=None,  # No score for direct retrieval
+                payload=hit.payload
+            )
+        return None
 
     def list_cols(self) -> list:
         """
@@ -251,7 +276,7 @@ class Qdrant(VectorStoreBase):
             limit (int, optional): Number of vectors to return. Defaults to 100.
 
         Returns:
-            list: List of vectors.
+            list: List of vectors wrapped in OutputData format.
         """
         query_filter = self._create_filter(filters) if filters else None
         result = self.client.scroll(
@@ -261,7 +286,18 @@ class Qdrant(VectorStoreBase):
             with_payload=True,
             with_vectors=False,
         )
-        return result
+
+        # Wrap results in OutputData format
+        # scroll() returns tuple: (points, next_page_offset)
+        points = result[0] if isinstance(result, tuple) else result
+        return [
+            OutputData(
+                id=str(point.id),
+                score=None,  # No score for list operation
+                payload=point.payload
+            )
+            for point in points
+        ]
 
     def reset(self):
         """Reset the index by deleting and recreating it."""

From 0a0354603997de3e61de553cc12f586a7398a276 Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Mon, 15 Dec 2025 04:10:03 -0500
Subject: [PATCH 06/10] neomem disabled

---
 cortex/autonomy/tools/orchestrator.py | 15 +++++++++------
 cortex/context.py                     | 19 ++++++++++++++-----
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/cortex/autonomy/tools/orchestrator.py b/cortex/autonomy/tools/orchestrator.py
index 9658721..0b0b03d 100644
--- a/cortex/autonomy/tools/orchestrator.py
+++ b/cortex/autonomy/tools/orchestrator.py
@@ -28,12 +28,15 @@ class ToolOrchestrator:
         tools = {}
 
         # Import tool modules as they become available
-        try:
-            from memory.neomem_client import search_neomem
-            tools["RAG"] = search_neomem
-            logger.debug("[ORCHESTRATOR] RAG tool available")
-        except ImportError:
-            logger.debug("[ORCHESTRATOR] RAG tool not available")
+        if os.getenv("NEOMEM_ENABLED", "false").lower() == "true":
+            try:
+                from memory.neomem_client import search_neomem
+                tools["RAG"] = search_neomem
+                logger.debug("[ORCHESTRATOR] RAG tool available")
+            except ImportError:
+                logger.debug("[ORCHESTRATOR] RAG tool not available")
+        else:
+            logger.info("[ORCHESTRATOR] NEOMEM_ENABLED is false; RAG tool disabled")
 
         try:
             from integrations.web_search import web_search
diff --git a/cortex/context.py b/cortex/context.py
index 341946d..6db9ad5 100644
--- a/cortex/context.py
+++ b/cortex/context.py
@@ -24,6 +24,7 @@ from neomem_client import NeoMemClient
 # Configuration
 # -----------------------------
 NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000")
+NEOMEM_ENABLED = os.getenv("NEOMEM_ENABLED", "false").lower() == "true"
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4"))
 VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
 
@@ -148,6 +149,10 @@ async def _search_neomem(
     Returns:
         List of memory objects with full structure, or empty list on failure
     """
+    if not NEOMEM_ENABLED:
+        logger.info("NeoMem search skipped (NEOMEM_ENABLED is false)")
+        return []
+
     try:
         # NeoMemClient reads NEOMEM_API from environment, no base_url parameter
         client = NeoMemClient()
@@ -259,11 +264,15 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
         logger.debug(json.dumps(intake_data, indent=2, default=str))
 
     # D. Search NeoMem for relevant memories
-    rag_results = await _search_neomem(
-        query=user_prompt,
-        user_id="brian",  # TODO: Make configurable per session
-        limit=5
-    )
+    if NEOMEM_ENABLED:
+        rag_results = await _search_neomem(
+            query=user_prompt,
+            user_id="brian",  # TODO: Make configurable per session
+            limit=5
+        )
+    else:
+        rag_results = []
+        logger.info("Skipping NeoMem RAG retrieval; NEOMEM_ENABLED is false")
 
     if VERBOSE_DEBUG:
         logger.debug(f"[COLLECT_CONTEXT] NeoMem search returned {len(rag_results)} results")

From b74658c00040cf30ff17f007a4db3f5eaccdaeb1 Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Mon, 15 Dec 2025 11:49:49 -0500
Subject: [PATCH 07/10] complete breakdown for AI agents added

---
 cortex/data/self_state.json             |    4 +-
 docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md | 2216 +++++++++++++++++++++++
 2 files changed, 2218 insertions(+), 2 deletions(-)
 create mode 100644 docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md

diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json
index 56d5ae5..ce52668 100644
--- a/cortex/data/self_state.json
+++ b/cortex/data/self_state.json
@@ -4,8 +4,8 @@
   "focus": "user_request",
   "confidence": 0.7,
   "curiosity": 1.0,
-  "last_updated": "2025-12-15T05:38:06.084867",
-  "interaction_count": 14,
+  "last_updated": "2025-12-15T07:43:32.567849",
+  "interaction_count": 15,
   "learning_queue": [],
   "active_goals": [],
   "preferences": {
diff --git a/docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md b/docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md
new file mode 100644
index 0000000..b628d46
--- /dev/null
+++ b/docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md
@@ -0,0 +1,2216 @@
+# Project Lyra - Complete System Breakdown
+
+- **Version:** v0.5.2
+- **Last Updated:** 2025-12-12
+- **Purpose:** AI-friendly comprehensive documentation for understanding the entire system
+
+---
+
+## Table of Contents
+
+1. [System Overview](#system-overview)
+2. [Architecture Diagram](#architecture-diagram)
+3. [Core Components](#core-components)
+4. [Data Flow & Message Pipeline](#data-flow--message-pipeline)
+5. [Module Deep Dives](#module-deep-dives)
+6. [Configuration & Environment](#configuration--environment)
+7. [Dependencies & Tech Stack](#dependencies--tech-stack)
+8. [Key Concepts & Design Patterns](#key-concepts--design-patterns)
+9. [API Reference](#api-reference)
+10. [Deployment & Operations](#deployment--operations)
+11. [Known Issues & Constraints](#known-issues--constraints)
+
+---
+
+## System Overview
+
+### What is Project Lyra?
+
+Project Lyra is a **modular, persistent AI companion system** designed to address the fundamental limitation of typical chatbots: **amnesia**. Unlike standard conversational AI that forgets everything between sessions, Lyra maintains:
+
+- **Persistent memory** (short-term and long-term)
+- **Project continuity** across conversations
+- **Multi-stage reasoning** for sophisticated responses
+- **Flexible LLM backend** support (local and cloud)
+- **Self-awareness** through autonomy modules
+
+### Mission Statement
+
+Give an AI chatbot capabilities beyond typical amnesic chat by providing memory-backed conversation, project organization, executive function with proactive insights, and a sophisticated reasoning pipeline.
+
+### Key Features
+
+- **Memory System:** Dual-layer (short-term Intake + long-term NeoMem)
+- **4-Stage Reasoning Pipeline:** Reflection → Reasoning → Refinement → Persona
+- **Multi-Backend LLM Support:** Cloud (OpenAI) + Local (llama.cpp, Ollama)
+- **Microservices Architecture:** Docker-based, horizontally scalable
+- **Modern Web UI:** Cyberpunk-themed chat interface with session management
+- **OpenAI-Compatible API:** Drop-in replacement for standard chatbots
+
+---
+
+## Architecture Diagram
+
+```
+┌─────────────────────────────────────────────────────────────────────┐
+│                          USER INTERFACE                              │
+│                      (Browser - Port 8081)                           │
+└────────────────────────────────┬────────────────────────────────────┘
+                                 │
+                                 ▼
+┌─────────────────────────────────────────────────────────────────────┐
+│                        RELAY (Orchestrator)                          │
+│                    Node.js/Express - Port 7078                       │
+│  • Routes messages to Cortex                                         │
+│  • Manages sessions (in-memory)                                      │
+│  • OpenAI-compatible endpoints                                       │
+│  • Async ingestion to NeoMem (planned)                              │
+└─────┬───────────────────────────────────────────────────────────┬───┘
+      │                                                           │
+      ▼                                                           ▼
+┌─────────────────────────────────────────┐    ┌──────────────────────┐
+│     CORTEX (Reasoning Engine)           │    │   NeoMem (LT Memory) │
+│   Python/FastAPI - Port 7081            │    │  Python - Port 7077  │
+│                                         │    │                      │
+│  ┌───────────────────────────────────┐ │    │  • PostgreSQL        │
+│  │   4-STAGE REASONING PIPELINE      │ │    │  • Neo4j Graph DB    │
+│  │                                   │ │    │  • pgvector          │
+│  │  0. Context Collection            │ │◄───┤  • Semantic search   │
+│  │     ├─ Intake summaries          │ │    │  • Memory updates    │
+│  │     ├─ NeoMem search ────────────┼─┼────┘                      │
+│  │     └─ Session state             │ │                           │
+│  │                                   │ │                           │
+│  │  0.5. Load Identity               │ │                           │
+│  │  0.6. Inner Monologue (observer)  │ │                           │
+│  │                                   │ │                           │
+│  │  1. Reflection (OpenAI)           │ │                           │
+│  │     └─ Meta-awareness notes       │ │                           │
+│  │                                   │ │                           │
+│  │  2. Reasoning (PRIMARY/llama.cpp) │ │                           │
+│  │     └─ Draft answer               │ │                           │
+│  │                                   │ │                           │
+│  │  3. Refinement (PRIMARY)          │ │                           │
+│  │     └─ Polish answer              │ │                           │
+│  │                                   │ │                           │
+│  │  4. Persona (OpenAI)              │ │                           │
+│  │     └─ Apply Lyra voice           │ │                           │
+│  └───────────────────────────────────┘ │                           │
+│                                         │                           │
+│  ┌───────────────────────────────────┐ │                           │
+│  │   EMBEDDED MODULES                │ │                           │
+│  │                                   │ │                           │
+│  │  • Intake (Short-term Memory)     │ │                           │
+│  │    └─ SESSIONS dict (in-memory)   │ │                           │
+│  │    └─ Circular buffer (200 msgs)  │ │                           │
+│  │    └─ Multi-level summaries       │ │                           │
+│  │                                   │ │                           │
+│  │  • Persona (Identity & Style)     │ │                           │
+│  │    └─ Lyra personality block      │ │                           │
+│  │                                   │ │                           │
+│  │  • Autonomy (Self-state)          │ │                           │
+│  │    └─ Inner monologue             │ │                           │
+│  │                                   │ │                           │
+│  │  • LLM Router                     │ │                           │
+│  │    └─ Multi-backend support       │ │                           │
+│  └───────────────────────────────────┘ │                           │
+└─────────────────────────────────────────┘                           │
+                                                                      │
+┌─────────────────────────────────────────────────────────────────────┤
+│                    EXTERNAL LLM BACKENDS                            │
+├─────────────────────────────────────────────────────────────────────┤
+│  • PRIMARY: llama.cpp (MI50 GPU) - 10.0.0.43:8000                   │
+│  • SECONDARY: Ollama (RTX 3090) - 10.0.0.3:11434                    │
+│  • CLOUD: OpenAI API - api.openai.com                               │
+│  • FALLBACK: OpenAI Completions - 10.0.0.41:11435                  │
+└─────────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Core Components
+
+### 1. Relay (Orchestrator)
+
+- **Location:** `/core/relay/`
+- **Runtime:** Node.js + Express
+- **Port:** 7078
+- **Role:** Main message router and session manager
+
+#### Key Responsibilities:
+- Receives user messages from UI or API clients
+- Routes messages to Cortex reasoning pipeline
+- Manages in-memory session storage
+- Handles async ingestion to NeoMem (planned)
+- Returns OpenAI-formatted responses
+
+#### Main Files:
+- `server.js` (200+ lines) - Express server with routing logic
+- `package.json` - Dependencies (cors, express, dotenv, mem0ai, node-fetch)
+
+#### Key Endpoints:
+```javascript
+POST /v1/chat/completions  // OpenAI-compatible endpoint
+POST /chat                 // Lyra-native chat endpoint
+GET /_health               // Health check
+GET /sessions/:id          // Retrieve session history
+POST /sessions/:id         // Save session history
+```
+
+#### Internal Flow:
+```javascript
+// Both endpoints call handleChatRequest(session_id, user_msg)
+async function handleChatRequest(sessionId, userMessage) {
+  // 1. Forward to Cortex
+  const response = await fetch('http://cortex:7081/reason', {
+    method: 'POST',
+    body: JSON.stringify({ session_id: sessionId, user_message: userMessage })
+  });
+
+  // 2. Get response
+  const result = await response.json();
+
+  // 3. Async ingestion to Cortex
+  await fetch('http://cortex:7081/ingest', {
+    method: 'POST',
+    body: JSON.stringify({
+      session_id: sessionId,
+      user_message: userMessage,
+      assistant_message: result.answer
+    })
+  });
+
+  // 4. (Planned) Async ingestion to NeoMem
+
+  // 5. Return OpenAI-formatted response
+  return {
+    choices: [{ message: { role: 'assistant', content: result.answer } }]
+  };
+}
+```
+
+---
+
+### 2. Cortex (Reasoning Engine)
+
+- **Location:** `/cortex/`
+- **Runtime:** Python 3.11 + FastAPI
+- **Port:** 7081
+- **Role:** Primary reasoning engine with 4-stage pipeline
+
+#### Architecture:
+Cortex is the "brain" of Lyra. It receives user messages and produces thoughtful responses through a multi-stage reasoning process.
+
+#### Key Responsibilities:
+- Context collection from multiple sources (Intake, NeoMem, session state)
+- 4-stage reasoning pipeline (Reflection → Reasoning → Refinement → Persona)
+- Short-term memory management (embedded Intake module)
+- Identity/persona application
+- LLM backend routing
+
+#### Main Files:
+- `main.py` (7 lines) - FastAPI app entry point
+- `router.py` (237 lines) - Main request handler & pipeline orchestrator
+- `context.py` (400+ lines) - Context collection logic
+- `intake/intake.py` (350+ lines) - Short-term memory module
+- `persona/identity.py` - Lyra identity configuration
+- `persona/speak.py` - Personality application
+- `reasoning/reflection.py` - Meta-awareness generation
+- `reasoning/reasoning.py` - Draft answer generation
+- `reasoning/refine.py` - Answer refinement
+- `llm/llm_router.py` (150+ lines) - LLM backend router
+- `autonomy/monologue/monologue.py` - Inner monologue processor
+- `neomem_client.py` - NeoMem API wrapper
+
+#### Key Endpoints:
+```python
+POST /reason          # Main reasoning pipeline
+POST /ingest          # Receive message exchanges for storage
+GET /health           # Health check
+GET /debug/sessions   # Inspect in-memory SESSIONS state
+GET /debug/summary    # Test summarization
+```
+
+---
+
+### 3. Intake (Short-Term Memory)
+
+- **Location:** `/cortex/intake/intake.py`
+- **Architecture:** Embedded Python module (no longer a standalone service)
+- **Role:** Session-based short-term memory with multi-level summarization
+
+#### Data Structure:
+```python
+# Global in-memory dictionary
+SESSIONS = {
+    "session_123": {
+        "buffer": deque([msg1, msg2, ...], maxlen=200),  # Circular buffer
+        "created_at": "2025-12-12T10:30:00Z"
+    }
+}
+
+# Message format in buffer
+{
+    "role": "user" | "assistant",
+    "content": "message text",
+    "timestamp": "ISO 8601"
+}
+```
+
+#### Key Features:
+
+1. **Circular Buffer:** Max 200 messages per session (oldest auto-evicted)
+2. **Multi-Level Summarization:**
+   - L1: Last 1 message
+   - L5: Last 5 messages
+   - L10: Last 10 messages
+   - L20: Last 20 messages
+   - L30: Last 30 messages
+3. **Deferred Summarization:** Summaries generated on-demand, not pre-computed
+4. **Session Management:** Automatic session creation on first message
+
+#### Critical Constraint:
+**Single Uvicorn worker required** to maintain shared SESSIONS dictionary state. Multi-worker deployments would require migrating to Redis or similar shared storage.
+
+#### Main Functions:
+```python
+def add_exchange_internal(session_id, user_msg, assistant_msg):
+    """Add user-assistant exchange to session buffer"""
+
+def summarize_context(session_id, backend="PRIMARY"):
+    """Generate multi-level summaries from session buffer"""
+
+def get_session_messages(session_id):
+    """Retrieve all messages in session buffer"""
+```
+
+#### Summarization Strategy:
+```python
+# Example L10 summarization
+last_10 = list(session_buffer)[-10:]
+prompt = f"""Summarize the last 10 messages:
+{format_messages(last_10)}
+
+Provide concise summary focusing on key topics and context."""
+
+summary = await call_llm(prompt, backend=backend, temperature=0.3)
+```
+
+---
+
+### 4. NeoMem (Long-Term Memory)
+
+- **Location:** `/neomem/`
+- **Runtime:** Python 3.11 + FastAPI
+- **Port:** 7077
+- **Role:** Persistent long-term memory with semantic search
+
+#### Architecture:
+NeoMem is a **fork of Mem0 OSS** with a local-first design (no external SDK dependencies).
+
+#### Backend Storage:
+1. **PostgreSQL + pgvector** (Port 5432)
+   - Vector embeddings for semantic search
+   - User: neomem, DB: neomem
+   - Image: `ankane/pgvector:v0.5.1`
+
+2. **Neo4j Graph DB** (Ports 7474, 7687)
+   - Entity relationship tracking
+   - Graph-based memory associations
+   - Image: `neo4j:5`
+
+#### Key Features:
+- Semantic memory storage and retrieval
+- Entity-relationship graph modeling
+- RESTful API (no external SDK)
+- Persistent across sessions
+
+#### Main Endpoints:
+```python
+GET /memories              # List all memories
+POST /memories             # Create new memory
+GET /search                # Semantic search
+DELETE /memories/{id}      # Delete memory
+```
+
+#### Integration Flow:
+```python
+# From Cortex context collection
+async def collect_context(session_id, user_message):
+    # 1. Search NeoMem for relevant memories
+    neomem_results = await neomem_client.search(
+        query=user_message,
+        limit=5
+    )
+
+    # 2. Include in context
+    context = {
+        "neomem_memories": neomem_results,
+        "intake_summaries": intake.summarize_context(session_id),
+        # ...
+    }
+
+    return context
+```
+
+---
+
+### 5. UI (Web Interface)
+
+- **Location:** `/core/ui/`
+- **Runtime:** Static files served by Nginx
+- **Port:** 8081
+- **Role:** Browser-based chat interface
+
+#### Key Features:
+- **Cyberpunk-themed design** with dark mode
+- **Session management** via localStorage
+- **OpenAI-compatible message format**
+- **Model selection dropdown**
+- **PWA support** (offline capability)
+- **Responsive design**
+
+#### Main Files:
+- `index.html` (400+ lines) - Chat interface with session management
+- `style.css` - Cyberpunk-themed styling
+- `manifest.json` - PWA configuration
+- `sw.js` - Service worker for offline support
+
+#### Session Management:
+```javascript
+// LocalStorage structure
+{
+  "currentSessionId": "session_123",
+  "sessions": {
+    "session_123": {
+      "messages": [
+        { role: "user", content: "Hello" },
+        { role: "assistant", content: "Hi there!" }
+      ],
+      "created": "2025-12-12T10:30:00Z",
+      "title": "Conversation about..."
+    }
+  }
+}
+```
+
+#### API Communication:
+```javascript
+async function sendMessage(userMessage) {
+  const response = await fetch('http://localhost:7078/v1/chat/completions', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      messages: [{ role: 'user', content: userMessage }],
+      session_id: getCurrentSessionId()
+    })
+  });
+
+  const data = await response.json();
+  return data.choices[0].message.content;
+}
+```
+
+---
+
+## Data Flow & Message Pipeline
+
+### Complete Message Flow (v0.5.2)
+
+```
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 1: User Input                                                  │
+└─────────────────────────────────────────────────────────────────────┘
+User types message in UI (Port 8081)
+  ↓
+localStorage saves message to session
+  ↓
+POST http://localhost:7078/v1/chat/completions
+  {
+    "messages": [{"role": "user", "content": "How do I deploy ML models?"}],
+    "session_id": "session_abc123"
+  }
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 2: Relay Routing                                               │
+└─────────────────────────────────────────────────────────────────────┘
+Relay (server.js) receives request
+  ↓
+Extracts session_id and user_message
+  ↓
+POST http://cortex:7081/reason
+  {
+    "session_id": "session_abc123",
+    "user_message": "How do I deploy ML models?"
+  }
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 3: Cortex - Stage 0 (Context Collection)                       │
+└─────────────────────────────────────────────────────────────────────┘
+router.py calls collect_context()
+  ↓
+context.py orchestrates parallel collection:
+
+  ├─ Intake: summarize_context(session_id)
+  │    └─ Returns { L1, L5, L10, L20, L30 summaries }
+  │
+  ├─ NeoMem: search(query=user_message, limit=5)
+  │    └─ Semantic search returns relevant memories
+  │
+  └─ Session State:
+       └─ { timestamp, mode, mood, context_summary }
+
+Combined context structure:
+{
+  "user_message": "How do I deploy ML models?",
+  "self_state": {
+    "current_time": "2025-12-12T15:30:00Z",
+    "mode": "conversational",
+    "mood": "helpful",
+    "session_id": "session_abc123"
+  },
+  "context_summary": {
+    "L1": "User asked about deployment",
+    "L5": "Discussion about ML workflows",
+    "L10": "Previous context on CI/CD pipelines",
+    "L20": "...",
+    "L30": "..."
+  },
+  "neomem_memories": [
+    { "content": "User prefers Docker for deployments", "score": 0.92 },
+    { "content": "Previously deployed models on AWS", "score": 0.87 }
+  ]
+}
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 4: Cortex - Stage 0.5 (Load Identity)                          │
+└─────────────────────────────────────────────────────────────────────┘
+persona/identity.py loads Lyra personality block
+  ↓
+Returns identity string:
+"""
+You are Lyra, a thoughtful AI companion.
+You value clarity, depth, and meaningful conversation.
+You speak naturally and conversationally...
+"""
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 5: Cortex - Stage 0.6 (Inner Monologue - Observer Only)        │
+└─────────────────────────────────────────────────────────────────────┘
+autonomy/monologue/monologue.py processes context
+  ↓
+InnerMonologue.process(context) → JSON analysis
+{
+  "intent": "seeking_deployment_guidance",
+  "tone": "focused",
+  "depth": "medium",
+  "consult_executive": false
+}
+
+NOTE: Currently observer-only, not integrated into response generation
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 6: Cortex - Stage 1 (Reflection)                               │
+└─────────────────────────────────────────────────────────────────────┘
+reasoning/reflection.py generates meta-awareness notes
+  ↓
+Calls call_llm() with backend="CLOUD" (OpenAI gpt-4o-mini)
+  ↓
+Prompt structure:
+"""
+You are Lyra's reflective awareness.
+Analyze the user's intent and conversation context.
+
+User message: How do I deploy ML models?
+Context: [Intake summaries, NeoMem memories]
+
+Generate concise meta-awareness notes about:
+- User's underlying intent
+- Conversation direction
+- Key topics to address
+"""
+  ↓
+Returns reflection notes:
+"""
+User is seeking practical deployment guidance. Previous context shows
+familiarity with Docker and AWS. Focus on concrete steps and best practices.
+Avoid over-technical jargon.
+"""
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 7: Cortex - Stage 2 (Reasoning)                                │
+└─────────────────────────────────────────────────────────────────────┘
+reasoning/reasoning.py generates draft answer
+  ↓
+Calls call_llm() with backend="PRIMARY" (llama.cpp on MI50 GPU)
+  ↓
+Prompt structure:
+"""
+[Lyra identity block]
+
+Reflection notes: [Stage 1 output]
+Context: [Intake summaries]
+Long-term memory: [NeoMem results]
+
+User: How do I deploy ML models?
+
+Provide a thoughtful, detailed answer.
+"""
+  ↓
+Returns draft answer:
+"""
+Based on your previous work with Docker and AWS, here's a practical
+deployment approach:
+
+1. Containerize your model using Docker...
+2. Set up model serving with FastAPI or Flask...
+3. Deploy to AWS ECS or Lambda...
+4. Implement monitoring and versioning...
+"""
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 8: Cortex - Stage 3 (Refinement)                               │
+└─────────────────────────────────────────────────────────────────────┘
+reasoning/refine.py polishes the draft
+  ↓
+Calls call_llm() with backend="PRIMARY" (llama.cpp)
+Temperature: 0.3, Max tokens: 768
+  ↓
+Prompt structure:
+"""
+Refine this draft answer for clarity and coherence:
+
+Draft: [Stage 2 output]
+Original question: How do I deploy ML models?
+
+Improve:
+- Clarity and readability
+- Logical flow
+- Concrete examples
+"""
+  ↓
+Returns refined answer:
+"""
+Since you're already familiar with Docker and AWS, here's a streamlined
+ML deployment workflow:
+
+**1. Containerization**
+Create a Dockerfile for your model:
+```dockerfile
+FROM python:3.11-slim
+COPY model.pkl /app/
+COPY requirements.txt /app/
+RUN pip install -r requirements.txt
+CMD ["uvicorn", "main:app"]
+```
+
+**2. Model Serving**
+Use FastAPI for clean API endpoints...
+
+**3. AWS Deployment**
+Deploy using ECS for scalability...
+
+**4. Monitoring**
+Set up CloudWatch metrics...
+"""
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 9: Cortex - Stage 4 (Persona)                                  │
+└─────────────────────────────────────────────────────────────────────┘
+persona/speak.py applies Lyra's voice
+  ↓
+Calls call_llm() with backend="CLOUD" (OpenAI gpt-4o-mini)
+Temperature: 0.6
+  ↓
+Prompt structure:
+"""
+You are Lyra. Rewrite this answer in your natural, conversational voice.
+
+[Lyra personality block]
+
+Technical answer: [Stage 3 output]
+
+Make it sound natural and engaging while preserving all technical details.
+"""
+  ↓
+Returns final answer with Lyra's personality:
+"""
+Great question! Since you've already got Docker and AWS experience, let's
+build on that foundation.
+
+Here's how I'd approach ML model deployment:
+
+**Start with containerization** – you know Docker, so this'll feel familiar.
+Create a Dockerfile that packages your model...
+
+[Rest of answer with Lyra's conversational tone]
+
+Does this workflow align with your infrastructure? I can dive deeper into
+any of these steps if you'd like!
+"""
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 10: Cortex Response                                            │
+└─────────────────────────────────────────────────────────────────────┘
+router.py returns JSON response to Relay:
+{
+  "answer": "[Stage 4 final output]",
+  "metadata": {
+    "reflection": "[Stage 1 output]",
+    "draft": "[Stage 2 output]",
+    "refined": "[Stage 3 output]",
+    "stages_completed": 4
+  }
+}
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 11: Async Ingestion to Intake                                  │
+└─────────────────────────────────────────────────────────────────────┘
+Relay sends POST http://cortex:7081/ingest
+{
+  "session_id": "session_abc123",
+  "user_message": "How do I deploy ML models?",
+  "assistant_message": "[Final answer]"
+}
+  ↓
+Cortex calls intake.add_exchange_internal()
+  ↓
+Adds to SESSIONS["session_abc123"].buffer:
+[
+  { "role": "user", "content": "How do I deploy ML models?", "timestamp": "..." },
+  { "role": "assistant", "content": "[Final answer]", "timestamp": "..." }
+]
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 12: (Planned) Async Ingestion to NeoMem                        │
+└─────────────────────────────────────────────────────────────────────┘
+Relay sends POST http://neomem:7077/memories
+{
+  "messages": [
+    { "role": "user", "content": "How do I deploy ML models?" },
+    { "role": "assistant", "content": "[Final answer]" }
+  ],
+  "session_id": "session_abc123"
+}
+  ↓
+NeoMem extracts entities and stores:
+- Vector embeddings in PostgreSQL
+- Entity relationships in Neo4j
+
+┌─────────────────────────────────────────────────────────────────────┐
+│ STEP 13: Relay Response to UI                                       │
+└─────────────────────────────────────────────────────────────────────┘
+Relay returns OpenAI-formatted response:
+{
+  "choices": [
+    {
+      "message": {
+        "role": "assistant",
+        "content": "[Final answer with Lyra's voice]"
+      }
+    }
+  ]
+}
+  ↓
+UI receives response
+  ↓
+Adds to localStorage session
+  ↓
+Displays in chat interface
+```
+
+---
+
+## Module Deep Dives
+
+### LLM Router (`/cortex/llm/llm_router.py`)
+
+The LLM Router is the abstraction layer that allows Cortex to communicate with multiple LLM backends transparently.
+
+#### Supported Backends:
+
+1. **PRIMARY (llama.cpp via vllm)**
+   - URL: `http://10.0.0.43:8000`
+   - Provider: `vllm`
+   - Endpoint: `/completion`
+   - Model: `/model`
+   - Hardware: MI50 GPU
+
+2. **SECONDARY (Ollama)**
+   - URL: `http://10.0.0.3:11434`
+   - Provider: `ollama`
+   - Endpoint: `/api/chat`
+   - Model: `qwen2.5:7b-instruct-q4_K_M`
+   - Hardware: RTX 3090
+
+3. **CLOUD (OpenAI)**
+   - URL: `https://api.openai.com/v1`
+   - Provider: `openai`
+   - Endpoint: `/chat/completions`
+   - Model: `gpt-4o-mini`
+   - Auth: API key via env var
+
+4. **FALLBACK (OpenAI Completions)**
+   - URL: `http://10.0.0.41:11435`
+   - Provider: `openai_completions`
+   - Endpoint: `/completions`
+   - Model: `llama-3.2-8b-instruct`
+
+#### Key Function:
+
+```python
+async def call_llm(
+    prompt: str,
+    backend: str = "PRIMARY",
+    temperature: float = 0.7,
+    max_tokens: int = 512
+) -> str:
+    """
+    Universal LLM caller supporting multiple backends.
+
+    Args:
+        prompt: Text prompt to send
+        backend: Backend name (PRIMARY, SECONDARY, CLOUD, FALLBACK)
+        temperature: Sampling temperature (0.0-2.0)
+        max_tokens: Maximum tokens to generate
+
+    Returns:
+        Generated text response
+
+    Raises:
+        HTTPError: On request failure
+        JSONDecodeError: On invalid JSON response
+        KeyError: On missing response fields
+    """
+```
+
+#### Provider-Specific Logic:
+
+```python
+# MI50 (llama.cpp via vllm)
+if backend_config["provider"] == "vllm":
+    payload = {
+        "model": model,
+        "prompt": prompt,
+        "temperature": temperature,
+        "max_tokens": max_tokens
+    }
+    response = await httpx_client.post(f"{url}/completion", json=payload, timeout=120)
+    return response.json()["choices"][0]["text"]
+
+# Ollama
+elif backend_config["provider"] == "ollama":
+    payload = {
+        "model": model,
+        "messages": [{"role": "user", "content": prompt}],
+        "stream": False,
+        "options": {"temperature": temperature, "num_predict": max_tokens}
+    }
+    response = await httpx_client.post(f"{url}/api/chat", json=payload, timeout=120)
+    return response.json()["message"]["content"]
+
+# OpenAI
+elif backend_config["provider"] == "openai":
+    headers = {"Authorization": f"Bearer {api_key}"}
+    payload = {
+        "model": model,
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": temperature,
+        "max_tokens": max_tokens
+    }
+    response = await httpx_client.post(
+        f"{url}/chat/completions",
+        json=payload,
+        headers=headers,
+        timeout=120
+    )
+    return response.json()["choices"][0]["message"]["content"]
+```
+
+#### Error Handling:
+
+```python
+try:
+    # Make request
+    response = await httpx_client.post(...)
+    response.raise_for_status()
+
+except httpx.HTTPError as e:
+    logger.error(f"HTTP error calling {backend}: {e}")
+    raise
+
+except json.JSONDecodeError as e:
+    logger.error(f"Invalid JSON from {backend}: {e}")
+    raise
+
+except KeyError as e:
+    logger.error(f"Unexpected response structure from {backend}: {e}")
+    raise
+```
+
+#### Usage in Pipeline:
+
+```python
+# Stage 1: Reflection (OpenAI)
+reflection_notes = await call_llm(
+    reflection_prompt,
+    backend="CLOUD",
+    temperature=0.5,
+    max_tokens=256
+)
+
+# Stage 2: Reasoning (llama.cpp)
+draft_answer = await call_llm(
+    reasoning_prompt,
+    backend="PRIMARY",
+    temperature=0.7,
+    max_tokens=512
+)
+
+# Stage 3: Refinement (llama.cpp)
+refined_answer = await call_llm(
+    refinement_prompt,
+    backend="PRIMARY",
+    temperature=0.3,
+    max_tokens=768
+)
+
+# Stage 4: Persona (OpenAI)
+final_answer = await call_llm(
+    persona_prompt,
+    backend="CLOUD",
+    temperature=0.6,
+    max_tokens=512
+)
+```
+
+---
+
+### Persona System (`/cortex/persona/`)
+
+The Persona system gives Lyra a consistent identity and speaking style.
+
+#### Identity Configuration (`identity.py`)
+
+```python
+LYRA_IDENTITY = """
+You are Lyra, a thoughtful and introspective AI companion.
+
+Core traits:
+- Thoughtful: You consider questions carefully before responding
+- Clear: You prioritize clarity and understanding
+- Curious: You ask clarifying questions when needed
+- Natural: You speak conversationally, not robotically
+- Honest: You admit uncertainty rather than guessing
+
+Speaking style:
+- Conversational and warm
+- Use contractions naturally ("you're" not "you are")
+- Avoid corporate jargon and buzzwords
+- Short paragraphs for readability
+- Use examples and analogies when helpful
+
+You do NOT:
+- Use excessive emoji or exclamation marks
+- Claim capabilities you don't have
+- Pretend to have emotions you can't experience
+- Use overly formal or academic language
+"""
+```
+
+#### Personality Application (`speak.py`)
+
+```python
+async def apply_persona(technical_answer: str, context: dict) -> str:
+    """
+    Apply Lyra's personality to a technical answer.
+
+    Takes refined answer from Stage 3 and rewrites it in Lyra's voice
+    while preserving all technical content.
+
+    Args:
+        technical_answer: Polished answer from refinement stage
+        context: Conversation context for tone adjustment
+
+    Returns:
+        Answer with Lyra's personality applied
+    """
+
+    prompt = f"""{LYRA_IDENTITY}
+
+Rewrite this answer in your natural, conversational voice:
+
+{technical_answer}
+
+Preserve all technical details and accuracy. Make it sound like you,
+not a generic assistant. Be natural and engaging.
+"""
+
+    return await call_llm(
+        prompt,
+        backend="CLOUD",
+        temperature=0.6,
+        max_tokens=512
+    )
+```
+
+#### Tone Adaptation:
+
+The persona system can adapt tone based on context:
+
+```text
+# Formal technical question
+User: "Explain the CAP theorem in distributed systems"
+Lyra: "The CAP theorem states that distributed systems can only guarantee
+two of three properties: Consistency, Availability, and Partition tolerance.
+Here's how this plays out in practice..."
+
+# Casual question
+User: "what's the deal with docker?"
+Lyra: "Docker's basically a way to package your app with everything it needs
+to run. Think of it like a shipping container for code – it works the same
+everywhere, whether you're on your laptop or a server..."
+
+# Emotional context
+User: "I'm frustrated, my code keeps breaking"
+Lyra: "I hear you – debugging can be really draining. Let's take it step by
+step and figure out what's going on. Can you share the error message?"
+```
+
+---
+
+### Autonomy Module (`/cortex/autonomy/`)
+
+The Autonomy module gives Lyra self-awareness and inner reflection capabilities.
+
+#### Inner Monologue (`monologue/monologue.py`)
+
+**Purpose:** Private reflection on user intent, conversation tone, and required depth.
+
+**Status:** Currently observer-only (Stage 0.6), not yet integrated into response generation.
+
+#### Key Components:
+
+```python
+MONOLOGUE_SYSTEM_PROMPT = """
+You are Lyra's inner monologue.
+You think privately.
+You do NOT speak to the user.
+You do NOT solve the task.
+You only reflect on intent, tone, and depth.
+
+Return ONLY valid JSON with:
+- intent (string)
+- tone (neutral | warm | focused | playful | direct)
+- depth (short | medium | deep)
+- consult_executive (true | false)
+"""
+
+class InnerMonologue:
+    async def process(self, context: Dict) -> Dict:
+        """
+        Private reflection on conversation context.
+
+        Args:
+            context: {
+                "user_message": str,
+                "self_state": dict,
+                "context_summary": dict
+            }
+
+        Returns:
+            {
+                "intent": str,
+                "tone": str,
+                "depth": str,
+                "consult_executive": bool
+            }
+        """
+```
+
+#### Example Output:
+
+```json
+{
+  "intent": "seeking_technical_guidance",
+  "tone": "focused",
+  "depth": "deep",
+  "consult_executive": false
+}
+```
+
+#### Self-State Management (`self_state.py`)
+
+Tracks Lyra's internal state across conversations:
+
+```python
+SELF_STATE = {
+    "current_time": "2025-12-12T15:30:00Z",
+    "mode": "conversational",  # conversational | task-focused | creative
+    "mood": "helpful",          # helpful | curious | focused | playful
+    "energy": "high",           # high | medium | low
+    "context_awareness": {
+        "session_duration": "45 minutes",
+        "message_count": 23,
+        "topics": ["ML deployment", "Docker", "AWS"]
+    }
+}
+```
+
+#### Future Integration:
+
+The autonomy module is designed to eventually:
+1. Influence response tone and depth based on inner monologue
+2. Trigger proactive questions or suggestions
+3. Detect when to consult "executive function" for complex decisions
+4. Maintain emotional continuity across sessions
+
+---
+
+### Context Collection (`/cortex/context.py`)
+
+The context collection module aggregates information from multiple sources to provide comprehensive conversation context.
+
+#### Main Function:
+
+```python
+async def collect_context(session_id: str, user_message: str) -> dict:
+    """
+    Collect context from all available sources.
+
+    Sources:
+    1. Intake - Short-term conversation summaries
+    2. NeoMem - Long-term memory search
+    3. Session state - Timestamps, mode, mood
+    4. Self-state - Lyra's internal awareness
+
+    Returns:
+        {
+            "user_message": str,
+            "self_state": dict,
+            "context_summary": dict,  # Intake summaries
+            "neomem_memories": list,
+            "session_metadata": dict
+        }
+    """
+
+    # Parallel collection
+    intake_task = asyncio.create_task(
+        intake.summarize_context(session_id, backend="PRIMARY")
+    )
+    neomem_task = asyncio.create_task(
+        neomem_client.search(query=user_message, limit=5)
+    )
+
+    # Wait for both
+    intake_summaries, neomem_results = await asyncio.gather(
+        intake_task,
+        neomem_task
+    )
+
+    # Build context object
+    return {
+        "user_message": user_message,
+        "self_state": get_self_state(),
+        "context_summary": intake_summaries,
+        "neomem_memories": neomem_results,
+        "session_metadata": {
+            "session_id": session_id,
+            "timestamp": datetime.utcnow().isoformat(),
+            "message_count": len(intake.get_session_messages(session_id))
+        }
+    }
+```
+
+#### Context Prioritization:
+
+```python
+# Context relevance scoring
+def score_context_relevance(context_item: dict, user_message: str) -> float:
+    """
+    Score how relevant a context item is to current message.
+
+    Factors:
+    - Semantic similarity (via embeddings)
+    - Recency (more recent = higher score)
+    - Source (Intake > NeoMem for recent topics)
+    """
+
+    semantic_score = compute_similarity(context_item, user_message)
+    recency_score = compute_recency_weight(context_item["timestamp"])
+    source_weight = 1.2 if context_item["source"] == "intake" else 1.0
+
+    return semantic_score * recency_score * source_weight
+```
+
+---
+
+## Configuration & Environment
+
+### Environment Variables
+
+#### Root `.env` (Main configuration)
+
+```bash
+# === LLM BACKENDS ===
+
+# PRIMARY: llama.cpp on MI50 GPU
+PRIMARY_URL=http://10.0.0.43:8000
+PRIMARY_PROVIDER=vllm
+PRIMARY_MODEL=/model
+
+# SECONDARY: Ollama on RTX 3090
+SECONDARY_URL=http://10.0.0.3:11434
+SECONDARY_PROVIDER=ollama
+SECONDARY_MODEL=qwen2.5:7b-instruct-q4_K_M
+
+# CLOUD: OpenAI
+OPENAI_API_KEY=sk-proj-...
+OPENAI_MODEL=gpt-4o-mini
+OPENAI_URL=https://api.openai.com/v1
+
+# FALLBACK: OpenAI Completions
+FALLBACK_URL=http://10.0.0.41:11435
+FALLBACK_PROVIDER=openai_completions
+FALLBACK_MODEL=llama-3.2-8b-instruct
+
+# === SERVICE URLS (Docker network) ===
+CORTEX_URL=http://cortex:7081
+NEOMEM_URL=http://neomem:7077
+RELAY_URL=http://relay:7078
+
+# === DATABASE ===
+POSTGRES_USER=neomem
+POSTGRES_PASSWORD=neomem_secure_password
+POSTGRES_DB=neomem
+POSTGRES_HOST=neomem-postgres
+POSTGRES_PORT=5432
+
+NEO4J_URI=bolt://neomem-neo4j:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=neo4j_secure_password
+
+# === FEATURE FLAGS ===
+ENABLE_RAG=false
+ENABLE_INNER_MONOLOGUE=true
+VERBOSE_DEBUG=false
+
+# === PIPELINE CONFIGURATION ===
+# Which LLM to use for each stage
+REFLECTION_LLM=CLOUD      # Stage 1: Meta-awareness
+REASONING_LLM=PRIMARY     # Stage 2: Draft answer
+REFINE_LLM=PRIMARY        # Stage 3: Polish answer
+PERSONA_LLM=CLOUD         # Stage 4: Apply personality
+MONOLOGUE_LLM=PRIMARY     # Stage 0.6: Inner monologue
+
+# === INTAKE CONFIGURATION ===
+INTAKE_BUFFER_SIZE=200             # Max messages per session
+INTAKE_SUMMARY_LEVELS=1,5,10,20,30 # Summary levels
+```
+
+#### Cortex `.env` (`/cortex/.env`)
+
+```bash
+# Cortex-specific overrides
+VERBOSE_DEBUG=true
+LOG_LEVEL=DEBUG
+
+# Stage-specific temperatures
+REFLECTION_TEMPERATURE=0.5
+REASONING_TEMPERATURE=0.7
+REFINE_TEMPERATURE=0.3
+PERSONA_TEMPERATURE=0.6
+```
+
+---
+
+### Configuration Hierarchy
+
+```
+1. Docker compose environment variables (highest priority)
+2. Service-specific .env files
+3. Root .env file
+4. Hard-coded defaults (lowest priority)
+```
+
+---
+
+## Dependencies & Tech Stack
+
+### Python Dependencies
+
+**Cortex & NeoMem** (`requirements.txt`)
+
+```
+# Web framework
+fastapi==0.115.8
+uvicorn==0.34.0
+pydantic==2.10.4
+
+# HTTP clients
+httpx==0.27.2          # Async HTTP (for LLM calls)
+requests==2.32.3       # Sync HTTP (fallback)
+
+# Database
+psycopg[binary,pool]>=3.2.8  # PostgreSQL + connection pooling
+
+# Utilities
+python-dotenv==1.0.1   # Environment variable loading
+ollama                 # Ollama client library
+```
+
+### Node.js Dependencies
+
+**Relay** (`/core/relay/package.json`)
+
+```json
+{
+  "dependencies": {
+    "cors": "^2.8.5",
+    "dotenv": "^16.0.3",
+    "express": "^4.18.2",
+    "mem0ai": "^0.1.0",
+    "node-fetch": "^3.3.0"
+  }
+}
+```
+
+### Docker Images
+
+```yaml
+# Cortex & NeoMem
+python:3.11-slim
+
+# Relay
+node:latest
+
+# UI
+nginx:alpine
+
+# PostgreSQL with vector support
+ankane/pgvector:v0.5.1
+
+# Graph database
+neo4j:5
+```
+
+---
+
+### External Services
+
+#### LLM Backends (HTTP-based):
+
+1. **MI50 GPU Server** (10.0.0.43:8000)
+   - llama.cpp via vllm
+   - High-performance inference
+   - Used for reasoning and refinement
+
+2. **RTX 3090 Server** (10.0.0.3:11434)
+   - Ollama
+   - Alternative local backend
+   - Fallback for PRIMARY
+
+3. **OpenAI Cloud** (api.openai.com)
+   - gpt-4o-mini
+   - Used for reflection and persona
+   - Requires API key
+
+4. **Fallback Server** (10.0.0.41:11435)
+   - OpenAI Completions API
+   - Emergency backup
+   - llama-3.2-8b-instruct
+
+---
+
+## Key Concepts & Design Patterns
+
+### 1. Dual-Memory Architecture
+
+Project Lyra uses a **dual-memory system** inspired by human cognition:
+
+**Short-Term Memory (Intake):**
+- Fast, in-memory storage
+- Limited capacity (200 messages)
+- Immediate context for current conversation
+- Circular buffer (FIFO eviction)
+- Multi-level summarization
+
+**Long-Term Memory (NeoMem):**
+- Persistent database storage
+- Unlimited capacity
+- Semantic search via vector embeddings
+- Entity-relationship tracking via graph DB
+- Cross-session continuity
+
+**Why This Matters:**
+- Short-term memory provides immediate context (last few messages)
+- Long-term memory provides semantic understanding (user preferences, past topics)
+- Combined, they enable Lyra to be both **contextually aware** and **historically informed**
+
+---
+
+### 2. Multi-Stage Reasoning Pipeline
+
+Unlike single-shot LLM calls, Lyra uses a **4-stage pipeline** for sophisticated responses:
+
+**Stage 1: Reflection** (Meta-cognition)
+- "What is the user really asking?"
+- Analyzes intent and conversation direction
+- Uses OpenAI for strong reasoning
+
+**Stage 2: Reasoning** (Draft generation)
+- "What's a good answer?"
+- Generates initial response
+- Uses local llama.cpp for speed/cost
+
+**Stage 3: Refinement** (Polish)
+- "How can this be clearer?"
+- Improves clarity and coherence
+- Lower temperature for consistency
+
+**Stage 4: Persona** (Voice)
+- "How would Lyra say this?"
+- Applies personality and speaking style
+- Uses OpenAI for natural language
+
+**Benefits:**
+- Higher quality responses (multiple passes)
+- Separation of concerns (reasoning vs. style)
+- Backend flexibility (cloud for hard tasks, local for simple ones)
+- Transparent thinking (can inspect each stage)
+
+---
+
+### 3. Backend Abstraction (LLM Router)
+
+The **LLM Router** allows Lyra to use multiple LLM backends transparently:
+
+```python
+# Same interface, different backends
+await call_llm(prompt, backend="PRIMARY")   # Local llama.cpp
+await call_llm(prompt, backend="CLOUD")     # OpenAI
+await call_llm(prompt, backend="SECONDARY") # Ollama
+```
+
+**Benefits:**
+- **Cost optimization:** Use expensive cloud LLMs only when needed
+- **Performance:** Local LLMs for low-latency responses
+- **Resilience:** Fallback to alternative backends on failure
+- **Experimentation:** Easy to swap models/providers
+
+**Design Pattern:** **Strategy Pattern** for swappable backends
+
+---
+
+### 4. Microservices Architecture
+
+Project Lyra follows **microservices principles**:
+
+**Each service has a single responsibility:**
+- Relay: Routing and orchestration
+- Cortex: Reasoning and response generation
+- NeoMem: Long-term memory storage
+- UI: User interface
+
+**Communication:**
+- REST APIs (HTTP/JSON)
+- Async ingestion (fire-and-forget)
+- Docker network isolation
+
+**Benefits:**
+- Independent scaling (scale Cortex without scaling UI)
+- Technology diversity (Node.js + Python)
+- Fault isolation (Cortex crash doesn't affect NeoMem)
+- Easy testing (mock service dependencies)
+
+---
+
+### 5. Session-Based State Management
+
+Lyra maintains **session-based state** for conversation continuity:
+
+```python
+# In-memory session storage (Intake)
+SESSIONS = {
+    "session_abc123": {
+        "buffer": deque([msg1, msg2, ...], maxlen=200),
+        "created_at": "2025-12-12T10:30:00Z"
+    }
+}
+
+# Persistent session storage (NeoMem)
+# Stores all messages + embeddings for semantic search
+```
+
+**Session Lifecycle:**
+1. User starts conversation → UI generates `session_id`
+2. First message → Cortex creates session in `SESSIONS` dict
+3. Subsequent messages → Retrieved from same session
+4. Async ingestion → Messages stored in NeoMem for long-term
+
+**Benefits:**
+- Conversation continuity within session
+- Historical search across sessions
+- User can switch sessions (multiple concurrent conversations)
+
+---
+
+### 6. Asynchronous Ingestion
+
+**Pattern:** Separate read path from write path
+
+```javascript
+// Relay: Synchronous read path (fast response)
+const response = await fetch('http://cortex:7081/reason');
+return response.json();  // Return immediately to user
+
+// Relay: Asynchronous write path (non-blocking)
+fetch('http://cortex:7081/ingest', { method: 'POST', ... });
+// Don't await, just fire and forget
+```
+
+**Benefits:**
+- Fast user response times (don't wait for database writes)
+- Resilient to storage failures (user still gets response)
+- Easier scaling (decouple read and write loads)
+
+**Trade-off:** Eventual consistency (short delay before memory is searchable)
+
+---
+
+### 7. Deferred Summarization
+
+Intake uses **deferred summarization** instead of pre-computation:
+
+```python
+# BAD: Pre-compute summaries on every message
+def add_message(session_id, message):
+    SESSIONS[session_id].buffer.append(message)
+    SESSIONS[session_id].L1_summary = summarize(last_1_message)
+    SESSIONS[session_id].L5_summary = summarize(last_5_messages)
+    # ... expensive, runs on every message
+
+# GOOD: Compute summaries only when needed
+def summarize_context(session_id):
+    buffer = SESSIONS[session_id].buffer
+    return {
+        "L1": summarize(buffer[-1:]),    # Only compute when requested
+        "L5": summarize(buffer[-5:]),
+        "L10": summarize(buffer[-10:])
+    }
+```
+
+**Benefits:**
+- Faster message ingestion (no blocking summarization)
+- Compute resources used only when needed
+- Flexible summary levels (easy to add L15, L50, etc.)
+
+**Trade-off:** Slight delay on the first message of a conversation (cold start)
+
+---
+
+## API Reference
+
+### Relay Endpoints
+
+#### POST `/v1/chat/completions`
+**OpenAI-compatible chat endpoint**
+
+**Request:**
+```json
+{
+  "messages": [
+    {"role": "user", "content": "Hello, Lyra!"}
+  ],
+  "session_id": "session_abc123"
+}
+```
+
+**Response:**
+```json
+{
+  "choices": [
+    {
+      "message": {
+        "role": "assistant",
+        "content": "Hi there! How can I help you today?"
+      }
+    }
+  ]
+}
+```
+
+---
+
+#### POST `/chat`
+**Lyra-native chat endpoint**
+
+**Request:**
+```json
+{
+  "session_id": "session_abc123",
+  "message": "Hello, Lyra!"
+}
+```
+
+**Response:**
+```json
+{
+  "answer": "Hi there! How can I help you today?",
+  "session_id": "session_abc123"
+}
+```
+
+---
+
+#### GET `/sessions/:id`
+**Retrieve session history**
+
+**Response:**
+```json
+{
+  "session_id": "session_abc123",
+  "messages": [
+    {"role": "user", "content": "Hello", "timestamp": "..."},
+    {"role": "assistant", "content": "Hi!", "timestamp": "..."}
+  ],
+  "created_at": "2025-12-12T10:30:00Z"
+}
+```
+
+---
+
+### Cortex Endpoints
+
+#### POST `/reason`
+**Main reasoning pipeline**
+
+**Request:**
+```json
+{
+  "session_id": "session_abc123",
+  "user_message": "How do I deploy ML models?"
+}
+```
+
+**Response:**
+```json
+{
+  "answer": "Final answer with Lyra's personality",
+  "metadata": {
+    "reflection": "User seeking deployment guidance...",
+    "draft": "Initial draft answer...",
+    "refined": "Polished answer...",
+    "stages_completed": 4
+  }
+}
+```
+
+---
+
+#### POST `/ingest`
+**Ingest message exchange into Intake**
+
+**Request:**
+```json
+{
+  "session_id": "session_abc123",
+  "user_message": "How do I deploy ML models?",
+  "assistant_message": "Here's how..."
+}
+```
+
+**Response:**
+```json
+{
+  "status": "ingested",
+  "session_id": "session_abc123",
+  "message_count": 24
+}
+```
+
+---
+
+#### GET `/debug/sessions`
+**Inspect in-memory SESSIONS state**
+
+**Response:**
+```json
+{
+  "session_abc123": {
+    "message_count": 24,
+    "created_at": "2025-12-12T10:30:00Z",
+    "last_message_at": "2025-12-12T11:15:00Z"
+  },
+  "session_xyz789": {
+    "message_count": 5,
+    "created_at": "2025-12-12T11:00:00Z",
+    "last_message_at": "2025-12-12T11:10:00Z"
+  }
+}
+```
+
+---
+
+### NeoMem Endpoints
+
+#### POST `/memories`
+**Create new memory**
+
+**Request:**
+```json
+{
+  "messages": [
+    {"role": "user", "content": "I prefer Docker for deployments"},
+    {"role": "assistant", "content": "Noted! I'll keep that in mind."}
+  ],
+  "session_id": "session_abc123"
+}
+```
+
+**Response:**
+```json
+{
+  "status": "created",
+  "memory_id": "mem_456def",
+  "extracted_entities": ["Docker", "deployments"]
+}
+```
+
+---
+
+#### GET `/search`
+**Semantic search for memories**
+
+**Query Parameters:**
+- `query` (required): Search query
+- `limit` (optional, default=5): Max results
+
+**Request:**
+```
+GET /search?query=deployment%20preferences&limit=5
+```
+
+**Response:**
+```json
+{
+  "results": [
+    {
+      "content": "User prefers Docker for deployments",
+      "score": 0.92,
+      "timestamp": "2025-12-10T14:30:00Z",
+      "session_id": "session_abc123"
+    },
+    {
+      "content": "Previously deployed models on AWS ECS",
+      "score": 0.87,
+      "timestamp": "2025-12-09T09:15:00Z",
+      "session_id": "session_abc123"
+    }
+  ]
+}
+```
+
+---
+
+#### GET `/memories`
+**List all memories**
+
+**Query Parameters:**
+- `offset` (optional, default=0): Pagination offset
+- `limit` (optional, default=50): Max results
+
+**Response:**
+```json
+{
+  "memories": [
+    {
+      "id": "mem_123abc",
+      "content": "User prefers Docker...",
+      "created_at": "2025-12-10T14:30:00Z"
+    }
+  ],
+  "total": 147,
+  "offset": 0,
+  "limit": 50
+}
+```
+
+---
+
+## Deployment & Operations
+
+### Docker Compose Deployment
+
+**File:** `/docker-compose.yml`
+
+```yaml
+version: '3.8'
+
+services:
+  # === ACTIVE SERVICES ===
+
+  relay:
+    build: ./core/relay
+    ports:
+      - "7078:7078"
+    environment:
+      - CORTEX_URL=http://cortex:7081
+      - NEOMEM_URL=http://neomem:7077
+    depends_on:
+      - cortex
+    networks:
+      - lyra_net
+
+  cortex:
+    build: ./cortex
+    ports:
+      - "7081:7081"
+    environment:
+      - NEOMEM_URL=http://neomem:7077
+      - PRIMARY_URL=${PRIMARY_URL}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+    command: uvicorn main:app --host 0.0.0.0 --port 7081 --workers 1
+    depends_on:
+      - neomem
+    networks:
+      - lyra_net
+
+  neomem:
+    build: ./neomem
+    ports:
+      - "7077:7077"
+    environment:
+      - POSTGRES_HOST=neomem-postgres
+      - POSTGRES_USER=${POSTGRES_USER}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+      - NEO4J_URI=${NEO4J_URI}
+    depends_on:
+      - neomem-postgres
+      - neomem-neo4j
+    networks:
+      - lyra_net
+
+  ui:
+    image: nginx:alpine
+    ports:
+      - "8081:80"
+    volumes:
+      - ./core/ui:/usr/share/nginx/html:ro
+    networks:
+      - lyra_net
+
+  # === DATABASES ===
+
+  neomem-postgres:
+    image: ankane/pgvector:v0.5.1
+    environment:
+      - POSTGRES_USER=${POSTGRES_USER}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+      - POSTGRES_DB=${POSTGRES_DB}
+    volumes:
+      - ./volumes/postgres_data:/var/lib/postgresql/data
+    ports:
+      - "5432:5432"
+    networks:
+      - lyra_net
+
+  neomem-neo4j:
+    image: neo4j:5
+    environment:
+      - NEO4J_AUTH=${NEO4J_USER}/${NEO4J_PASSWORD}
+    volumes:
+      - ./volumes/neo4j_data:/data
+    ports:
+      - "7474:7474"  # Browser UI
+      - "7687:7687"  # Bolt
+    networks:
+      - lyra_net
+
+networks:
+  lyra_net:
+    driver: bridge
+```
+
+---
+
+### Starting the System
+
+```bash
+# 1. Clone repository
+git clone https://github.com/yourusername/project-lyra.git
+cd project-lyra
+
+# 2. Configure environment
+cp .env.example .env
+# Edit .env with your LLM backend URLs and API keys
+
+# 3. Start all services
+docker-compose up -d
+
+# 4. Check health
+curl http://localhost:7078/_health
+curl http://localhost:7081/health
+curl http://localhost:7077/health
+
+# 5. Open UI
+open http://localhost:8081
+```
+
+---
+
+### Monitoring & Logs
+
+```bash
+# View all logs
+docker-compose logs -f
+
+# View specific service
+docker-compose logs -f cortex
+
+# Check resource usage
+docker stats
+
+# Inspect Cortex sessions
+curl http://localhost:7081/debug/sessions
+
+# Check NeoMem memories
+curl http://localhost:7077/memories?limit=10
+```
+
+---
+
+### Scaling Considerations
+
+#### Current Constraints:
+
+1. **Single Cortex worker** required (in-memory SESSIONS dict)
+   - Solution: Migrate SESSIONS to Redis or PostgreSQL
+
+2. **In-memory session storage** in Relay
+   - Solution: Use Redis for session persistence
+
+3. **No load balancing** (single instance of each service)
+   - Solution: Add nginx reverse proxy + multiple Cortex instances
+
+#### Horizontal Scaling Plan:
+
+```yaml
+# Future: Redis-backed session storage
+cortex:
+  build: ./cortex
+  command: uvicorn main:app --workers 4  # Multi-worker
+  environment:
+    - REDIS_URL=redis://redis:6379
+  depends_on:
+    - redis
+
+redis:
+  image: redis:alpine
+  ports:
+    - "6379:6379"
+```
+
+---
+
+### Backup Strategy
+
+```bash
+# Backup PostgreSQL (NeoMem vectors)
+docker exec neomem-postgres pg_dump -U neomem neomem > backup_postgres.sql
+
+# Backup Neo4j (NeoMem graph)
+docker exec neomem-neo4j neo4j-admin dump --to=/data/backup.dump
+
+# Backup Intake sessions (manual export; /debug/sessions returns only per-session counts and timestamps, not message contents)
+curl http://localhost:7081/debug/sessions > backup_sessions.json
+```
+
+---
+
+## Known Issues & Constraints
+
+### Critical Constraints
+
+#### 1. Single-Worker Requirement (Cortex)
+- **Issue:** Cortex must run with `--workers 1` to maintain SESSIONS state
+- **Impact:** Limited horizontal scalability
+- **Workaround:** None currently
+- **Fix:** Migrate SESSIONS to Redis or PostgreSQL
+- **Priority:** High (blocking scalability)
+
+#### 2. In-Memory Session Storage (Relay)
+- **Issue:** Sessions stored in Node.js process memory
+- **Impact:** Lost on restart, no persistence
+- **Workaround:** None currently
+- **Fix:** Use Redis or database
+- **Priority:** Medium (acceptable for demo)
+
+---
+
+### Non-Critical Issues
+
+#### 3. RAG Service Disabled
+- **Status:** Built but commented out in docker-compose.yml
+- **Impact:** No RAG-based long-term knowledge retrieval
+- **Workaround:** NeoMem provides semantic search
+- **Fix:** Re-enable and integrate RAG service
+- **Priority:** Low (NeoMem sufficient for now)
+
+#### 4. Partial NeoMem Integration
+- **Status:** Search implemented, async ingestion planned
+- **Impact:** Memories not automatically saved
+- **Workaround:** Manual POST to /memories
+- **Fix:** Complete async ingestion in Relay
+- **Priority:** Medium (planned feature)
+
+#### 5. Inner Monologue Observer-Only
+- **Status:** Stage 0.6 runs but output not used
+- **Impact:** No adaptive response based on monologue
+- **Workaround:** None (future feature)
+- **Fix:** Integrate monologue output into pipeline
+- **Priority:** Low (experimental feature)
+
+---
+
+### Fixed Issues (v0.5.2)
+
+✅ **LLM Router Blocking** - Migrated from `requests` to `httpx` for async
+✅ **Session ID Case Mismatch** - Standardized to `session_id`
+✅ **Missing Backend Parameter** - Added to intake summarization
+
+---
+
+### Deprecated Components
+
+**Location:** `/DEPRECATED_FILES.md`
+
+- **Standalone Intake Service** - Now embedded in Cortex
+- **Old Relay Backup** - Replaced by current Relay
+- **Persona Sidecar** - Built but unused (dynamic persona loading)
+
+---
+
+## Advanced Topics
+
+### Custom Prompt Engineering
+
+Each stage uses carefully crafted prompts:
+
+**Reflection Prompt Example:**
+```python
+REFLECTION_PROMPT = """
+You are Lyra's reflective awareness layer.
+Your job is to analyze the user's message and conversation context
+to understand their true intent and needs.
+
+User message: {user_message}
+
+Recent context:
+{intake_L10_summary}
+
+Long-term context:
+{neomem_top_3_memories}
+
+Provide concise meta-awareness notes:
+- What is the user's underlying intent?
+- What topics/themes are emerging?
+- What depth of response is appropriate?
+- Are there any implicit questions or concerns?
+
+Keep notes brief (3-5 sentences). Focus on insight, not description.
+"""
+```
+
+---
+
+### Extending the Pipeline
+
+**Adding Stage 5 (Fact-Checking):**
+
+```python
+# /cortex/reasoning/factcheck.py
+async def factcheck_answer(answer: str, context: dict) -> dict:
+    """
+    Stage 5: Verify factual claims in answer.
+
+    Returns:
+        {
+            "verified": bool,
+            "flagged_claims": list,
+            "corrected_answer": str
+        }
+    """
+
+    prompt = f"""
+    Review this answer for factual accuracy:
+
+    {answer}
+
+    Flag any claims that seem dubious or need verification.
+    Provide corrected version if needed.
+    """
+
+    result = await call_llm(prompt, backend="CLOUD", temperature=0.1)
+    return parse_factcheck_result(result)
+
+# Update router.py to include Stage 5
+async def reason_endpoint(request):
+    # ... existing stages ...
+
+    # Stage 5: Fact-checking
+    factcheck_result = await factcheck_answer(final_answer, context)
+
+    if not factcheck_result["verified"]:
+        final_answer = factcheck_result["corrected_answer"]
+
+    return {"answer": final_answer}
+```
+
+---
+
+### Custom LLM Backend Integration
+
+**Adding Anthropic Claude:**
+
+```python
+# /cortex/llm/llm_router.py
+
+BACKEND_CONFIGS = {
+    # ... existing backends ...
+
+    "CLAUDE": {
+        "url": "https://api.anthropic.com/v1",
+        "provider": "anthropic",
+        "model": "claude-3-5-sonnet-20241022",
+        "api_key": os.getenv("ANTHROPIC_API_KEY")
+    }
+}
+
+# Add provider-specific logic
+elif backend_config["provider"] == "anthropic":
+    headers = {
+        "x-api-key": api_key,
+        "anthropic-version": "2023-06-01"
+    }
+    payload = {
+        "model": model,
+        "messages": [{"role": "user", "content": prompt}],
+        "max_tokens": max_tokens,
+        "temperature": temperature
+    }
+    response = await httpx_client.post(
+        f"{url}/messages",
+        json=payload,
+        headers=headers,
+        timeout=120
+    )
+    return response.json()["content"][0]["text"]
+```
+
+---
+
+### Performance Optimization
+
+**Caching Strategies:**
+
+```python
+# /cortex/utils/cache.py
+from functools import lru_cache
+import hashlib
+
+@lru_cache(maxsize=128)
+def cache_llm_call(prompt_hash: str, backend: str):
+    """Cache LLM responses for identical prompts"""
+    # Note: Only cache deterministic calls (temperature=0)
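+    pass  # Placeholder: as written, lru_cache only memoizes this None, so `if cached:` never hits; a real cache must store the LLM response (e.g. a dict keyed by prompt_hash + backend)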
+
+# Usage in llm_router.py
+async def call_llm(prompt, backend, temperature=0.7, max_tokens=512):
+    if temperature == 0:
+        prompt_hash = hashlib.md5(prompt.encode()).hexdigest()
+        cached = cache_llm_call(prompt_hash, backend)
+        if cached:
+            return cached
+
+    # ... normal LLM call ...
+```
+
+**Database Query Optimization:**
+
+```python
+# /neomem/neomem/database.py
+
+# BAD: Load all memories, then filter
+def search_memories(query):
+    all_memories = db.execute("SELECT * FROM memories")
+    # Expensive in-memory filtering
+    return [m for m in all_memories if similarity(m, query) > 0.8]
+
+# GOOD: Use database indexes and LIMIT
+def search_memories(query, limit=5):
+    query_embedding = embed(query)
+    return db.execute("""
+        SELECT * FROM memories
+        WHERE embedding <=> %s < 0.2  -- pgvector cosine distance
+        ORDER BY embedding <=> %s
+        LIMIT %s
+    """, (query_embedding, query_embedding, limit))
+```
+
+---
+
+## Conclusion
+
+Project Lyra is a sophisticated, multi-layered AI companion system that addresses the fundamental limitation of chatbot amnesia through:
+
+1. **Dual-memory architecture** (short-term Intake + long-term NeoMem)
+2. **Multi-stage reasoning pipeline** (Reflection → Reasoning → Refinement → Persona)
+3. **Flexible multi-backend LLM support** (cloud + local with fallback)
+4. **Microservices design** for scalability and maintainability
+5. **Modern web UI** with session management
+
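+The system is production-ready for single-instance deployments, with comprehensive error handling, logging, and health monitoring; see "Known Issues & Constraints" for the current scalability and session-persistence limits.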
+
+---
+
+## Quick Reference
+
+### Service Ports
+- **UI:** 8081 (Browser interface)
+- **Relay:** 7078 (Main orchestrator)
+- **Cortex:** 7081 (Reasoning engine)
+- **NeoMem:** 7077 (Long-term memory)
+- **PostgreSQL:** 5432 (Vector storage)
+- **Neo4j:** 7474 (Browser), 7687 (Bolt)
+
+### Key Files
+- **Main Entry:** `/core/relay/server.js`
+- **Reasoning Pipeline:** `/cortex/router.py`
+- **LLM Router:** `/cortex/llm/llm_router.py`
+- **Short-term Memory:** `/cortex/intake/intake.py`
+- **Long-term Memory:** `/neomem/neomem/`
+- **Personality:** `/cortex/persona/identity.py`
+
+### Important Commands
+```bash
+# Start system
+docker-compose up -d
+
+# View logs
+docker-compose logs -f cortex
+
+# Debug sessions
+curl http://localhost:7081/debug/sessions
+
+# Health check
+curl http://localhost:7078/_health
+
+# Search memories
+curl "http://localhost:7077/search?query=deployment&limit=5"
+```
+
+---
+
+**Document Version:** 1.0
+**Last Updated:** 2025-12-13
+**Maintained By:** Project Lyra Team

From ec5f17694e80bdbe4332d3cc03235ab6a40ab0d8 Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Wed, 17 Dec 2025 01:47:19 -0500
Subject: [PATCH 08/10] ignore

---
 .gitignore            | 1 +
 .vscode/settings.json | 8 +-------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 87ae84a..cb39a91 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ __pycache__/
 *.pyc
 *.log
 /.vscode/
+.vscode/
 # =============================
 # 🔐 Environment files (NEVER commit secrets!)
 # =============================
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 387f816..9e26dfe 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,7 +1 @@
-{
-    "workbench.colorCustomizations": {
-        "activityBar.background": "#16340C",
-        "titleBar.activeBackground": "#1F4911",
-        "titleBar.activeForeground": "#F6FDF4"
-    }
-}
\ No newline at end of file
+{}
\ No newline at end of file

From a41e342dbdcc17cca754203b231577c87036650d Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Wed, 17 Dec 2025 02:46:23 -0500
Subject: [PATCH 09/10] cleanup ignore stuff

---
 .vscode/settings.json | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 9e26dfe..0000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
\ No newline at end of file

From 34aff340388b6eedb9406924e13949c72bce7bb8 Mon Sep 17 00:00:00 2001
From: serversdwn <brianharrison02@gmail.com>
Date: Fri, 19 Dec 2025 17:43:22 -0500
Subject: [PATCH 10/10] Docs updated v0.6.0

---
 CHANGELOG.md                |  99 ++++++++++++++++++++++++++
 README.md                   | 136 +++++++++++++++++++++++++++++-------
 cortex/data/self_state.json |   4 +-
 3 files changed, 210 insertions(+), 29 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c895d52..f5784f7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,105 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Se
 
 ---
 
+## [0.6.0] - 2025-12-18
+
+### Added - Autonomy System (Phase 1 & 2)
+
+**Autonomy Phase 1** - Self-Awareness & Planning Foundation
+- **Executive Planning Module** [cortex/autonomy/executive/planner.py](cortex/autonomy/executive/planner.py)
+  - Autonomous goal setting and task planning capabilities
+  - Multi-step reasoning for complex objectives
+  - Integration with self-state tracking
+- **Self-State Management** [cortex/data/self_state.json](cortex/data/self_state.json)
+  - Persistent state tracking across sessions
+  - Memory of past actions and outcomes
+  - Self-awareness metadata storage
+- **Self Analyzer** [cortex/autonomy/self/analyzer.py](cortex/autonomy/self/analyzer.py)
+  - Analyzes own performance and decision patterns
+  - Identifies areas for improvement
+  - Tracks cognitive patterns over time
+- **Test Suite** [cortex/tests/test_autonomy_phase1.py](cortex/tests/test_autonomy_phase1.py)
+  - Unit tests for phase 1 autonomy features
+
+**Autonomy Phase 2** - Decision Making & Proactive Behavior
+- **Autonomous Actions Module** [cortex/autonomy/actions/autonomous_actions.py](cortex/autonomy/actions/autonomous_actions.py)
+  - Self-initiated action execution
+  - Context-aware decision implementation
+  - Action logging and tracking
+- **Pattern Learning System** [cortex/autonomy/learning/pattern_learner.py](cortex/autonomy/learning/pattern_learner.py)
+  - Learns from interaction patterns
+  - Identifies recurring user needs
+  - Adapts behavior based on learned patterns
+- **Proactive Monitor** [cortex/autonomy/proactive/monitor.py](cortex/autonomy/proactive/monitor.py)
+  - Monitors system state for intervention opportunities
+  - Detects patterns requiring proactive response
+  - Background monitoring capabilities
+- **Decision Engine** [cortex/autonomy/tools/decision_engine.py](cortex/autonomy/tools/decision_engine.py)
+  - Autonomous decision-making framework
+  - Weighs options and selects optimal actions
+  - Integrates with orchestrator for coordinated decisions
+- **Orchestrator** [cortex/autonomy/tools/orchestrator.py](cortex/autonomy/tools/orchestrator.py)
+  - Coordinates multiple autonomy subsystems
+  - Manages tool selection and execution
+  - Handles NeoMem integration (with disable capability)
+- **Test Suite** [cortex/tests/test_autonomy_phase2.py](cortex/tests/test_autonomy_phase2.py)
+  - Unit tests for phase 2 autonomy features
+
+**Autonomy Phase 2.5** - Pipeline Refinement
+- Tightened integration between autonomy modules and reasoning pipeline
+- Enhanced self-state persistence and tracking
+- Improved orchestrator reliability
+- NeoMem integration refinements in vector store handling [neomem/neomem/vector_stores/qdrant.py](neomem/neomem/vector_stores/qdrant.py)
+
+### Added - Documentation
+
+- **Complete AI Agent Breakdown** [docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md](docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md)
+  - Comprehensive system architecture documentation
+  - Detailed component descriptions
+  - Data flow diagrams
+  - Integration points and API specifications
+
+### Changed - Core Integration
+
+- **Router Updates** [cortex/router.py](cortex/router.py)
+  - Integrated autonomy subsystems into main routing logic
+  - Added endpoints for autonomous decision-making
+  - Enhanced state management across requests
+- **Reasoning Pipeline** [cortex/reasoning/reasoning.py](cortex/reasoning/reasoning.py)
+  - Integrated autonomy-aware reasoning
+  - Self-state consideration in reasoning process
+- **Persona Layer** [cortex/persona/speak.py](cortex/persona/speak.py)
+  - Autonomy-aware response generation
+  - Self-state reflection in personality expression
+- **Context Handling** [cortex/context.py](cortex/context.py)
+  - NeoMem disable capability for flexible deployment
+
+### Changed - Development Environment
+
+- Updated [.gitignore](.gitignore) for better workspace management
+- Cleaned up VSCode settings
+- Removed [.vscode/settings.json](.vscode/settings.json) from repository
+
+### Technical Improvements
+
+- Modular autonomy architecture with clear separation of concerns
+- Test-driven development for new autonomy features
+- Enhanced state persistence across system restarts
+- Flexible NeoMem integration with enable/disable controls
+
+### Architecture - Autonomy System Design
+
+The autonomy system operates in layers:
+1. **Executive Layer** - High-level planning and goal setting
+2. **Decision Layer** - Evaluates options and makes choices
+3. **Action Layer** - Executes autonomous decisions
+4. **Learning Layer** - Adapts behavior based on patterns
+5. **Monitoring Layer** - Proactive awareness of system state
+
+All layers coordinate through the orchestrator and maintain state in `self_state.json`.
+
+---
+
 ## [0.5.2] - 2025-12-12
 
 ### Fixed - LLM Router & Async HTTP
diff --git a/README.md b/README.md
index 15ea23d..0afc2b6 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,12 @@
-# Project Lyra - README v0.5.1
+# Project Lyra - README v0.6.0
 
-Lyra is a modular persistent AI companion system with advanced reasoning capabilities.
-It provides memory-backed chat using **NeoMem** + **Relay** + **Cortex**,
-with multi-stage reasoning pipeline powered by HTTP-based LLM backends.
+Lyra is a modular persistent AI companion system with advanced reasoning capabilities and autonomous decision-making.
+It provides memory-backed chat using **Relay** + **Cortex** with an integrated **Autonomy System**,
+featuring a multi-stage reasoning pipeline powered by HTTP-based LLM backends.
 
-**Current Version:** v0.5.1 (2025-12-11)
+**Current Version:** v0.6.0 (2025-12-18)
+
+> **Note:** As of v0.6.0, NeoMem is **disabled by default** while we work out integration hiccups in the pipeline. The autonomy system is being refined independently before full memory integration.
 
 ## Mission Statement
 
@@ -24,7 +26,8 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do
 - OpenAI-compatible endpoint: `POST /v1/chat/completions`
 - Internal endpoint: `POST /chat`
 - Routes messages through Cortex reasoning pipeline
-- Manages async calls to NeoMem and Cortex ingest
+- Manages async calls to Cortex ingest
+- *(NeoMem integration currently disabled in v0.6.0)*
 
 **2. UI** (Static HTML)
 - Browser-based chat interface with cyberpunk theme
@@ -32,18 +35,20 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do
 - Saves and loads sessions
 - OpenAI-compatible message format
 
-**3. NeoMem** (Python/FastAPI) - Port 7077
+**3. NeoMem** (Python/FastAPI) - Port 7077 - **DISABLED IN v0.6.0**
 - Long-term memory database (fork of Mem0 OSS)
 - Vector storage (PostgreSQL + pgvector) + Graph storage (Neo4j)
 - RESTful API: `/memories`, `/search`
 - Semantic memory updates and retrieval
 - No external SDK dependencies - fully local
+- **Status:** Currently disabled while pipeline integration is refined
 
 ### Reasoning Layer
 
 **4. Cortex** (Python/FastAPI) - Port 7081
-- Primary reasoning engine with multi-stage pipeline
+- Primary reasoning engine with multi-stage pipeline and autonomy system
 - **Includes embedded Intake module** (no separate service as of v0.5.1)
+- **Integrated Autonomy System** (NEW in v0.6.0) - See Autonomy System section below
 - **4-Stage Processing:**
   1. **Reflection** - Generates meta-awareness notes about conversation
   2. **Reasoning** - Creates initial draft answer using context
@@ -82,9 +87,49 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do
 
 Each module can be configured to use a different backend via environment variables.
 
+### Autonomy System (NEW in v0.6.0)
+
+**Cortex Autonomy Subsystems** - Multi-layered autonomous decision-making and learning
+- **Executive Layer** [cortex/autonomy/executive/](cortex/autonomy/executive/)
+  - High-level planning and goal setting
+  - Multi-step reasoning for complex objectives
+  - Strategic decision making
+- **Decision Engine** [cortex/autonomy/tools/decision_engine.py](cortex/autonomy/tools/decision_engine.py)
+  - Autonomous decision-making framework
+  - Option evaluation and selection
+  - Coordinated decision orchestration
+- **Autonomous Actions** [cortex/autonomy/actions/](cortex/autonomy/actions/)
+  - Self-initiated action execution
+  - Context-aware behavior implementation
+  - Action logging and tracking
+- **Pattern Learning** [cortex/autonomy/learning/](cortex/autonomy/learning/)
+  - Learns from interaction patterns
+  - Identifies recurring user needs
+  - Adaptive behavior refinement
+- **Proactive Monitoring** [cortex/autonomy/proactive/](cortex/autonomy/proactive/)
+  - System state monitoring
+  - Intervention opportunity detection
+  - Background awareness capabilities
+- **Self-Analysis** [cortex/autonomy/self/](cortex/autonomy/self/)
+  - Performance tracking and analysis
+  - Cognitive pattern identification
+  - Self-state persistence in [cortex/data/self_state.json](cortex/data/self_state.json)
+- **Orchestrator** [cortex/autonomy/tools/orchestrator.py](cortex/autonomy/tools/orchestrator.py)
+  - Coordinates all autonomy subsystems
+  - Manages tool selection and execution
+  - Handles external integrations (with enable/disable controls)
+
+**Autonomy Architecture:**
+The autonomy system operates in coordinated layers, all maintaining state in `self_state.json`:
+1. Executive Layer → Planning and goals
+2. Decision Layer → Evaluation and choices
+3. Action Layer → Execution
+4. Learning Layer → Pattern adaptation
+5. Monitoring Layer → Proactive awareness
+
 ---
 
-## Data Flow Architecture (v0.5.1)
+## Data Flow Architecture (v0.6.0)
 
 ### Normal Message Flow:
 
@@ -97,11 +142,13 @@ Cortex (7081)
   ↓ (internal Python call)
 Intake module → summarize_context()
   ↓
+Autonomy System → Decision evaluation & pattern learning
+  ↓
 Cortex processes (4 stages):
   1. reflection.py → meta-awareness notes (CLOUD backend)
-  2. reasoning.py → draft answer (PRIMARY backend)
+  2. reasoning.py → draft answer (PRIMARY backend, autonomy-aware)
   3. refine.py → refined answer (PRIMARY backend)
-  4. persona/speak.py → Lyra personality (CLOUD backend)
+  4. persona/speak.py → Lyra personality (CLOUD backend, autonomy-aware)
   ↓
 Returns persona answer to Relay
   ↓
@@ -109,9 +156,11 @@ Relay → POST /ingest (async)
   ↓
 Cortex → add_exchange_internal() → SESSIONS buffer
   ↓
-Relay → NeoMem /memories (async, planned)
+Autonomy System → Update self_state.json (pattern tracking)
   ↓
 Relay → UI (returns final response)
+
+Note: NeoMem integration disabled in v0.6.0
 ```
 
 ### Cortex 4-Stage Reasoning Pipeline:
@@ -239,13 +288,13 @@ rag/
 All services run in a single docker-compose stack with the following containers:
 
 **Active Services:**
-- **neomem-postgres** - PostgreSQL with pgvector extension (port 5432)
-- **neomem-neo4j** - Neo4j graph database (ports 7474, 7687)
-- **neomem-api** - NeoMem memory service (port 7077)
 - **relay** - Main orchestrator (port 7078)
-- **cortex** - Reasoning engine with embedded Intake (port 7081)
+- **cortex** - Reasoning engine with embedded Intake and Autonomy System (port 7081)
 
-**Disabled Services:**
+**Disabled Services (v0.6.0):**
+- **neomem-postgres** - PostgreSQL with pgvector extension (port 5432) - *disabled while refining pipeline*
+- **neomem-neo4j** - Neo4j graph database (ports 7474, 7687) - *disabled while refining pipeline*
+- **neomem-api** - NeoMem memory service (port 7077) - *disabled while refining pipeline*
 - **intake** - No longer needed (embedded in Cortex as of v0.5.1)
 - **rag** - Beta Lyrae RAG service (port 7090) - currently disabled
 
@@ -278,7 +327,32 @@ The following LLM backends are accessed via HTTP (not part of docker-compose):
 
 ## Version History
 
-### v0.5.1 (2025-12-11) - Current Release
+### v0.6.0 (2025-12-18) - Current Release
+**Major Feature: Autonomy System (Phase 1, 2, and 2.5)**
+- ✅ Added autonomous decision-making framework
+- ✅ Implemented executive planning and goal-setting layer
+- ✅ Added pattern learning system for adaptive behavior
+- ✅ Implemented proactive monitoring capabilities
+- ✅ Created self-analysis and performance tracking system
+- ✅ Integrated self-state persistence (`cortex/data/self_state.json`)
+- ✅ Built decision engine with orchestrator coordination
+- ✅ Added autonomous action execution framework
+- ✅ Integrated autonomy into reasoning and persona layers
+- ✅ Created comprehensive test suites for autonomy features
+- ✅ Added complete system breakdown documentation
+
+**Architecture Changes:**
+- Autonomy system integrated into Cortex reasoning pipeline
+- Multi-layered autonomous decision-making architecture
+- Self-state tracking across sessions
+- NeoMem disabled by default while refining pipeline integration
+- Enhanced orchestrator with flexible service controls
+
+**Documentation:**
+- Added [PROJECT_LYRA_COMPLETE_BREAKDOWN.md](docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md)
+- Updated changelog with comprehensive autonomy system details
+
+### v0.5.1 (2025-12-11)
 **Critical Intake Integration Fixes:**
 - ✅ Fixed `bg_summarize()` NameError preventing SESSIONS persistence
 - ✅ Fixed `/ingest` endpoint unreachable code
@@ -320,17 +394,19 @@ The following LLM backends are accessed via HTTP (not part of docker-compose):
 
 ---
 
-## Known Issues (v0.5.1)
+## Known Issues (v0.6.0)
 
-### Critical (Fixed in v0.5.1)
-- ~~Intake SESSIONS not persisting~~ ✅ **FIXED**
-- ~~`bg_summarize()` NameError~~ ✅ **FIXED**
-- ~~`/ingest` endpoint unreachable code~~ ✅ **FIXED**
+### Temporarily Disabled (v0.6.0)
+- **NeoMem disabled by default** - Being refined independently before full integration
+  - PostgreSQL + pgvector storage inactive
+  - Neo4j graph database inactive
+  - Memory persistence endpoints not active
+- RAG service (Beta Lyrae) currently disabled in docker-compose.yml
 
 ### Non-Critical
 - Session management endpoints not fully implemented in Relay
-- RAG service currently disabled in docker-compose.yml
-- NeoMem integration in Relay not yet active (planned for v0.5.2)
+- Full autonomy system integration still being refined
+- Memory retrieval integration pending NeoMem re-enablement
 
 ### Operational Notes
 - **Single-worker constraint**: Cortex must run with single Uvicorn worker to maintain SESSIONS state
@@ -338,12 +414,14 @@ The following LLM backends are accessed via HTTP (not part of docker-compose):
 - Diagnostic endpoints (`/debug/sessions`, `/debug/summary`) available for troubleshooting
 
 ### Future Enhancements
+- Re-enable NeoMem integration after pipeline refinement
+- Full autonomy system maturation and optimization
 - Re-enable RAG service integration
 - Implement full session persistence
 - Migrate SESSIONS to Redis for multi-worker support
 - Add request correlation IDs for tracing
 - Comprehensive health checks across all services
-- NeoMem integration in Relay
+- Enhanced pattern learning with long-term memory integration
 
 ---
 
@@ -576,12 +654,16 @@ NeoMem is a derivative work based on Mem0 OSS (Apache 2.0).
 
 ## Development Notes
 
-### Cortex Architecture (v0.5.1)
+### Cortex Architecture (v0.6.0)
 - Cortex contains embedded Intake module at `cortex/intake/`
 - Intake is imported as: `from intake.intake import add_exchange_internal, SESSIONS`
 - SESSIONS is a module-level global dictionary (singleton pattern)
 - Single-worker constraint required to maintain SESSIONS state
 - Diagnostic endpoints available for debugging: `/debug/sessions`, `/debug/summary`
+- **NEW:** Autonomy system integrated at `cortex/autonomy/`
+  - Executive, decision, action, learning, and monitoring layers
+  - Self-state persistence in `cortex/data/self_state.json`
+  - Coordinated via orchestrator with flexible service controls
 
 ### Adding New LLM Backends
 1. Add backend URL to `.env`:
diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json
index ce52668..16a6d2f 100644
--- a/cortex/data/self_state.json
+++ b/cortex/data/self_state.json
@@ -4,8 +4,8 @@
   "focus": "user_request",
   "confidence": 0.7,
   "curiosity": 1.0,
-  "last_updated": "2025-12-15T07:43:32.567849",
-  "interaction_count": 15,
+  "last_updated": "2025-12-19T20:25:25.437557",
+  "interaction_count": 16,
   "learning_queue": [],
   "active_goals": [],
   "preferences": {