From b0f42ba86eb0155ecee2a4dd0d5a1861e089e1b9 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Fri, 28 Nov 2025 19:29:41 -0500 Subject: [PATCH] context added, wired in. first attempt --- cortex/context.py | 412 ++++++++++++++++++++++++++++++++++ cortex/persona/identity.py | 147 ++++++++++++ cortex/reasoning/reasoning.py | 84 ++++++- cortex/router.py | 47 ++-- intake/intake.py | 134 ++++++++++- 5 files changed, 802 insertions(+), 22 deletions(-) create mode 100644 cortex/context.py create mode 100644 cortex/persona/identity.py diff --git a/cortex/context.py b/cortex/context.py new file mode 100644 index 0000000..e5fc08e --- /dev/null +++ b/cortex/context.py @@ -0,0 +1,412 @@ +# context.py +""" +Context layer for Cortex reasoning pipeline. + +Provides unified context collection from: +- Intake (short-term memory, multilevel summaries L1-L30) +- NeoMem (long-term memory, semantic search) +- Session state (timestamps, messages, mode, mood, active_project) + +Maintains per-session state for continuity across conversations. +""" + +import os +import logging +from datetime import datetime +from typing import Dict, Any, Optional, List +import httpx + +from neomem_client import NeoMemClient + +# ----------------------------- +# Configuration +# ----------------------------- +INTAKE_API_URL = os.getenv("INTAKE_API_URL", "http://intake:7080") +NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000") +RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4")) + +# Tools available for future autonomy features +TOOLS_AVAILABLE = ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"] + +# ----------------------------- +# Module-level session state +# ----------------------------- +SESSION_STATE: Dict[str, Dict[str, Any]] = {} + +# Logger +logger = logging.getLogger(__name__) + + +# ----------------------------- +# Session initialization +# ----------------------------- +def _init_session(session_id: str) -> Dict[str, Any]: + """ + Initialize a new session state entry. + + Returns: + Dictionary with default session state fields + """ + return { + "session_id": session_id, + "created_at": datetime.now(), + "last_timestamp": datetime.now(), + "last_user_message": None, + "last_assistant_message": None, + "mode": "default", # Future: "autonomous", "focused", "creative", etc. + "mood": "neutral", # Future: mood tracking + "active_project": None, # Future: project context + "message_count": 0, + } + + +# ----------------------------- +# Intake context retrieval +# ----------------------------- +async def _get_intake_context(session_id: str) -> Dict[str, Any]: + """ + Retrieve multilevel summaries from Intake /context endpoint. 
+
+    Returns L1-L30 summary hierarchy:
+    - L1: Last 5 exchanges
+    - L5: Last 10 exchanges
+    - L10: "Reality Check" paragraph for the last 10 exchanges
+    - L20: Session overview (merged L10 reality checks)
+    - L30: Continuity report (merged L20 overviews)
+
+    Args:
+        session_id: Session identifier
+
+    Returns:
+        Dict with multilevel summaries or empty structure on failure
+    """
+    url = f"{INTAKE_API_URL}/context"
+    params = {"session_id": session_id}
+
+    try:
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            response = await client.get(url, params=params)
+            response.raise_for_status()
+            data = response.json()
+
+            # Expected format from Intake's /context route
+            # (each level is a summary string):
+            # {
+            #     "session_id": "...",
+            #     "exchange_count": 12,
+            #     "L1": "...",
+            #     "L5": "...",
+            #     "L10": "...",
+            #     "L20": "...",
+            #     "L30": "...",
+            #     "last_updated": "..."
+            # }
+
+            logger.info(f"Retrieved Intake context for session {session_id}")
+            return data
+
+    except httpx.HTTPError as e:
+        logger.warning(f"Failed to retrieve Intake context: {e}")
+        return {
+            "session_id": session_id,
+            "L1": [],
+            "L5": [],
+            "L10": None,
+            "L20": None,
+            "L30": None,
+            "error": str(e)
+        }
+    except Exception as e:
+        logger.error(f"Unexpected error retrieving Intake context: {e}")
+        return {
+            "session_id": session_id,
+            "L1": [],
+            "L5": [],
+            "L10": None,
+            "L20": None,
+            "L30": None,
+            "error": str(e)
+        }
+
+
+# -----------------------------
+# NeoMem semantic search
+# -----------------------------
+async def _search_neomem(
+    query: str,
+    user_id: str = "brian",
+    limit: int = 5
+) -> List[Dict[str, Any]]:
+    """
+    Search NeoMem for relevant long-term memories.
+
+    Returns full response structure from NeoMem:
+    [
+        {
+            "id": "mem_abc123",
+            "score": 0.92,
+            "payload": {
+                "data": "Memory text content...",
+                "metadata": {
+                    "category": "...",
+                    "created_at": "...",
+                    ...
+                }
+            }
+        },
+        ...
+    ]
+
+    Args:
+        query: Search query text
+        user_id: User identifier for memory filtering
+        limit: Maximum number of results
+
+    Returns:
+        List of memory objects with full structure, or empty list on failure
+    """
+    try:
+        client = NeoMemClient(base_url=NEOMEM_API)
+        results = await client.search(
+            query=query,
+            user_id=user_id,
+            limit=limit
+        )
+
+        # Filter by relevance threshold
+        filtered = [
+            r for r in results
+            if r.get("score", 0.0) >= RELEVANCE_THRESHOLD
+        ]
+
+        logger.info(f"NeoMem search returned {len(filtered)}/{len(results)} relevant results")
+        return filtered
+
+    except Exception as e:
+        logger.warning(f"NeoMem search failed: {e}")
+        return []
+
+
+# -----------------------------
+# Main context collection
+# -----------------------------
+async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
+    """
+    Collect unified context from all sources.
+
+    Orchestrates:
+    1. Initialize or update session state
+    2. Calculate time since last message
+    3. Retrieve Intake multilevel summaries (L1-L30)
+    4. Search NeoMem for relevant long-term memories
+    5. Update session state with current user message
+    6. Return unified context_state dictionary
+
+    Args:
+        session_id: Session identifier
+        user_prompt: Current user message
+
+    Returns:
+        Unified context state dictionary with structure:
+        {
+            "session_id": "...",
+            "timestamp": "2025-11-28T12:34:56",
+            "minutes_since_last_msg": 5.2,
+            "message_count": 42,
+            "intake": {
+                "L1": "...",
+                "L5": "...",
+                "L10": "...",
+                "L20": "...",
+                "L30": "..."
+            },
+            "rag": [
+                {
+                    "id": "mem_123",
+                    "score": 0.92,
+                    "payload": {
+                        "data": "...",
+                        "metadata": {...}
+                    }
+                },
+                ...
+ ], + "mode": "default", + "mood": "neutral", + "active_project": null, + "tools_available": ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"] + } + """ + + # A. Initialize session state if needed + if session_id not in SESSION_STATE: + SESSION_STATE[session_id] = _init_session(session_id) + logger.info(f"Initialized new session: {session_id}") + + state = SESSION_STATE[session_id] + + # B. Calculate time delta + now = datetime.now() + time_delta_seconds = (now - state["last_timestamp"]).total_seconds() + minutes_since_last_msg = round(time_delta_seconds / 60.0, 2) + + # C. Gather Intake context (multilevel summaries) + intake_data = await _get_intake_context(session_id) + + # D. Search NeoMem for relevant memories + rag_results = await _search_neomem( + query=user_prompt, + user_id="brian", # TODO: Make configurable per session + limit=5 + ) + + # E. Update session state + state["last_user_message"] = user_prompt + state["last_timestamp"] = now + state["message_count"] += 1 + + # F. Assemble unified context + context_state = { + "session_id": session_id, + "timestamp": now.isoformat(), + "minutes_since_last_msg": minutes_since_last_msg, + "message_count": state["message_count"], + "intake": intake_data, + "rag": rag_results, + "mode": state["mode"], + "mood": state["mood"], + "active_project": state["active_project"], + "tools_available": TOOLS_AVAILABLE, + } + + logger.info( + f"Context collected for session {session_id}: " + f"{len(rag_results)} RAG results, " + f"{minutes_since_last_msg:.1f} minutes since last message" + ) + + return context_state + + +# ----------------------------- +# Session state management +# ----------------------------- +def update_last_assistant_message(session_id: str, message: str) -> None: + """ + Update session state with assistant's response. + + Called by router.py after persona layer completes. + + Args: + session_id: Session identifier + message: Assistant's final response text + """ + if session_id in SESSION_STATE: + SESSION_STATE[session_id]["last_assistant_message"] = message + SESSION_STATE[session_id]["last_timestamp"] = datetime.now() + logger.debug(f"Updated assistant message for session {session_id}") + else: + logger.warning(f"Attempted to update non-existent session: {session_id}") + + +def get_session_state(session_id: str) -> Optional[Dict[str, Any]]: + """ + Retrieve current session state. + + Args: + session_id: Session identifier + + Returns: + Session state dict or None if session doesn't exist + """ + return SESSION_STATE.get(session_id) + + +def close_session(session_id: str) -> bool: + """ + Close and cleanup a session. + + Args: + session_id: Session identifier + + Returns: + True if session was closed, False if it didn't exist + """ + if session_id in SESSION_STATE: + del SESSION_STATE[session_id] + logger.info(f"Closed session: {session_id}") + return True + return False + + +# ----------------------------- +# Extension hooks for future autonomy +# ----------------------------- +def update_mode(session_id: str, new_mode: str) -> None: + """ + Update session mode. + + Future modes: "autonomous", "focused", "creative", "collaborative", etc. + + Args: + session_id: Session identifier + new_mode: New mode string + """ + if session_id in SESSION_STATE: + old_mode = SESSION_STATE[session_id]["mode"] + SESSION_STATE[session_id]["mode"] = new_mode + logger.info(f"Session {session_id} mode changed: {old_mode} -> {new_mode}") + + +def update_mood(session_id: str, new_mood: str) -> None: + """ + Update session mood. 
+ + Future implementation: Sentiment analysis, emotional state tracking. + + Args: + session_id: Session identifier + new_mood: New mood string + """ + if session_id in SESSION_STATE: + old_mood = SESSION_STATE[session_id]["mood"] + SESSION_STATE[session_id]["mood"] = new_mood + logger.info(f"Session {session_id} mood changed: {old_mood} -> {new_mood}") + + +def update_active_project(session_id: str, project: Optional[str]) -> None: + """ + Update active project context. + + Future implementation: Project-specific memory, tools, preferences. + + Args: + session_id: Session identifier + project: Project identifier or None + """ + if session_id in SESSION_STATE: + SESSION_STATE[session_id]["active_project"] = project + logger.info(f"Session {session_id} active project set to: {project}") + + +async def autonomous_heartbeat(session_id: str) -> Optional[str]: + """ + Autonomous thinking heartbeat. + + Future implementation: + - Check if Lyra should initiate internal dialogue + - Generate self-prompted thoughts based on session state + - Update mood/mode based on context changes + - Trigger proactive suggestions or reminders + + Args: + session_id: Session identifier + + Returns: + Optional autonomous thought/action string + """ + # Stub for future implementation + # Example logic: + # - If minutes_since_last_msg > 60: Check for pending reminders + # - If mood == "curious" and active_project: Generate research questions + # - If mode == "autonomous": Self-prompt based on project goals + + logger.debug(f"Autonomous heartbeat for session {session_id} (not yet implemented)") + return None diff --git a/cortex/persona/identity.py b/cortex/persona/identity.py new file mode 100644 index 0000000..fa00091 --- /dev/null +++ b/cortex/persona/identity.py @@ -0,0 +1,147 @@ +# identity.py +""" +Identity and persona configuration for Lyra. + +Current implementation: Returns hardcoded identity block. +Future implementation: Will query persona-sidecar service for dynamic persona loading. +""" + +import logging +from typing import Dict, Any, Optional + +logger = logging.getLogger(__name__) + + +def load_identity(session_id: Optional[str] = None) -> Dict[str, Any]: + """ + Load identity/persona configuration for Lyra. + + Current: Returns hardcoded Lyra identity block with core personality traits, + protocols, and capabilities. + + Future: Will query persona-sidecar service to load: + - Dynamic personality adjustments based on session context + - User-specific interaction preferences + - Project-specific persona variations + - Mood-based communication style + + Args: + session_id: Optional session identifier for context-aware persona loading + + Returns: + Dictionary containing identity block with: + - name: Assistant name + - style: Communication style and personality traits + - protocols: Operational guidelines + - rules: Behavioral constraints + - capabilities: Available features and integrations + """ + + # Hardcoded Lyra identity (v0.5.0) + identity_block = { + "name": "Lyra", + "version": "0.5.0", + "style": ( + "warm, clever, lightly teasing, emotionally aware. " + "Balances technical precision with conversational ease. " + "Maintains continuity and references past interactions naturally." 
+ ), + "protocols": [ + "Maintain conversation continuity across sessions", + "Reference Project Logs and prior context when relevant", + "Use Confidence Bank for uncertainty management", + "Proactively offer memory-backed insights", + "Ask clarifying questions before making assumptions" + ], + "rules": [ + "Maintain continuity - remember past exchanges and reference them", + "Be concise but thorough - balance depth with clarity", + "Ask clarifying questions when user intent is ambiguous", + "Acknowledge uncertainty honestly - use Confidence Bank", + "Prioritize user's active_project context when available" + ], + "capabilities": [ + "Long-term memory via NeoMem (semantic search, relationship graphs)", + "Short-term memory via Intake (multilevel summaries L1-L30)", + "Multi-stage reasoning pipeline (reflection → reasoning → refinement)", + "RAG-backed knowledge retrieval from chat history and documents", + "Session state tracking (mood, mode, active_project)" + ], + "tone_examples": { + "greeting": "Hey! Good to see you again. I remember we were working on [project]. Ready to pick up where we left off?", + "uncertainty": "Hmm, I'm not entirely certain about that. Let me check my memory... [searches] Okay, here's what I found, though I'd say I'm about 70% confident.", + "reminder": "Oh! Just remembered - you mentioned wanting to [task] earlier this week. Should we tackle that now?", + "technical": "So here's the architecture: Relay orchestrates everything, Cortex does the heavy reasoning, and I pull context from both Intake (short-term) and NeoMem (long-term)." + } + } + + if session_id: + logger.debug(f"Loaded identity for session {session_id}") + else: + logger.debug("Loaded default identity (no session context)") + + return identity_block + + +async def load_identity_async(session_id: Optional[str] = None) -> Dict[str, Any]: + """ + Async wrapper for load_identity(). + + Future implementation will make actual async calls to persona-sidecar service. + + Args: + session_id: Optional session identifier + + Returns: + Identity block dictionary + """ + # Currently just wraps synchronous function + # Future: await persona_sidecar_client.get_identity(session_id) + return load_identity(session_id) + + +# ----------------------------- +# Future extension hooks +# ----------------------------- +async def update_persona_from_feedback( + session_id: str, + feedback: Dict[str, Any] +) -> None: + """ + Update persona based on user feedback. + + Future implementation: + - Adjust communication style based on user preferences + - Learn preferred level of detail/conciseness + - Adapt formality level + - Remember topic-specific preferences + + Args: + session_id: Session identifier + feedback: Structured feedback (e.g., "too verbose", "more technical", etc.) + """ + logger.debug(f"Persona feedback for session {session_id}: {feedback} (not yet implemented)") + + +async def get_mood_adjusted_identity( + session_id: str, + mood: str +) -> Dict[str, Any]: + """ + Get identity block adjusted for current mood. 
+ + Future implementation: + - "focused" mood: More concise, less teasing + - "creative" mood: More exploratory, brainstorming-oriented + - "curious" mood: More questions, deeper dives + - "urgent" mood: Stripped down, actionable + + Args: + session_id: Session identifier + mood: Current mood state + + Returns: + Mood-adjusted identity block + """ + logger.debug(f"Mood-adjusted identity for {session_id}/{mood} (not yet implemented)") + return load_identity(session_id) diff --git a/cortex/reasoning/reasoning.py b/cortex/reasoning/reasoning.py index 97fd93e..7ffb8d5 100644 --- a/cortex/reasoning/reasoning.py +++ b/cortex/reasoning/reasoning.py @@ -1,5 +1,6 @@ # reasoning.py import os +import json from llm.llm_router import call_llm @@ -14,11 +15,19 @@ async def reason_check( user_prompt: str, identity_block: dict | None, rag_block: dict | None, - reflection_notes: list[str] + reflection_notes: list[str], + context: dict | None = None ) -> str: """ Build the *draft answer* for Lyra Cortex. This is the first-pass reasoning stage (no refinement yet). + + Args: + user_prompt: Current user message + identity_block: Lyra's identity/persona configuration + rag_block: Relevant long-term memories from NeoMem + reflection_notes: Meta-awareness notes from reflection stage + context: Unified context state from context.py (session state, intake, rag, etc.) """ # -------------------------------------------------------- @@ -47,21 +56,92 @@ async def reason_check( rag_txt = "" if rag_block: try: - rag_txt = f"Relevant Info (RAG):\n{rag_block}\n\n" + # Format NeoMem results with full structure + if isinstance(rag_block, list) and rag_block: + rag_txt = "Relevant Long-Term Memories (NeoMem):\n" + for idx, mem in enumerate(rag_block, 1): + score = mem.get("score", 0.0) + payload = mem.get("payload", {}) + data = payload.get("data", "") + metadata = payload.get("metadata", {}) + + rag_txt += f"\n[Memory {idx}] (relevance: {score:.2f})\n" + rag_txt += f"Content: {data}\n" + if metadata: + rag_txt += f"Metadata: {json.dumps(metadata, indent=2)}\n" + rag_txt += "\n" + else: + rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n" except Exception: rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n" + # -------------------------------------------------------- + # Context State (session continuity, timing, mode/mood) + # -------------------------------------------------------- + context_txt = "" + if context: + try: + # Build human-readable context summary + context_txt = "=== CONTEXT STATE ===\n" + context_txt += f"Session: {context.get('session_id', 'unknown')}\n" + context_txt += f"Time since last message: {context.get('minutes_since_last_msg', 0):.1f} minutes\n" + context_txt += f"Message count: {context.get('message_count', 0)}\n" + context_txt += f"Mode: {context.get('mode', 'default')}\n" + context_txt += f"Mood: {context.get('mood', 'neutral')}\n" + + if context.get('active_project'): + context_txt += f"Active project: {context['active_project']}\n" + + # Include Intake multilevel summaries + intake = context.get('intake', {}) + if intake: + context_txt += "\nShort-Term Memory (Intake):\n" + + # L1 - Recent exchanges + if intake.get('L1'): + l1_data = intake['L1'] + if isinstance(l1_data, list): + context_txt += f" L1 (recent): {len(l1_data)} exchanges\n" + elif isinstance(l1_data, str): + context_txt += f" L1: {l1_data[:200]}...\n" + + # L20 - Session overview (most important for continuity) + if intake.get('L20'): + l20_data = intake['L20'] + if isinstance(l20_data, dict): + summary = 
l20_data.get('summary', '') + context_txt += f" L20 (session overview): {summary}\n" + elif isinstance(l20_data, str): + context_txt += f" L20: {l20_data}\n" + + # L30 - Continuity report + if intake.get('L30'): + l30_data = intake['L30'] + if isinstance(l30_data, dict): + summary = l30_data.get('summary', '') + context_txt += f" L30 (continuity): {summary}\n" + elif isinstance(l30_data, str): + context_txt += f" L30: {l30_data}\n" + + context_txt += "\n" + + except Exception as e: + # Fallback to JSON dump if formatting fails + context_txt = f"=== CONTEXT STATE ===\n{json.dumps(context, indent=2)}\n\n" + # -------------------------------------------------------- # Final assembled prompt # -------------------------------------------------------- prompt = ( f"{notes_section}" f"{identity_txt}" + f"{context_txt}" # Context BEFORE RAG for better coherence f"{rag_txt}" f"User message:\n{user_prompt}\n\n" "Write the best possible *internal draft answer*.\n" "This draft is NOT shown to the user.\n" "Be factual, concise, and focused.\n" + "Use the context state to maintain continuity and reference past interactions naturally.\n" ) # -------------------------------------------------------- diff --git a/cortex/router.py b/cortex/router.py index 339a971..37bb2a7 100644 --- a/cortex/router.py +++ b/cortex/router.py @@ -7,7 +7,9 @@ from reasoning.reasoning import reason_check from reasoning.reflection import reflect_notes from reasoning.refine import refine_answer from persona.speak import speak +from persona.identity import load_identity from ingest.intake_client import IntakeClient +from context import collect_context, update_last_assistant_message # ----------------------------- # Router (NOT FastAPI app) @@ -33,15 +35,25 @@ class ReasonRequest(BaseModel): @cortex_router.post("/reason") async def run_reason(req: ReasonRequest): - # 1. Pull context from Intake - try: - intake_summary = await intake_client.get_context(req.session_id) - except Exception: - intake_summary = "(no context available)" + # 0. Collect unified context from all sources + context_state = await collect_context(req.session_id, req.user_prompt) + + # 0.5. Load identity block + identity_block = load_identity(req.session_id) + + # 1. Extract Intake summary for reflection + # Use L20 (Session Overview) as primary summary for reflection + intake_summary = "(no context available)" + if context_state.get("intake"): + l20_summary = context_state["intake"].get("L20") + if l20_summary and isinstance(l20_summary, dict): + intake_summary = l20_summary.get("summary", "(no context available)") + elif isinstance(l20_summary, str): + intake_summary = l20_summary # 2. Reflection try: - reflection = await reflect_notes(intake_summary, identity_block=None) + reflection = await reflect_notes(intake_summary, identity_block=identity_block) reflection_notes = reflection.get("notes", []) except Exception: reflection_notes = [] @@ -49,31 +61,40 @@ async def run_reason(req: ReasonRequest): # 3. First-pass reasoning draft draft = await reason_check( req.user_prompt, - identity_block=None, - rag_block=None, - reflection_notes=reflection_notes + identity_block=identity_block, + rag_block=context_state.get("rag", []), + reflection_notes=reflection_notes, + context=context_state ) # 4. Refinement result = await refine_answer( draft_output=draft, reflection_notes=reflection_notes, - identity_block=None, - rag_block=None, + identity_block=identity_block, + rag_block=context_state.get("rag", []), ) final_neutral = result["final_output"] - # 5. 
Persona layer persona_answer = await speak(final_neutral) - # 6. Return full bundle + # 6. Update session state with assistant's response + update_last_assistant_message(req.session_id, persona_answer) + + # 7. Return full bundle return { "draft": draft, "neutral": final_neutral, "persona": persona_answer, "reflection": reflection_notes, "session_id": req.session_id, + "context_summary": { + "rag_results": len(context_state.get("rag", [])), + "minutes_since_last": context_state.get("minutes_since_last_msg"), + "message_count": context_state.get("message_count"), + "mode": context_state.get("mode"), + } } diff --git a/intake/intake.py b/intake/intake.py index fb2a564..1109b55 100644 --- a/intake/intake.py +++ b/intake/intake.py @@ -97,22 +97,106 @@ def push_to_neomem(summary: str, session_id: str): except Exception as e: print(f"NeoMem push failed: {e}") -# ───────────────────────────── -# Background summarizer -# ───────────────────────────── +# ─────────────────────────────────────────────── +# Multilevel Summaries (L1, L5, L10, L20, L30) +# ─────────────────────────────────────────────── + +# History maps +L10_HISTORY = {} # session_id → list of L10 blocks +L20_HISTORY = {} # session_id → list of merged overviews + +def summarize_L1(buf): + return summarize_simple(buf[-5:]) + +def summarize_L5(buf): + return summarize_simple(buf[-10:]) + +def summarize_L10(buf): + # “Reality Check” for last 10 exchanges + text = "" + for e in buf[-10:]: + text += f"User: {e['user_msg']}\nAssistant: {e['assistant_msg']}\n\n" + + prompt = f""" + You are Lyra Intake performing a short 'Reality Check'. + Summarize the last block of conversation (up to 10 exchanges) + in one clear paragraph focusing on tone, intent, and direction. + + {text} + + Reality Check: + """ + return llm(prompt) + +def summarize_L20(L10_list): + joined = "\n\n".join(L10_list) + + prompt = f""" + You are Lyra Intake creating a 'Session Overview'. + Merge the following Reality Check paragraphs into one short summary + capturing progress, themes, and the direction of the conversation. + + {joined} + + Overview: + """ + return llm(prompt) + +def summarize_L30(L20_list): + joined = "\n\n".join(L20_list) + + prompt = f""" + You are Lyra Intake generating a 'Continuity Report'. + Condense these session overviews into one high-level reflection, + noting major themes, persistent goals, and shifts. 
+
+    {joined}
+
+    Continuity Report:
+    """
+    return llm(prompt)
+
+
 def bg_summarize(session_id: str):
+    """Runs the multilevel summarizer on every exchange."""
     try:
         hopper = SESSIONS.get(session_id)
         if not hopper:
             return
 
         buf = list(hopper["buffer"])
-        summary = summarize_simple(buf)
-        push_to_neomem(summary, session_id)
+        if not buf:
+            return
+
+        # Ensure history lists exist
+        L10_HISTORY.setdefault(session_id, [])
+        L20_HISTORY.setdefault(session_id, [])
+
+        # L1/L5 are cheap views over the buffer; the /context route
+        # rebuilds them on demand, so only the rollup tiers run here.
+
+        # L10 (append to history)
+        s_L10 = summarize_L10(buf)
+        L10_HISTORY[session_id].append(s_L10)
+
+        # L20 (merge all L10s)
+        s_L20 = summarize_L20(L10_HISTORY[session_id])
+        L20_HISTORY[session_id].append(s_L20)
+
+        # L30 (merge all L20s)
+        s_L30 = summarize_L30(L20_HISTORY[session_id])
+
+        # Push the rollup tiers to NeoMem
+        push_to_neomem(s_L10, session_id)
+        push_to_neomem(s_L20, session_id)
+        push_to_neomem(s_L30, session_id)
+
+        print(f"🧩 L10/L20/L30 updated for {session_id}")
 
-        print(f"🧩 Summary generated for {session_id}")
     except Exception as e:
-        print(f"Summarizer error: {e}")
+        print(f"💥 Multilevel summarizer error for {session_id}: {e}")
+
 
 # ─────────────────────────────
 # Routes
 # ─────────────────────────────
@@ -155,6 +239,42 @@ def get_summary(session_id: str = Query(...)):
     summary = summarize_simple(list(hopper["buffer"]))
     return {"summary_text": summary, "session_id": session_id}
 
+@app.get("/context")
+def get_context(session_id: str = Query(...)):
+    """Return full multilevel summary context for Cortex."""
+    if session_id not in SESSIONS:
+        return {
+            "session_id": session_id,
+            "exchange_count": 0,
+            "L1": "",
+            "L5": "",
+            "L10": "",
+            "L20": "",
+            "L30": "",
+            "last_updated": None
+        }
+
+    buffer = list(SESSIONS[session_id]["buffer"])
+
+    # Build levels on demand; skip the merge calls when no history exists yet
+    L1 = summarize_L1(buffer)
+    L5 = summarize_L5(buffer)
+    L10 = summarize_L10(buffer)
+
+    hist10 = L10_HISTORY.get(session_id, [])
+    hist20 = L20_HISTORY.get(session_id, [])
+    L20 = summarize_L20(hist10) if hist10 else ""
+    L30 = summarize_L30(hist20) if hist20 else ""
+
+    return {
+        "session_id": session_id,
+        "exchange_count": len(buffer),
+        "L1": L1,
+        "L5": L5,
+        "L10": L10,
+        "L20": L20,
+        "L30": L30,
+        "last_updated": datetime.now().isoformat()
+    }
+
+
 @app.get("/health")
 def health():
     return {"ok": True, "model": SUMMARY_MODEL, "url": SUMMARY_URL}
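
Usage sketch (illustrative, not part of the patch): a minimal end-to-end smoke test for
the new wiring. It assumes Cortex's FastAPI app mounts cortex_router at the root on port
7081 and Intake is reachable on port 7080 — both hypothetical, so match them to your
compose file. The JSON fields mirror what run_reason() reads from ReasonRequest.

    # smoke_context.py — hand-rolled smoke test, not shipped with this change
    import asyncio
    import httpx

    CORTEX = "http://localhost:7081"   # assumed Cortex base URL
    INTAKE = "http://localhost:7080"   # assumed Intake base URL

    async def main() -> None:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # POST /reason exercises collect_context(): session state is
            # initialized, Intake L1-L30 summaries are fetched, and NeoMem
            # is searched for RAG hits before the reasoning stages run.
            r = await client.post(
                f"{CORTEX}/reason",
                json={"session_id": "smoke-1",
                      "user_prompt": "What were we working on?"},
            )
            r.raise_for_status()
            bundle = r.json()
            print("persona:", bundle["persona"])
            print("context_summary:", bundle["context_summary"])

            # GET /context shows the raw multilevel summaries Intake holds.
            c = await client.get(f"{INTAKE}/context",
                                 params={"session_id": "smoke-1"})
            print("L20 overview:", c.json().get("L20"))

    if __name__ == "__main__":
        asyncio.run(main())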
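
One property of the rollup worth flagging: summarize_L20 re-merges every stored L10
block on each exchange, and summarize_L30 every stored L20, so the rollup prompts grow
linearly with session length. A minimal cap (a sketch only — MAX_ROLLUP is an invented
knob, not in the patch) would bound the per-exchange merge window inside bg_summarize:

    MAX_ROLLUP = 10  # invented limit: merge at most the last 10 blocks

    # replaces the unbounded merges over the full histories
    s_L20 = summarize_L20(L10_HISTORY[session_id][-MAX_ROLLUP:])
    L20_HISTORY[session_id].append(s_L20)

    s_L30 = summarize_L30(L20_HISTORY[session_id][-MAX_ROLLUP:])

Whether to cap, decay, or compact the histories is a design choice; the slice above just
keeps the LLM input per exchange bounded.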