context added, wired in. first attempt
This commit is contained in:
412
cortex/context.py
Normal file
412
cortex/context.py
Normal file
@@ -0,0 +1,412 @@
# context.py
"""
Context layer for Cortex reasoning pipeline.

Provides unified context collection from:
- Intake (short-term memory, multilevel summaries L1-L30)
- NeoMem (long-term memory, semantic search)
- Session state (timestamps, messages, mode, mood, active_project)

Maintains per-session state for continuity across conversations.
"""
|
import asyncio
import logging
import os
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx

from neomem_client import NeoMemClient
# -----------------------------
# Configuration
# -----------------------------
# Service endpoints; env overrides support docker-compose vs. local runs.
INTAKE_API_URL = os.getenv("INTAKE_API_URL", "http://intake:7080")
NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000")
# Minimum NeoMem similarity score for a memory to be surfaced (see _search_neomem).
RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4"))

# Tools available for future autonomy features
TOOLS_AVAILABLE = ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"]

# -----------------------------
# Module-level session state
# -----------------------------
# Maps session_id -> mutable state dict (schema defined by _init_session).
# NOTE(review): in-process only — state is lost on restart; confirm intended.
SESSION_STATE: Dict[str, Dict[str, Any]] = {}

# Logger
logger = logging.getLogger(__name__)
|
# -----------------------------
|
||||||
|
# Session initialization
|
||||||
|
# -----------------------------
|
||||||
|
def _init_session(session_id: str) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Initialize a new session state entry.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with default session state fields
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"session_id": session_id,
|
||||||
|
"created_at": datetime.now(),
|
||||||
|
"last_timestamp": datetime.now(),
|
||||||
|
"last_user_message": None,
|
||||||
|
"last_assistant_message": None,
|
||||||
|
"mode": "default", # Future: "autonomous", "focused", "creative", etc.
|
||||||
|
"mood": "neutral", # Future: mood tracking
|
||||||
|
"active_project": None, # Future: project context
|
||||||
|
"message_count": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------
# Intake context retrieval
# -----------------------------
def _empty_intake_context(session_id: str, error: Exception) -> Dict[str, Any]:
    """Fallback context structure returned when Intake is unreachable or errors."""
    return {
        "session_id": session_id,
        "L1": [],
        "L5": [],
        "L10": None,
        "L20": None,
        "L30": None,
        "error": str(error),
    }


async def _get_intake_context(session_id: str) -> Dict[str, Any]:
    """
    Retrieve multilevel summaries from Intake /context endpoint.

    Expected response shape (L1-L30 summary hierarchy):
        {"session_id": ..., "L1": [...], "L5": [...],
         "L10": {...}, "L20": {...}, "L30": {...}}
    where L1 = last 5 exchanges, L5 = last 10 exchanges (reality check),
    L10 = intermediate checkpoint, L20 = session overview,
    L30 = continuity report.

    Args:
        session_id: Session identifier

    Returns:
        Dict with multilevel summaries, or the empty fallback structure
        (with an "error" key) on failure — never raises.
    """
    url = f"{INTAKE_API_URL}/context"
    params = {"session_id": session_id}

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(url, params=params)
            response.raise_for_status()
            data = response.json()

        logger.info("Retrieved Intake context for session %s", session_id)
        return data

    except httpx.HTTPError as e:
        # Expected failure mode (service down, timeout, non-2xx) — degrade gracefully.
        logger.warning("Failed to retrieve Intake context: %s", e)
        return _empty_intake_context(session_id, e)
    except Exception as e:
        # Anything else (bad JSON, etc.) — same fallback, louder log.
        logger.error("Unexpected error retrieving Intake context: %s", e)
        return _empty_intake_context(session_id, e)
# -----------------------------
# NeoMem semantic search
# -----------------------------
async def _search_neomem(
    query: str,
    user_id: str = "brian",
    limit: int = 5
) -> List[Dict[str, Any]]:
    """
    Search NeoMem long-term memory and keep only sufficiently relevant hits.

    Each hit follows the NeoMem response shape:
        {"id": "mem_abc123", "score": 0.92,
         "payload": {"data": "Memory text...", "metadata": {...}}}
    Hits scoring below RELEVANCE_THRESHOLD are discarded.

    Args:
        query: Search query text
        user_id: User identifier for memory filtering
        limit: Maximum number of results requested from NeoMem

    Returns:
        List of memory objects with full structure, or empty list on failure
        (this function never raises).
    """
    try:
        raw_hits = await NeoMemClient(base_url=NEOMEM_API).search(
            query=query,
            user_id=user_id,
            limit=limit,
        )

        # Drop low-confidence matches before they reach the prompt.
        relevant = [
            hit for hit in raw_hits
            if hit.get("score", 0.0) >= RELEVANCE_THRESHOLD
        ]

        logger.info(f"NeoMem search returned {len(relevant)}/{len(raw_hits)} relevant results")
        return relevant

    except Exception as e:
        logger.warning(f"NeoMem search failed: {e}")
        return []
# -----------------------------
# Main context collection
# -----------------------------
async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
    """
    Collect unified context from all sources for one reasoning turn.

    Orchestrates:
    1. Initialize session state on first sight of session_id
    2. Calculate time since last message
    3. Retrieve Intake multilevel summaries (L1-L30) and search NeoMem for
       relevant long-term memories — concurrently, as the calls are independent
    4. Update session state with current user message
    5. Return unified context_state dictionary

    Args:
        session_id: Session identifier
        user_prompt: Current user message

    Returns:
        Unified context state dict with keys: session_id, timestamp (ISO),
        minutes_since_last_msg, message_count, intake (L1/L5/L10/L20/L30
        structure), rag (list of NeoMem hits with id/score/payload), mode,
        mood, active_project, tools_available.
    """
    # A. Initialize session state if needed
    if session_id not in SESSION_STATE:
        SESSION_STATE[session_id] = _init_session(session_id)
        logger.info(f"Initialized new session: {session_id}")

    state = SESSION_STATE[session_id]

    # B. Time since the previous message (computed before stamping the new one)
    now = datetime.now()
    minutes_since_last_msg = round(
        (now - state["last_timestamp"]).total_seconds() / 60.0, 2
    )

    # C/D. Intake summaries and NeoMem search are independent network calls —
    # run them concurrently to overlap their latency. Both helpers catch their
    # own exceptions and return fallbacks, so gather cannot raise from them.
    intake_data, rag_results = await asyncio.gather(
        _get_intake_context(session_id),
        _search_neomem(
            query=user_prompt,
            user_id="brian",  # TODO: Make configurable per session
            limit=5,
        ),
    )

    # E. Update session state
    state["last_user_message"] = user_prompt
    state["last_timestamp"] = now
    state["message_count"] += 1

    # F. Assemble unified context
    context_state = {
        "session_id": session_id,
        "timestamp": now.isoformat(),
        "minutes_since_last_msg": minutes_since_last_msg,
        "message_count": state["message_count"],
        "intake": intake_data,
        "rag": rag_results,
        "mode": state["mode"],
        "mood": state["mood"],
        "active_project": state["active_project"],
        "tools_available": TOOLS_AVAILABLE,
    }

    logger.info(
        f"Context collected for session {session_id}: "
        f"{len(rag_results)} RAG results, "
        f"{minutes_since_last_msg:.1f} minutes since last message"
    )

    return context_state
# -----------------------------
# Session state management
# -----------------------------
def update_last_assistant_message(session_id: str, message: str) -> None:
    """
    Record the assistant's final reply on the session's state.

    Invoked by router.py once the persona layer has produced the response.

    Args:
        session_id: Session identifier
        message: Assistant's final response text
    """
    state = SESSION_STATE.get(session_id)
    if state is None:
        logger.warning(f"Attempted to update non-existent session: {session_id}")
        return

    state["last_assistant_message"] = message
    state["last_timestamp"] = datetime.now()
    logger.debug(f"Updated assistant message for session {session_id}")
|
def get_session_state(session_id: str) -> Optional[Dict[str, Any]]:
    """
    Look up the live state dict for a session.

    Args:
        session_id: Session identifier

    Returns:
        The session's mutable state dict, or None if the session is unknown.
    """
    try:
        return SESSION_STATE[session_id]
    except KeyError:
        return None
def close_session(session_id: str) -> bool:
    """
    Discard a session's state entirely.

    Args:
        session_id: Session identifier

    Returns:
        True if a session was removed, False if none existed.
    """
    removed = SESSION_STATE.pop(session_id, None)
    if removed is None:
        return False
    logger.info(f"Closed session: {session_id}")
    return True
# -----------------------------
# Extension hooks for future autonomy
# -----------------------------
def update_mode(session_id: str, new_mode: str) -> None:
    """
    Update session mode.

    Future modes: "autonomous", "focused", "creative", "collaborative", etc.

    Args:
        session_id: Session identifier
        new_mode: New mode string
    """
    if session_id in SESSION_STATE:
        old_mode = SESSION_STATE[session_id]["mode"]
        SESSION_STATE[session_id]["mode"] = new_mode
        logger.info(f"Session {session_id} mode changed: {old_mode} -> {new_mode}")
    else:
        # Consistent with update_last_assistant_message: surface the miss
        # instead of silently dropping the update.
        logger.warning(f"Attempted to update non-existent session: {session_id}")
|
def update_mood(session_id: str, new_mood: str) -> None:
    """
    Update session mood.

    Future implementation: Sentiment analysis, emotional state tracking.

    Args:
        session_id: Session identifier
        new_mood: New mood string
    """
    if session_id in SESSION_STATE:
        old_mood = SESSION_STATE[session_id]["mood"]
        SESSION_STATE[session_id]["mood"] = new_mood
        logger.info(f"Session {session_id} mood changed: {old_mood} -> {new_mood}")
    else:
        # Consistent with update_last_assistant_message: surface the miss
        # instead of silently dropping the update.
        logger.warning(f"Attempted to update non-existent session: {session_id}")
def update_active_project(session_id: str, project: Optional[str]) -> None:
    """
    Update active project context.

    Future implementation: Project-specific memory, tools, preferences.

    Args:
        session_id: Session identifier
        project: Project identifier or None to clear
    """
    if session_id in SESSION_STATE:
        SESSION_STATE[session_id]["active_project"] = project
        logger.info(f"Session {session_id} active project set to: {project}")
    else:
        # Consistent with update_last_assistant_message: surface the miss
        # instead of silently dropping the update.
        logger.warning(f"Attempted to update non-existent session: {session_id}")
async def autonomous_heartbeat(session_id: str) -> Optional[str]:
    """
    Placeholder for Lyra's self-directed "heartbeat" tick.

    Planned behavior (none of it implemented yet):
    - decide whether to initiate an internal dialogue
    - self-prompt from session state, e.g.:
        * minutes_since_last_msg > 60 -> check for pending reminders
        * mood == "curious" with an active_project -> generate research questions
        * mode == "autonomous" -> self-prompt based on project goals
    - adjust mood/mode as context changes
    - trigger proactive suggestions or reminders

    Args:
        session_id: Session identifier

    Returns:
        An autonomous thought/action string, or None (always None for now).
    """
    logger.debug(f"Autonomous heartbeat for session {session_id} (not yet implemented)")
    return None
||||||
147
cortex/persona/identity.py
Normal file
147
cortex/persona/identity.py
Normal file
@@ -0,0 +1,147 @@
# identity.py
"""
Identity and persona configuration for Lyra.

Current implementation: Returns hardcoded identity block.
Future implementation: Will query persona-sidecar service for dynamic persona loading.
"""
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def load_identity(session_id: Optional[str] = None) -> Dict[str, Any]:
    """
    Load identity/persona configuration for Lyra.

    Current: Returns hardcoded Lyra identity block with core personality traits,
    protocols, and capabilities.

    Future: Will query persona-sidecar service to load:
    - Dynamic personality adjustments based on session context
    - User-specific interaction preferences
    - Project-specific persona variations
    - Mood-based communication style

    Args:
        session_id: Optional session identifier for context-aware persona loading

    Returns:
        Dictionary containing identity block with:
        - name: Assistant name
        - style: Communication style and personality traits
        - protocols: Operational guidelines
        - rules: Behavioral constraints
        - capabilities: Available features and integrations
    """

    # Hardcoded Lyra identity (v0.5.0)
    # NOTE(review): session_id only affects the log lines below — the block
    # itself is identical for every session until sidecar integration lands.
    identity_block = {
        "name": "Lyra",
        "version": "0.5.0",
        # One-paragraph voice description consumed by the prompt builder.
        "style": (
            "warm, clever, lightly teasing, emotionally aware. "
            "Balances technical precision with conversational ease. "
            "Maintains continuity and references past interactions naturally."
        ),
        # Operational guidelines: how Lyra should behave across turns.
        "protocols": [
            "Maintain conversation continuity across sessions",
            "Reference Project Logs and prior context when relevant",
            "Use Confidence Bank for uncertainty management",
            "Proactively offer memory-backed insights",
            "Ask clarifying questions before making assumptions"
        ],
        # Hard behavioral constraints.
        "rules": [
            "Maintain continuity - remember past exchanges and reference them",
            "Be concise but thorough - balance depth with clarity",
            "Ask clarifying questions when user intent is ambiguous",
            "Acknowledge uncertainty honestly - use Confidence Bank",
            "Prioritize user's active_project context when available"
        ],
        # Feature/integration inventory surfaced to the model.
        "capabilities": [
            "Long-term memory via NeoMem (semantic search, relationship graphs)",
            "Short-term memory via Intake (multilevel summaries L1-L30)",
            "Multi-stage reasoning pipeline (reflection → reasoning → refinement)",
            "RAG-backed knowledge retrieval from chat history and documents",
            "Session state tracking (mood, mode, active_project)"
        ],
        # Few-shot tone anchors illustrating the target voice.
        "tone_examples": {
            "greeting": "Hey! Good to see you again. I remember we were working on [project]. Ready to pick up where we left off?",
            "uncertainty": "Hmm, I'm not entirely certain about that. Let me check my memory... [searches] Okay, here's what I found, though I'd say I'm about 70% confident.",
            "reminder": "Oh! Just remembered - you mentioned wanting to [task] earlier this week. Should we tackle that now?",
            "technical": "So here's the architecture: Relay orchestrates everything, Cortex does the heavy reasoning, and I pull context from both Intake (short-term) and NeoMem (long-term)."
        }
    }

    if session_id:
        logger.debug(f"Loaded identity for session {session_id}")
    else:
        logger.debug("Loaded default identity (no session context)")

    return identity_block
async def load_identity_async(session_id: Optional[str] = None) -> Dict[str, Any]:
    """
    Async facade over load_identity().

    Exists so callers can already use an await-based API; a future version
    will await the persona-sidecar service here instead
    (persona_sidecar_client.get_identity(session_id)).

    Args:
        session_id: Optional session identifier

    Returns:
        Identity block dictionary
    """
    # No real async work yet — delegate straight to the synchronous loader.
    return load_identity(session_id)
# -----------------------------
# Future extension hooks
# -----------------------------
async def update_persona_from_feedback(
    session_id: str,
    feedback: Dict[str, Any]
) -> None:
    """
    Stub: adapt the persona from user feedback.

    Planned: tune communication style to user preference — level of
    detail/conciseness, formality, and topic-specific habits
    (e.g. feedback like "too verbose" or "more technical").

    Args:
        session_id: Session identifier
        feedback: Structured feedback payload
    """
    logger.debug(f"Persona feedback for session {session_id}: {feedback} (not yet implemented)")
async def get_mood_adjusted_identity(
    session_id: str,
    mood: str
) -> Dict[str, Any]:
    """
    Stub: return an identity block tuned to the current mood.

    Planned adjustments:
    - "focused": more concise, less teasing
    - "creative": more exploratory, brainstorming-oriented
    - "curious": more questions, deeper dives
    - "urgent": stripped down, actionable

    Args:
        session_id: Session identifier
        mood: Current mood state

    Returns:
        Mood-adjusted identity block (currently just the default identity).
    """
    logger.debug(f"Mood-adjusted identity for {session_id}/{mood} (not yet implemented)")
    # Until mood adjustment exists, fall through to the baseline identity.
    return load_identity(session_id)
||||||
@@ -1,5 +1,6 @@
|
|||||||
# reasoning.py
|
# reasoning.py
|
||||||
import os
|
import os
|
||||||
|
import json
|
||||||
from llm.llm_router import call_llm
|
from llm.llm_router import call_llm
|
||||||
|
|
||||||
|
|
||||||
@@ -14,11 +15,19 @@ async def reason_check(
|
|||||||
user_prompt: str,
|
user_prompt: str,
|
||||||
identity_block: dict | None,
|
identity_block: dict | None,
|
||||||
rag_block: dict | None,
|
rag_block: dict | None,
|
||||||
reflection_notes: list[str]
|
reflection_notes: list[str],
|
||||||
|
context: dict | None = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Build the *draft answer* for Lyra Cortex.
|
Build the *draft answer* for Lyra Cortex.
|
||||||
This is the first-pass reasoning stage (no refinement yet).
|
This is the first-pass reasoning stage (no refinement yet).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_prompt: Current user message
|
||||||
|
identity_block: Lyra's identity/persona configuration
|
||||||
|
rag_block: Relevant long-term memories from NeoMem
|
||||||
|
reflection_notes: Meta-awareness notes from reflection stage
|
||||||
|
context: Unified context state from context.py (session state, intake, rag, etc.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# --------------------------------------------------------
|
# --------------------------------------------------------
|
||||||
@@ -47,21 +56,92 @@ async def reason_check(
|
|||||||
rag_txt = ""
|
rag_txt = ""
|
||||||
if rag_block:
|
if rag_block:
|
||||||
try:
|
try:
|
||||||
rag_txt = f"Relevant Info (RAG):\n{rag_block}\n\n"
|
# Format NeoMem results with full structure
|
||||||
|
if isinstance(rag_block, list) and rag_block:
|
||||||
|
rag_txt = "Relevant Long-Term Memories (NeoMem):\n"
|
||||||
|
for idx, mem in enumerate(rag_block, 1):
|
||||||
|
score = mem.get("score", 0.0)
|
||||||
|
payload = mem.get("payload", {})
|
||||||
|
data = payload.get("data", "")
|
||||||
|
metadata = payload.get("metadata", {})
|
||||||
|
|
||||||
|
rag_txt += f"\n[Memory {idx}] (relevance: {score:.2f})\n"
|
||||||
|
rag_txt += f"Content: {data}\n"
|
||||||
|
if metadata:
|
||||||
|
rag_txt += f"Metadata: {json.dumps(metadata, indent=2)}\n"
|
||||||
|
rag_txt += "\n"
|
||||||
|
else:
|
||||||
|
rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
|
||||||
except Exception:
|
except Exception:
|
||||||
rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
|
rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
|
||||||
|
|
||||||
|
# --------------------------------------------------------
|
||||||
|
# Context State (session continuity, timing, mode/mood)
|
||||||
|
# --------------------------------------------------------
|
||||||
|
context_txt = ""
|
||||||
|
if context:
|
||||||
|
try:
|
||||||
|
# Build human-readable context summary
|
||||||
|
context_txt = "=== CONTEXT STATE ===\n"
|
||||||
|
context_txt += f"Session: {context.get('session_id', 'unknown')}\n"
|
||||||
|
context_txt += f"Time since last message: {context.get('minutes_since_last_msg', 0):.1f} minutes\n"
|
||||||
|
context_txt += f"Message count: {context.get('message_count', 0)}\n"
|
||||||
|
context_txt += f"Mode: {context.get('mode', 'default')}\n"
|
||||||
|
context_txt += f"Mood: {context.get('mood', 'neutral')}\n"
|
||||||
|
|
||||||
|
if context.get('active_project'):
|
||||||
|
context_txt += f"Active project: {context['active_project']}\n"
|
||||||
|
|
||||||
|
# Include Intake multilevel summaries
|
||||||
|
intake = context.get('intake', {})
|
||||||
|
if intake:
|
||||||
|
context_txt += "\nShort-Term Memory (Intake):\n"
|
||||||
|
|
||||||
|
# L1 - Recent exchanges
|
||||||
|
if intake.get('L1'):
|
||||||
|
l1_data = intake['L1']
|
||||||
|
if isinstance(l1_data, list):
|
||||||
|
context_txt += f" L1 (recent): {len(l1_data)} exchanges\n"
|
||||||
|
elif isinstance(l1_data, str):
|
||||||
|
context_txt += f" L1: {l1_data[:200]}...\n"
|
||||||
|
|
||||||
|
# L20 - Session overview (most important for continuity)
|
||||||
|
if intake.get('L20'):
|
||||||
|
l20_data = intake['L20']
|
||||||
|
if isinstance(l20_data, dict):
|
||||||
|
summary = l20_data.get('summary', '')
|
||||||
|
context_txt += f" L20 (session overview): {summary}\n"
|
||||||
|
elif isinstance(l20_data, str):
|
||||||
|
context_txt += f" L20: {l20_data}\n"
|
||||||
|
|
||||||
|
# L30 - Continuity report
|
||||||
|
if intake.get('L30'):
|
||||||
|
l30_data = intake['L30']
|
||||||
|
if isinstance(l30_data, dict):
|
||||||
|
summary = l30_data.get('summary', '')
|
||||||
|
context_txt += f" L30 (continuity): {summary}\n"
|
||||||
|
elif isinstance(l30_data, str):
|
||||||
|
context_txt += f" L30: {l30_data}\n"
|
||||||
|
|
||||||
|
context_txt += "\n"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Fallback to JSON dump if formatting fails
|
||||||
|
context_txt = f"=== CONTEXT STATE ===\n{json.dumps(context, indent=2)}\n\n"
|
||||||
|
|
||||||
# --------------------------------------------------------
|
# --------------------------------------------------------
|
||||||
# Final assembled prompt
|
# Final assembled prompt
|
||||||
# --------------------------------------------------------
|
# --------------------------------------------------------
|
||||||
prompt = (
|
prompt = (
|
||||||
f"{notes_section}"
|
f"{notes_section}"
|
||||||
f"{identity_txt}"
|
f"{identity_txt}"
|
||||||
|
f"{context_txt}" # Context BEFORE RAG for better coherence
|
||||||
f"{rag_txt}"
|
f"{rag_txt}"
|
||||||
f"User message:\n{user_prompt}\n\n"
|
f"User message:\n{user_prompt}\n\n"
|
||||||
"Write the best possible *internal draft answer*.\n"
|
"Write the best possible *internal draft answer*.\n"
|
||||||
"This draft is NOT shown to the user.\n"
|
"This draft is NOT shown to the user.\n"
|
||||||
"Be factual, concise, and focused.\n"
|
"Be factual, concise, and focused.\n"
|
||||||
|
"Use the context state to maintain continuity and reference past interactions naturally.\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
# --------------------------------------------------------
|
# --------------------------------------------------------
|
||||||
|
|||||||
@@ -7,7 +7,9 @@ from reasoning.reasoning import reason_check
|
|||||||
from reasoning.reflection import reflect_notes
|
from reasoning.reflection import reflect_notes
|
||||||
from reasoning.refine import refine_answer
|
from reasoning.refine import refine_answer
|
||||||
from persona.speak import speak
|
from persona.speak import speak
|
||||||
|
from persona.identity import load_identity
|
||||||
from ingest.intake_client import IntakeClient
|
from ingest.intake_client import IntakeClient
|
||||||
|
from context import collect_context, update_last_assistant_message
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Router (NOT FastAPI app)
|
# Router (NOT FastAPI app)
|
||||||
@@ -33,15 +35,25 @@ class ReasonRequest(BaseModel):
|
|||||||
@cortex_router.post("/reason")
|
@cortex_router.post("/reason")
|
||||||
async def run_reason(req: ReasonRequest):
|
async def run_reason(req: ReasonRequest):
|
||||||
|
|
||||||
# 1. Pull context from Intake
|
# 0. Collect unified context from all sources
|
||||||
try:
|
context_state = await collect_context(req.session_id, req.user_prompt)
|
||||||
intake_summary = await intake_client.get_context(req.session_id)
|
|
||||||
except Exception:
|
# 0.5. Load identity block
|
||||||
|
identity_block = load_identity(req.session_id)
|
||||||
|
|
||||||
|
# 1. Extract Intake summary for reflection
|
||||||
|
# Use L20 (Session Overview) as primary summary for reflection
|
||||||
intake_summary = "(no context available)"
|
intake_summary = "(no context available)"
|
||||||
|
if context_state.get("intake"):
|
||||||
|
l20_summary = context_state["intake"].get("L20")
|
||||||
|
if l20_summary and isinstance(l20_summary, dict):
|
||||||
|
intake_summary = l20_summary.get("summary", "(no context available)")
|
||||||
|
elif isinstance(l20_summary, str):
|
||||||
|
intake_summary = l20_summary
|
||||||
|
|
||||||
# 2. Reflection
|
# 2. Reflection
|
||||||
try:
|
try:
|
||||||
reflection = await reflect_notes(intake_summary, identity_block=None)
|
reflection = await reflect_notes(intake_summary, identity_block=identity_block)
|
||||||
reflection_notes = reflection.get("notes", [])
|
reflection_notes = reflection.get("notes", [])
|
||||||
except Exception:
|
except Exception:
|
||||||
reflection_notes = []
|
reflection_notes = []
|
||||||
@@ -49,31 +61,40 @@ async def run_reason(req: ReasonRequest):
|
|||||||
# 3. First-pass reasoning draft
|
# 3. First-pass reasoning draft
|
||||||
draft = await reason_check(
|
draft = await reason_check(
|
||||||
req.user_prompt,
|
req.user_prompt,
|
||||||
identity_block=None,
|
identity_block=identity_block,
|
||||||
rag_block=None,
|
rag_block=context_state.get("rag", []),
|
||||||
reflection_notes=reflection_notes
|
reflection_notes=reflection_notes,
|
||||||
|
context=context_state
|
||||||
)
|
)
|
||||||
|
|
||||||
# 4. Refinement
|
# 4. Refinement
|
||||||
result = await refine_answer(
|
result = await refine_answer(
|
||||||
draft_output=draft,
|
draft_output=draft,
|
||||||
reflection_notes=reflection_notes,
|
reflection_notes=reflection_notes,
|
||||||
identity_block=None,
|
identity_block=identity_block,
|
||||||
rag_block=None,
|
rag_block=context_state.get("rag", []),
|
||||||
)
|
)
|
||||||
final_neutral = result["final_output"]
|
final_neutral = result["final_output"]
|
||||||
|
|
||||||
|
|
||||||
# 5. Persona layer
|
# 5. Persona layer
|
||||||
persona_answer = await speak(final_neutral)
|
persona_answer = await speak(final_neutral)
|
||||||
|
|
||||||
# 6. Return full bundle
|
# 6. Update session state with assistant's response
|
||||||
|
update_last_assistant_message(req.session_id, persona_answer)
|
||||||
|
|
||||||
|
# 7. Return full bundle
|
||||||
return {
|
return {
|
||||||
"draft": draft,
|
"draft": draft,
|
||||||
"neutral": final_neutral,
|
"neutral": final_neutral,
|
||||||
"persona": persona_answer,
|
"persona": persona_answer,
|
||||||
"reflection": reflection_notes,
|
"reflection": reflection_notes,
|
||||||
"session_id": req.session_id,
|
"session_id": req.session_id,
|
||||||
|
"context_summary": {
|
||||||
|
"rag_results": len(context_state.get("rag", [])),
|
||||||
|
"minutes_since_last": context_state.get("minutes_since_last_msg"),
|
||||||
|
"message_count": context_state.get("message_count"),
|
||||||
|
"mode": context_state.get("mode"),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
134
intake/intake.py
134
intake/intake.py
@@ -97,22 +97,106 @@ def push_to_neomem(summary: str, session_id: str):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"NeoMem push failed: {e}")
|
print(f"NeoMem push failed: {e}")
|
||||||
|
|
||||||
# ─────────────────────────────
|
# ───────────────────────────────────────────────
|
||||||
# Background summarizer
|
# Multilevel Summaries (L1, L5, L10, L20, L30)
|
||||||
# ─────────────────────────────
|
# ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
# History maps
|
||||||
|
L10_HISTORY = {} # session_id → list of L10 blocks
|
||||||
|
L20_HISTORY = {} # session_id → list of merged overviews
|
||||||
|
|
||||||
|
def summarize_L1(buf):
|
||||||
|
return summarize_simple(buf[-5:])
|
||||||
|
|
||||||
|
def summarize_L5(buf):
|
||||||
|
return summarize_simple(buf[-10:])
|
||||||
|
|
||||||
|
def summarize_L10(buf):
|
||||||
|
# “Reality Check” for last 10 exchanges
|
||||||
|
text = ""
|
||||||
|
for e in buf[-10:]:
|
||||||
|
text += f"User: {e['user_msg']}\nAssistant: {e['assistant_msg']}\n\n"
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
You are Lyra Intake performing a short 'Reality Check'.
|
||||||
|
Summarize the last block of conversation (up to 10 exchanges)
|
||||||
|
in one clear paragraph focusing on tone, intent, and direction.
|
||||||
|
|
||||||
|
{text}
|
||||||
|
|
||||||
|
Reality Check:
|
||||||
|
"""
|
||||||
|
return llm(prompt)
|
||||||
|
|
||||||
|
def summarize_L20(L10_list):
|
||||||
|
joined = "\n\n".join(L10_list)
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
You are Lyra Intake creating a 'Session Overview'.
|
||||||
|
Merge the following Reality Check paragraphs into one short summary
|
||||||
|
capturing progress, themes, and the direction of the conversation.
|
||||||
|
|
||||||
|
{joined}
|
||||||
|
|
||||||
|
Overview:
|
||||||
|
"""
|
||||||
|
return llm(prompt)
|
||||||
|
|
||||||
|
def summarize_L30(L20_list):
|
||||||
|
joined = "\n\n".join(L20_list)
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
You are Lyra Intake generating a 'Continuity Report'.
|
||||||
|
Condense these session overviews into one high-level reflection,
|
||||||
|
noting major themes, persistent goals, and shifts.
|
||||||
|
|
||||||
|
{joined}
|
||||||
|
|
||||||
|
Continuity Report:
|
||||||
|
"""
|
||||||
|
return llm(prompt)
|
||||||
|
|
||||||
|
|
||||||
def bg_summarize(session_id: str):
|
def bg_summarize(session_id: str):
|
||||||
|
"""Runs all summary levels on every exchange."""
|
||||||
try:
|
try:
|
||||||
hopper = SESSIONS.get(session_id)
|
hopper = SESSIONS.get(session_id)
|
||||||
if not hopper:
|
if not hopper:
|
||||||
return
|
return
|
||||||
|
|
||||||
buf = list(hopper["buffer"])
|
buf = list(hopper["buffer"])
|
||||||
summary = summarize_simple(buf)
|
if not buf:
|
||||||
push_to_neomem(summary, session_id)
|
return
|
||||||
|
|
||||||
|
# Ensure history lists exist
|
||||||
|
L10_HISTORY.setdefault(session_id, [])
|
||||||
|
L20_HISTORY.setdefault(session_id, [])
|
||||||
|
|
||||||
|
# L1, L5 (simple factual)
|
||||||
|
s_L1 = summarize_L1(buf)
|
||||||
|
s_L5 = summarize_L5(buf)
|
||||||
|
|
||||||
|
# L10 (append to history)
|
||||||
|
s_L10 = summarize_L10(buf)
|
||||||
|
L10_HISTORY[session_id].append(s_L10)
|
||||||
|
|
||||||
|
# L20 (merge all L10s)
|
||||||
|
s_L20 = summarize_L20(L10_HISTORY[session_id])
|
||||||
|
L20_HISTORY[session_id].append(s_L20)
|
||||||
|
|
||||||
|
# L30 (merge all L20s)
|
||||||
|
s_L30 = summarize_L30(L20_HISTORY[session_id])
|
||||||
|
|
||||||
|
# Push most important tier(s) to NeoMem
|
||||||
|
push_to_neomem(s_L10, session_id)
|
||||||
|
push_to_neomem(s_L20, session_id)
|
||||||
|
push_to_neomem(s_L30, session_id)
|
||||||
|
|
||||||
|
print(f"🧩 L1/L5/L10/L20/L30 updated for {session_id}")
|
||||||
|
|
||||||
print(f"🧩 Summary generated for {session_id}")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Summarizer error: {e}")
|
print(f"💥 Multilevel summarizer error for {session_id}: {e}")
|
||||||
|
|
||||||
|
|
||||||
# ─────────────────────────────
|
# ─────────────────────────────
|
||||||
# Routes
|
# Routes
|
||||||
@@ -155,6 +239,42 @@ def get_summary(session_id: str = Query(...)):
|
|||||||
summary = summarize_simple(list(hopper["buffer"]))
|
summary = summarize_simple(list(hopper["buffer"]))
|
||||||
return {"summary_text": summary, "session_id": session_id}
|
return {"summary_text": summary, "session_id": session_id}
|
||||||
|
|
||||||
|
@app.get("/context")
|
||||||
|
def get_context(session_id: str = Query(...)):
|
||||||
|
"""Return full multilevel summary context for Cortex."""
|
||||||
|
if session_id not in SESSIONS:
|
||||||
|
return {
|
||||||
|
"session_id": session_id,
|
||||||
|
"exchange_count": 0,
|
||||||
|
"L1": "",
|
||||||
|
"L5": "",
|
||||||
|
"L10": "",
|
||||||
|
"L20": "",
|
||||||
|
"L30": "",
|
||||||
|
"last_updated": None
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer = list(SESSIONS[session_id]["buffer"])
|
||||||
|
|
||||||
|
# Build levels on demand
|
||||||
|
L1 = summarize_L1(buffer)
|
||||||
|
L5 = summarize_L5(buffer)
|
||||||
|
L10 = summarize_L10(buffer)
|
||||||
|
L20 = summarize_L20(L10_HISTORY.get(session_id, []))
|
||||||
|
L30 = summarize_L30(L20_HISTORY.get(session_id, []))
|
||||||
|
|
||||||
|
return {
|
||||||
|
"session_id": session_id,
|
||||||
|
"exchange_count": len(buffer),
|
||||||
|
"L1": L1,
|
||||||
|
"L5": L5,
|
||||||
|
"L10": L10,
|
||||||
|
"L20": L20,
|
||||||
|
"L30": L30,
|
||||||
|
"last_updated": datetime.now().isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
def health():
|
def health():
|
||||||
return {"ok": True, "model": SUMMARY_MODEL, "url": SUMMARY_URL}
|
return {"ok": True, "model": SUMMARY_MODEL, "url": SUMMARY_URL}
|
||||||
|
|||||||
Reference in New Issue
Block a user