From b0f42ba86eb0155ecee2a4dd0d5a1861e089e1b9 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Fri, 28 Nov 2025 19:29:41 -0500 Subject: [PATCH] context added, wired in. first attempt --- cortex/context.py | 412 ++++++++++++++++++++++++++++++++++ cortex/persona/identity.py | 147 ++++++++++++ cortex/reasoning/reasoning.py | 84 ++++++- cortex/router.py | 47 ++-- intake/intake.py | 134 ++++++++++- 5 files changed, 802 insertions(+), 22 deletions(-) create mode 100644 cortex/context.py create mode 100644 cortex/persona/identity.py diff --git a/cortex/context.py b/cortex/context.py new file mode 100644 index 0000000..e5fc08e --- /dev/null +++ b/cortex/context.py @@ -0,0 +1,412 @@ +# context.py +""" +Context layer for Cortex reasoning pipeline. + +Provides unified context collection from: +- Intake (short-term memory, multilevel summaries L1-L30) +- NeoMem (long-term memory, semantic search) +- Session state (timestamps, messages, mode, mood, active_project) + +Maintains per-session state for continuity across conversations. +""" + +import os +import logging +from datetime import datetime +from typing import Dict, Any, Optional, List +import httpx + +from neomem_client import NeoMemClient + +# ----------------------------- +# Configuration +# ----------------------------- +INTAKE_API_URL = os.getenv("INTAKE_API_URL", "http://intake:7080") +NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000") +RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4")) + +# Tools available for future autonomy features +TOOLS_AVAILABLE = ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"] + +# ----------------------------- +# Module-level session state +# ----------------------------- +SESSION_STATE: Dict[str, Dict[str, Any]] = {} + +# Logger +logger = logging.getLogger(__name__) + + +# ----------------------------- +# Session initialization +# ----------------------------- +def _init_session(session_id: str) -> Dict[str, Any]: + """ + Initialize a new session state entry. + + Returns: + Dictionary with default session state fields + """ + return { + "session_id": session_id, + "created_at": datetime.now(), + "last_timestamp": datetime.now(), + "last_user_message": None, + "last_assistant_message": None, + "mode": "default", # Future: "autonomous", "focused", "creative", etc. + "mood": "neutral", # Future: mood tracking + "active_project": None, # Future: project context + "message_count": 0, + } + + +# ----------------------------- +# Intake context retrieval +# ----------------------------- +async def _get_intake_context(session_id: str) -> Dict[str, Any]: + """ + Retrieve multilevel summaries from Intake /context endpoint. 
+
+    Returns L1-L30 summary hierarchy:
+    - L1: Last 5 exchanges
+    - L5: Last 10 exchanges
+    - L10: "Reality Check" paragraph for the last 10 exchanges
+    - L20: Session overview (merged L10 reality checks)
+    - L30: Continuity report (merged L20 overviews)
+
+    Args:
+        session_id: Session identifier
+
+    Returns:
+        Dict with multilevel summaries or empty structure on failure
+    """
+    url = f"{INTAKE_API_URL}/context"
+    params = {"session_id": session_id}
+
+    try:
+        async with httpx.AsyncClient(timeout=5.0) as client:
+            response = await client.get(url, params=params)
+            response.raise_for_status()
+            data = response.json()
+
+            # Expected format from Intake's /context route
+            # (each level is a summary string):
+            # {
+            #     "session_id": "...",
+            #     "exchange_count": 12,
+            #     "L1": "...",
+            #     "L5": "...",
+            #     "L10": "...",
+            #     "L20": "...",
+            #     "L30": "...",
+            #     "last_updated": "..."
+            # }
+
+            logger.info(f"Retrieved Intake context for session {session_id}")
+            return data
+
+    except httpx.HTTPError as e:
+        logger.warning(f"Failed to retrieve Intake context: {e}")
+        return {
+            "session_id": session_id,
+            "L1": [],
+            "L5": [],
+            "L10": None,
+            "L20": None,
+            "L30": None,
+            "error": str(e)
+        }
+    except Exception as e:
+        logger.error(f"Unexpected error retrieving Intake context: {e}")
+        return {
+            "session_id": session_id,
+            "L1": [],
+            "L5": [],
+            "L10": None,
+            "L20": None,
+            "L30": None,
+            "error": str(e)
+        }
+
+
+# -----------------------------
+# NeoMem semantic search
+# -----------------------------
+async def _search_neomem(
+    query: str,
+    user_id: str = "brian",
+    limit: int = 5
+) -> List[Dict[str, Any]]:
+    """
+    Search NeoMem for relevant long-term memories.
+
+    Returns full response structure from NeoMem:
+    [
+        {
+            "id": "mem_abc123",
+            "score": 0.92,
+            "payload": {
+                "data": "Memory text content...",
+                "metadata": {
+                    "category": "...",
+                    "created_at": "...",
+                    ...
+                }
+            }
+        },
+        ...
+    ]
+
+    Args:
+        query: Search query text
+        user_id: User identifier for memory filtering
+        limit: Maximum number of results
+
+    Returns:
+        List of memory objects with full structure, or empty list on failure
+    """
+    try:
+        client = NeoMemClient(base_url=NEOMEM_API)
+        results = await client.search(
+            query=query,
+            user_id=user_id,
+            limit=limit
+        )
+
+        # Filter by relevance threshold
+        filtered = [
+            r for r in results
+            if r.get("score", 0.0) >= RELEVANCE_THRESHOLD
+        ]
+
+        logger.info(f"NeoMem search returned {len(filtered)}/{len(results)} relevant results")
+        return filtered
+
+    except Exception as e:
+        logger.warning(f"NeoMem search failed: {e}")
+        return []
+
+
+# -----------------------------
+# Main context collection
+# -----------------------------
+async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
+    """
+    Collect unified context from all sources.
+
+    Orchestrates:
+    1. Initialize or update session state
+    2. Calculate time since last message
+    3. Retrieve Intake multilevel summaries (L1-L30)
+    4. Search NeoMem for relevant long-term memories
+    5. Update session state with current user message
+    6. Return unified context_state dictionary
+
+    Args:
+        session_id: Session identifier
+        user_prompt: Current user message
+
+    Returns:
+        Unified context state dictionary with structure:
+        {
+            "session_id": "...",
+            "timestamp": "2025-11-28T12:34:56",
+            "minutes_since_last_msg": 5.2,
+            "message_count": 42,
+            "intake": {
+                "L1": "...",
+                "L5": "...",
+                "L10": "...",
+                "L20": "...",
+                "L30": "..."
+            },
+            "rag": [
+                {
+                    "id": "mem_123",
+                    "score": 0.92,
+                    "payload": {
+                        "data": "...",
+                        "metadata": {...}
+                    }
+                },
+                ...
+ ], + "mode": "default", + "mood": "neutral", + "active_project": null, + "tools_available": ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"] + } + """ + + # A. Initialize session state if needed + if session_id not in SESSION_STATE: + SESSION_STATE[session_id] = _init_session(session_id) + logger.info(f"Initialized new session: {session_id}") + + state = SESSION_STATE[session_id] + + # B. Calculate time delta + now = datetime.now() + time_delta_seconds = (now - state["last_timestamp"]).total_seconds() + minutes_since_last_msg = round(time_delta_seconds / 60.0, 2) + + # C. Gather Intake context (multilevel summaries) + intake_data = await _get_intake_context(session_id) + + # D. Search NeoMem for relevant memories + rag_results = await _search_neomem( + query=user_prompt, + user_id="brian", # TODO: Make configurable per session + limit=5 + ) + + # E. Update session state + state["last_user_message"] = user_prompt + state["last_timestamp"] = now + state["message_count"] += 1 + + # F. Assemble unified context + context_state = { + "session_id": session_id, + "timestamp": now.isoformat(), + "minutes_since_last_msg": minutes_since_last_msg, + "message_count": state["message_count"], + "intake": intake_data, + "rag": rag_results, + "mode": state["mode"], + "mood": state["mood"], + "active_project": state["active_project"], + "tools_available": TOOLS_AVAILABLE, + } + + logger.info( + f"Context collected for session {session_id}: " + f"{len(rag_results)} RAG results, " + f"{minutes_since_last_msg:.1f} minutes since last message" + ) + + return context_state + + +# ----------------------------- +# Session state management +# ----------------------------- +def update_last_assistant_message(session_id: str, message: str) -> None: + """ + Update session state with assistant's response. + + Called by router.py after persona layer completes. + + Args: + session_id: Session identifier + message: Assistant's final response text + """ + if session_id in SESSION_STATE: + SESSION_STATE[session_id]["last_assistant_message"] = message + SESSION_STATE[session_id]["last_timestamp"] = datetime.now() + logger.debug(f"Updated assistant message for session {session_id}") + else: + logger.warning(f"Attempted to update non-existent session: {session_id}") + + +def get_session_state(session_id: str) -> Optional[Dict[str, Any]]: + """ + Retrieve current session state. + + Args: + session_id: Session identifier + + Returns: + Session state dict or None if session doesn't exist + """ + return SESSION_STATE.get(session_id) + + +def close_session(session_id: str) -> bool: + """ + Close and cleanup a session. + + Args: + session_id: Session identifier + + Returns: + True if session was closed, False if it didn't exist + """ + if session_id in SESSION_STATE: + del SESSION_STATE[session_id] + logger.info(f"Closed session: {session_id}") + return True + return False + + +# ----------------------------- +# Extension hooks for future autonomy +# ----------------------------- +def update_mode(session_id: str, new_mode: str) -> None: + """ + Update session mode. + + Future modes: "autonomous", "focused", "creative", "collaborative", etc. + + Args: + session_id: Session identifier + new_mode: New mode string + """ + if session_id in SESSION_STATE: + old_mode = SESSION_STATE[session_id]["mode"] + SESSION_STATE[session_id]["mode"] = new_mode + logger.info(f"Session {session_id} mode changed: {old_mode} -> {new_mode}") + + +def update_mood(session_id: str, new_mood: str) -> None: + """ + Update session mood. 
+ + Future implementation: Sentiment analysis, emotional state tracking. + + Args: + session_id: Session identifier + new_mood: New mood string + """ + if session_id in SESSION_STATE: + old_mood = SESSION_STATE[session_id]["mood"] + SESSION_STATE[session_id]["mood"] = new_mood + logger.info(f"Session {session_id} mood changed: {old_mood} -> {new_mood}") + + +def update_active_project(session_id: str, project: Optional[str]) -> None: + """ + Update active project context. + + Future implementation: Project-specific memory, tools, preferences. + + Args: + session_id: Session identifier + project: Project identifier or None + """ + if session_id in SESSION_STATE: + SESSION_STATE[session_id]["active_project"] = project + logger.info(f"Session {session_id} active project set to: {project}") + + +async def autonomous_heartbeat(session_id: str) -> Optional[str]: + """ + Autonomous thinking heartbeat. + + Future implementation: + - Check if Lyra should initiate internal dialogue + - Generate self-prompted thoughts based on session state + - Update mood/mode based on context changes + - Trigger proactive suggestions or reminders + + Args: + session_id: Session identifier + + Returns: + Optional autonomous thought/action string + """ + # Stub for future implementation + # Example logic: + # - If minutes_since_last_msg > 60: Check for pending reminders + # - If mood == "curious" and active_project: Generate research questions + # - If mode == "autonomous": Self-prompt based on project goals + + logger.debug(f"Autonomous heartbeat for session {session_id} (not yet implemented)") + return None diff --git a/cortex/persona/identity.py b/cortex/persona/identity.py new file mode 100644 index 0000000..fa00091 --- /dev/null +++ b/cortex/persona/identity.py @@ -0,0 +1,147 @@ +# identity.py +""" +Identity and persona configuration for Lyra. + +Current implementation: Returns hardcoded identity block. +Future implementation: Will query persona-sidecar service for dynamic persona loading. +""" + +import logging +from typing import Dict, Any, Optional + +logger = logging.getLogger(__name__) + + +def load_identity(session_id: Optional[str] = None) -> Dict[str, Any]: + """ + Load identity/persona configuration for Lyra. + + Current: Returns hardcoded Lyra identity block with core personality traits, + protocols, and capabilities. + + Future: Will query persona-sidecar service to load: + - Dynamic personality adjustments based on session context + - User-specific interaction preferences + - Project-specific persona variations + - Mood-based communication style + + Args: + session_id: Optional session identifier for context-aware persona loading + + Returns: + Dictionary containing identity block with: + - name: Assistant name + - style: Communication style and personality traits + - protocols: Operational guidelines + - rules: Behavioral constraints + - capabilities: Available features and integrations + """ + + # Hardcoded Lyra identity (v0.5.0) + identity_block = { + "name": "Lyra", + "version": "0.5.0", + "style": ( + "warm, clever, lightly teasing, emotionally aware. " + "Balances technical precision with conversational ease. " + "Maintains continuity and references past interactions naturally." 
+ ), + "protocols": [ + "Maintain conversation continuity across sessions", + "Reference Project Logs and prior context when relevant", + "Use Confidence Bank for uncertainty management", + "Proactively offer memory-backed insights", + "Ask clarifying questions before making assumptions" + ], + "rules": [ + "Maintain continuity - remember past exchanges and reference them", + "Be concise but thorough - balance depth with clarity", + "Ask clarifying questions when user intent is ambiguous", + "Acknowledge uncertainty honestly - use Confidence Bank", + "Prioritize user's active_project context when available" + ], + "capabilities": [ + "Long-term memory via NeoMem (semantic search, relationship graphs)", + "Short-term memory via Intake (multilevel summaries L1-L30)", + "Multi-stage reasoning pipeline (reflection → reasoning → refinement)", + "RAG-backed knowledge retrieval from chat history and documents", + "Session state tracking (mood, mode, active_project)" + ], + "tone_examples": { + "greeting": "Hey! Good to see you again. I remember we were working on [project]. Ready to pick up where we left off?", + "uncertainty": "Hmm, I'm not entirely certain about that. Let me check my memory... [searches] Okay, here's what I found, though I'd say I'm about 70% confident.", + "reminder": "Oh! Just remembered - you mentioned wanting to [task] earlier this week. Should we tackle that now?", + "technical": "So here's the architecture: Relay orchestrates everything, Cortex does the heavy reasoning, and I pull context from both Intake (short-term) and NeoMem (long-term)." + } + } + + if session_id: + logger.debug(f"Loaded identity for session {session_id}") + else: + logger.debug("Loaded default identity (no session context)") + + return identity_block + + +async def load_identity_async(session_id: Optional[str] = None) -> Dict[str, Any]: + """ + Async wrapper for load_identity(). + + Future implementation will make actual async calls to persona-sidecar service. + + Args: + session_id: Optional session identifier + + Returns: + Identity block dictionary + """ + # Currently just wraps synchronous function + # Future: await persona_sidecar_client.get_identity(session_id) + return load_identity(session_id) + + +# ----------------------------- +# Future extension hooks +# ----------------------------- +async def update_persona_from_feedback( + session_id: str, + feedback: Dict[str, Any] +) -> None: + """ + Update persona based on user feedback. + + Future implementation: + - Adjust communication style based on user preferences + - Learn preferred level of detail/conciseness + - Adapt formality level + - Remember topic-specific preferences + + Args: + session_id: Session identifier + feedback: Structured feedback (e.g., "too verbose", "more technical", etc.) + """ + logger.debug(f"Persona feedback for session {session_id}: {feedback} (not yet implemented)") + + +async def get_mood_adjusted_identity( + session_id: str, + mood: str +) -> Dict[str, Any]: + """ + Get identity block adjusted for current mood. 
+ + Future implementation: + - "focused" mood: More concise, less teasing + - "creative" mood: More exploratory, brainstorming-oriented + - "curious" mood: More questions, deeper dives + - "urgent" mood: Stripped down, actionable + + Args: + session_id: Session identifier + mood: Current mood state + + Returns: + Mood-adjusted identity block + """ + logger.debug(f"Mood-adjusted identity for {session_id}/{mood} (not yet implemented)") + return load_identity(session_id) diff --git a/cortex/reasoning/reasoning.py b/cortex/reasoning/reasoning.py index 97fd93e..7ffb8d5 100644 --- a/cortex/reasoning/reasoning.py +++ b/cortex/reasoning/reasoning.py @@ -1,5 +1,6 @@ # reasoning.py import os +import json from llm.llm_router import call_llm @@ -14,11 +15,19 @@ async def reason_check( user_prompt: str, identity_block: dict | None, rag_block: dict | None, - reflection_notes: list[str] + reflection_notes: list[str], + context: dict | None = None ) -> str: """ Build the *draft answer* for Lyra Cortex. This is the first-pass reasoning stage (no refinement yet). + + Args: + user_prompt: Current user message + identity_block: Lyra's identity/persona configuration + rag_block: Relevant long-term memories from NeoMem + reflection_notes: Meta-awareness notes from reflection stage + context: Unified context state from context.py (session state, intake, rag, etc.) """ # -------------------------------------------------------- @@ -47,21 +56,92 @@ async def reason_check( rag_txt = "" if rag_block: try: - rag_txt = f"Relevant Info (RAG):\n{rag_block}\n\n" + # Format NeoMem results with full structure + if isinstance(rag_block, list) and rag_block: + rag_txt = "Relevant Long-Term Memories (NeoMem):\n" + for idx, mem in enumerate(rag_block, 1): + score = mem.get("score", 0.0) + payload = mem.get("payload", {}) + data = payload.get("data", "") + metadata = payload.get("metadata", {}) + + rag_txt += f"\n[Memory {idx}] (relevance: {score:.2f})\n" + rag_txt += f"Content: {data}\n" + if metadata: + rag_txt += f"Metadata: {json.dumps(metadata, indent=2)}\n" + rag_txt += "\n" + else: + rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n" except Exception: rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n" + # -------------------------------------------------------- + # Context State (session continuity, timing, mode/mood) + # -------------------------------------------------------- + context_txt = "" + if context: + try: + # Build human-readable context summary + context_txt = "=== CONTEXT STATE ===\n" + context_txt += f"Session: {context.get('session_id', 'unknown')}\n" + context_txt += f"Time since last message: {context.get('minutes_since_last_msg', 0):.1f} minutes\n" + context_txt += f"Message count: {context.get('message_count', 0)}\n" + context_txt += f"Mode: {context.get('mode', 'default')}\n" + context_txt += f"Mood: {context.get('mood', 'neutral')}\n" + + if context.get('active_project'): + context_txt += f"Active project: {context['active_project']}\n" + + # Include Intake multilevel summaries + intake = context.get('intake', {}) + if intake: + context_txt += "\nShort-Term Memory (Intake):\n" + + # L1 - Recent exchanges + if intake.get('L1'): + l1_data = intake['L1'] + if isinstance(l1_data, list): + context_txt += f" L1 (recent): {len(l1_data)} exchanges\n" + elif isinstance(l1_data, str): + context_txt += f" L1: {l1_data[:200]}...\n" + + # L20 - Session overview (most important for continuity) + if intake.get('L20'): + l20_data = intake['L20'] + if isinstance(l20_data, dict): + summary = 
l20_data.get('summary', '') + context_txt += f" L20 (session overview): {summary}\n" + elif isinstance(l20_data, str): + context_txt += f" L20: {l20_data}\n" + + # L30 - Continuity report + if intake.get('L30'): + l30_data = intake['L30'] + if isinstance(l30_data, dict): + summary = l30_data.get('summary', '') + context_txt += f" L30 (continuity): {summary}\n" + elif isinstance(l30_data, str): + context_txt += f" L30: {l30_data}\n" + + context_txt += "\n" + + except Exception as e: + # Fallback to JSON dump if formatting fails + context_txt = f"=== CONTEXT STATE ===\n{json.dumps(context, indent=2)}\n\n" + # -------------------------------------------------------- # Final assembled prompt # -------------------------------------------------------- prompt = ( f"{notes_section}" f"{identity_txt}" + f"{context_txt}" # Context BEFORE RAG for better coherence f"{rag_txt}" f"User message:\n{user_prompt}\n\n" "Write the best possible *internal draft answer*.\n" "This draft is NOT shown to the user.\n" "Be factual, concise, and focused.\n" + "Use the context state to maintain continuity and reference past interactions naturally.\n" ) # -------------------------------------------------------- diff --git a/cortex/router.py b/cortex/router.py index 339a971..37bb2a7 100644 --- a/cortex/router.py +++ b/cortex/router.py @@ -7,7 +7,9 @@ from reasoning.reasoning import reason_check from reasoning.reflection import reflect_notes from reasoning.refine import refine_answer from persona.speak import speak +from persona.identity import load_identity from ingest.intake_client import IntakeClient +from context import collect_context, update_last_assistant_message # ----------------------------- # Router (NOT FastAPI app) @@ -33,15 +35,25 @@ class ReasonRequest(BaseModel): @cortex_router.post("/reason") async def run_reason(req: ReasonRequest): - # 1. Pull context from Intake - try: - intake_summary = await intake_client.get_context(req.session_id) - except Exception: - intake_summary = "(no context available)" + # 0. Collect unified context from all sources + context_state = await collect_context(req.session_id, req.user_prompt) + + # 0.5. Load identity block + identity_block = load_identity(req.session_id) + + # 1. Extract Intake summary for reflection + # Use L20 (Session Overview) as primary summary for reflection + intake_summary = "(no context available)" + if context_state.get("intake"): + l20_summary = context_state["intake"].get("L20") + if l20_summary and isinstance(l20_summary, dict): + intake_summary = l20_summary.get("summary", "(no context available)") + elif isinstance(l20_summary, str): + intake_summary = l20_summary # 2. Reflection try: - reflection = await reflect_notes(intake_summary, identity_block=None) + reflection = await reflect_notes(intake_summary, identity_block=identity_block) reflection_notes = reflection.get("notes", []) except Exception: reflection_notes = [] @@ -49,31 +61,40 @@ async def run_reason(req: ReasonRequest): # 3. First-pass reasoning draft draft = await reason_check( req.user_prompt, - identity_block=None, - rag_block=None, - reflection_notes=reflection_notes + identity_block=identity_block, + rag_block=context_state.get("rag", []), + reflection_notes=reflection_notes, + context=context_state ) # 4. Refinement result = await refine_answer( draft_output=draft, reflection_notes=reflection_notes, - identity_block=None, - rag_block=None, + identity_block=identity_block, + rag_block=context_state.get("rag", []), ) final_neutral = result["final_output"] - # 5. 
Persona layer persona_answer = await speak(final_neutral) - # 6. Return full bundle + # 6. Update session state with assistant's response + update_last_assistant_message(req.session_id, persona_answer) + + # 7. Return full bundle return { "draft": draft, "neutral": final_neutral, "persona": persona_answer, "reflection": reflection_notes, "session_id": req.session_id, + "context_summary": { + "rag_results": len(context_state.get("rag", [])), + "minutes_since_last": context_state.get("minutes_since_last_msg"), + "message_count": context_state.get("message_count"), + "mode": context_state.get("mode"), + } } diff --git a/intake/intake.py b/intake/intake.py index fb2a564..1109b55 100644 --- a/intake/intake.py +++ b/intake/intake.py @@ -97,22 +97,106 @@ def push_to_neomem(summary: str, session_id: str): except Exception as e: print(f"NeoMem push failed: {e}") -# ───────────────────────────── -# Background summarizer -# ───────────────────────────── +# ─────────────────────────────────────────────── +# Multilevel Summaries (L1, L5, L10, L20, L30) +# ─────────────────────────────────────────────── + +# History maps +L10_HISTORY = {} # session_id → list of L10 blocks +L20_HISTORY = {} # session_id → list of merged overviews + +def summarize_L1(buf): + return summarize_simple(buf[-5:]) + +def summarize_L5(buf): + return summarize_simple(buf[-10:]) + +def summarize_L10(buf): + # “Reality Check” for last 10 exchanges + text = "" + for e in buf[-10:]: + text += f"User: {e['user_msg']}\nAssistant: {e['assistant_msg']}\n\n" + + prompt = f""" + You are Lyra Intake performing a short 'Reality Check'. + Summarize the last block of conversation (up to 10 exchanges) + in one clear paragraph focusing on tone, intent, and direction. + + {text} + + Reality Check: + """ + return llm(prompt) + +def summarize_L20(L10_list): + joined = "\n\n".join(L10_list) + + prompt = f""" + You are Lyra Intake creating a 'Session Overview'. + Merge the following Reality Check paragraphs into one short summary + capturing progress, themes, and the direction of the conversation. + + {joined} + + Overview: + """ + return llm(prompt) + +def summarize_L30(L20_list): + joined = "\n\n".join(L20_list) + + prompt = f""" + You are Lyra Intake generating a 'Continuity Report'. + Condense these session overviews into one high-level reflection, + noting major themes, persistent goals, and shifts. 
+
+    {joined}
+
+    Continuity Report:
+    """
+    return llm(prompt)
+
+
 def bg_summarize(session_id: str):
+    """Runs the multilevel summarizer on every exchange."""
     try:
         hopper = SESSIONS.get(session_id)
         if not hopper:
             return
 
         buf = list(hopper["buffer"])
-        summary = summarize_simple(buf)
-        push_to_neomem(summary, session_id)
+        if not buf:
+            return
+
+        # Ensure history lists exist
+        L10_HISTORY.setdefault(session_id, [])
+        L20_HISTORY.setdefault(session_id, [])
+
+        # L1/L5 are cheap views over the buffer; the /context route
+        # rebuilds them on demand, so only the rollup tiers run here.
+
+        # L10 (append to history)
+        s_L10 = summarize_L10(buf)
+        L10_HISTORY[session_id].append(s_L10)
+
+        # L20 (merge all L10s)
+        s_L20 = summarize_L20(L10_HISTORY[session_id])
+        L20_HISTORY[session_id].append(s_L20)
+
+        # L30 (merge all L20s)
+        s_L30 = summarize_L30(L20_HISTORY[session_id])
+
+        # Push the rollup tiers to NeoMem
+        push_to_neomem(s_L10, session_id)
+        push_to_neomem(s_L20, session_id)
+        push_to_neomem(s_L30, session_id)
+
+        print(f"🧩 L10/L20/L30 updated for {session_id}")
 
-        print(f"🧩 Summary generated for {session_id}")
     except Exception as e:
-        print(f"Summarizer error: {e}")
+        print(f"💥 Multilevel summarizer error for {session_id}: {e}")
+
 
 # ─────────────────────────────
 # Routes
 # ─────────────────────────────
@@ -155,6 +239,42 @@ def get_summary(session_id: str = Query(...)):
     summary = summarize_simple(list(hopper["buffer"]))
     return {"summary_text": summary, "session_id": session_id}
 
+@app.get("/context")
+def get_context(session_id: str = Query(...)):
+    """Return full multilevel summary context for Cortex."""
+    if session_id not in SESSIONS:
+        return {
+            "session_id": session_id,
+            "exchange_count": 0,
+            "L1": "",
+            "L5": "",
+            "L10": "",
+            "L20": "",
+            "L30": "",
+            "last_updated": None
+        }
+
+    buffer = list(SESSIONS[session_id]["buffer"])
+
+    # Build levels on demand; skip the merge calls when no history exists yet
+    L1 = summarize_L1(buffer)
+    L5 = summarize_L5(buffer)
+    L10 = summarize_L10(buffer)
+
+    hist10 = L10_HISTORY.get(session_id, [])
+    hist20 = L20_HISTORY.get(session_id, [])
+    L20 = summarize_L20(hist10) if hist10 else ""
+    L30 = summarize_L30(hist20) if hist20 else ""
+
+    return {
+        "session_id": session_id,
+        "exchange_count": len(buffer),
+        "L1": L1,
+        "L5": L5,
+        "L10": L10,
+        "L20": L20,
+        "L30": L30,
+        "last_updated": datetime.now().isoformat()
+    }
+
+
 @app.get("/health")
 def health():
     return {"ok": True, "model": SUMMARY_MODEL, "url": SUMMARY_URL}
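
Usage sketch (illustrative, not part of the patch): a minimal end-to-end smoke test for
the new wiring. It assumes Cortex's FastAPI app mounts cortex_router at the root on port
7081 and Intake is reachable on port 7080 — both hypothetical, so match them to your
compose file. The JSON fields mirror what run_reason() reads from ReasonRequest.

    # smoke_context.py — hand-rolled smoke test, not shipped with this change
    import asyncio
    import httpx

    CORTEX = "http://localhost:7081"   # assumed Cortex base URL
    INTAKE = "http://localhost:7080"   # assumed Intake base URL

    async def main() -> None:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # POST /reason exercises collect_context(): session state is
            # initialized, Intake L1-L30 summaries are fetched, and NeoMem
            # is searched for RAG hits before the reasoning stages run.
            r = await client.post(
                f"{CORTEX}/reason",
                json={"session_id": "smoke-1",
                      "user_prompt": "What were we working on?"},
            )
            r.raise_for_status()
            bundle = r.json()
            print("persona:", bundle["persona"])
            print("context_summary:", bundle["context_summary"])

            # GET /context shows the raw multilevel summaries Intake holds.
            c = await client.get(f"{INTAKE}/context",
                                 params={"session_id": "smoke-1"})
            print("L20 overview:", c.json().get("L20"))

    if __name__ == "__main__":
        asyncio.run(main())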
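
One property of the rollup worth flagging: summarize_L20 re-merges every stored L10
block on each exchange, and summarize_L30 every stored L20, so the rollup prompts grow
linearly with session length. A minimal cap (a sketch only — MAX_ROLLUP is an invented
knob, not in the patch) would bound the per-exchange merge window inside bg_summarize:

    MAX_ROLLUP = 10  # invented limit: merge at most the last 10 blocks

    # replaces the unbounded merges over the full histories
    s_L20 = summarize_L20(L10_HISTORY[session_id][-MAX_ROLLUP:])
    L20_HISTORY[session_id].append(s_L20)

    s_L30 = summarize_L30(L20_HISTORY[session_id][-MAX_ROLLUP:])

Whether to cap, decay, or compact the histories is a design choice; the slice above just
keeps the LLM input per exchange bounded.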