cortex rework continued.

This commit is contained in:
serversdwn
2025-12-11 02:50:23 -05:00
parent 8c914906e5
commit 5ed3fd0982
7 changed files with 910 additions and 1113 deletions

18
cortex/intake/__init__.py Normal file
View File

@@ -0,0 +1,18 @@
"""
Intake module - short-term memory summarization.
Runs inside the Cortex container as a pure Python module.
No standalone API server - called internally by Cortex.
"""
from .intake import (
SESSIONS,
add_exchange_internal,
summarize_context,
)
__all__ = [
"SESSIONS",
"add_exchange_internal",
"summarize_context",
]

View File

@@ -1,18 +1,29 @@
import os
import json
from datetime import datetime
from typing import List, Dict, Any, TYPE_CHECKING
from collections import deque
from llm.llm_router import call_llm
# -------------------------------------------------------------------
# Global Short-Term Memory (new Intake)
# -------------------------------------------------------------------
# Module-level short-term-memory store shared by the whole Cortex process.
# Keyed by session_id; add_exchange_internal() creates each entry as
# {"buffer": deque(maxlen=200), "created_at": datetime}.
SESSIONS: dict[str, dict] = {} # session_id → { buffer: deque, created_at: timestamp }
# Diagnostic: Verify module loads only once
print(f"[Intake Module Init] SESSIONS object id: {id(SESSIONS)}, module: {__name__}")
# Rolling per-session history for the L10 / L20 summary tiers.
# NOTE(review): presumably holds previously generated summaries per session —
# confirm against summarize_L10 / summarize_L20 (not visible in this chunk).
L10_HISTORY: Dict[str, list[str]] = {}
L20_HISTORY: Dict[str, list[str]] = {}
from llm.llm_router import call_llm # Use Cortex's shared LLM router
# Static-analysis-only declarations: this branch never executes at runtime,
# so nothing below shadows the real module-level objects defined above.
if TYPE_CHECKING:
    # Only for type hints — do NOT redefine SESSIONS here
    from collections import deque as _deque
    SESSIONS: dict
    L10_HISTORY: dict
    L20_HISTORY: dict
    # Forward declaration so earlier references type-check; the real
    # (stub) implementation of bg_summarize is defined later in this module.
    def bg_summarize(session_id: str) -> None: ...
from llm.llm_router import call_llm # use Cortex's shared router
# ─────────────────────────────
# Config
# ─────────────────────────────
@@ -220,20 +231,24 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None:
# ─────────────────────────────
# Main entrypoint for Cortex
# ─────────────────────────────
async def summarize_context(session_id: str, exchanges: list[dict]) -> dict:
    """
    Internal summarizer that uses Cortex's shared LLM router (no HTTP hop).

    Produces L1 / L5 / L10 / L20 / L30 summaries for one session in a
    single LLM call and always returns a well-formed result dict.

    Args:
        session_id: The conversation/session ID.
        exchanges: Recent conversation buffer for the session; each item
            is a dict like {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}.

    Returns:
        Dict with keys: session_id, exchange_count, L1, L5, L10, L20, L30,
        last_updated (ISO timestamp). On any LLM or JSON-parse failure the
        L1 field carries an "[Error summarizing: ...]" marker and the other
        tiers are empty strings — this function never raises.
    """
    # Build raw conversation text from the buffer.
    convo_lines: list[str] = []
    for ex in exchanges:
        convo_lines.append(f"User: {ex.get('user_msg','')}")
        convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}")
    convo_text = "\n".join(convo_lines)

    # Nothing to summarize: return an empty but well-formed result so
    # callers can treat the shape uniformly.
    if not convo_text.strip():
        return {
            "session_id": session_id,
            "exchange_count": 0,
            "L1": "",
            "L5": "",
            "L10": "",
            "L20": "",
            "L30": "",
            "last_updated": datetime.now().isoformat()
        }

    # Prompt the LLM (internal — no HTTP).
    prompt = f"""
Summarize the conversation below into multiple compression levels.

Conversation:
----------------
{convo_text}
----------------

Output strictly in JSON with keys:
L1 → ultra short summary (1-2 sentences max)
L5 → short summary
L10 → medium summary
L20 → detailed overview
L30 → full detailed summary

JSON only. No text outside JSON.
"""
    try:
        llm_response = await call_llm(
            prompt,
            temperature=0.2
        )

        # Models frequently wrap JSON in markdown fences despite the
        # instruction; strip them so a fenced-but-valid payload still parses
        # instead of falling into the error branch below.
        cleaned = llm_response.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`").strip()
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:]
        summary = json.loads(cleaned)

        return {
            "session_id": session_id,
            "exchange_count": len(exchanges),
            "L1": summary.get("L1", ""),
            "L5": summary.get("L5", ""),
            "L10": summary.get("L10", ""),
            "L20": summary.get("L20", ""),
            "L30": summary.get("L30", ""),
            "last_updated": datetime.now().isoformat()
        }
    except Exception as e:
        # Deliberate best-effort: summarization must never break /reason.
        # Surface the failure in-band via the L1 field.
        return {
            "session_id": session_id,
            "exchange_count": len(exchanges),
            "L1": f"[Error summarizing: {str(e)}]",
            "L5": "",
            "L10": "",
            "L20": "",
            "L30": "",
            "last_updated": datetime.now().isoformat()
        }
# ─────────────────────────────────
# Background summarization stub
# ─────────────────────────────────
def bg_summarize(session_id: str):
    """
    No-op background summarizer.

    Real summarization happens inside /reason via summarize_context();
    this stub exists only so add_exchange_internal() can call it
    without raising NameError.
    """
    notice = f"[Intake] Exchange added for {session_id}. Will summarize on next /reason call."
    print(notice)
# ─────────────────────────────
# Internal entrypoint for Cortex
@@ -283,15 +339,23 @@ def add_exchange_internal(exchange: dict):
exchange["timestamp"] = datetime.now().isoformat()
# DEBUG: Verify we're using the module-level SESSIONS
print(f"[add_exchange_internal] SESSIONS object id: {id(SESSIONS)}, current sessions: {list(SESSIONS.keys())}")
# Ensure session exists
if session_id not in SESSIONS:
SESSIONS[session_id] = {
"buffer": deque(maxlen=200),
"created_at": datetime.now()
}
print(f"[add_exchange_internal] Created new session: {session_id}")
else:
print(f"[add_exchange_internal] Using existing session: {session_id}")
# Append exchange into the rolling buffer
SESSIONS[session_id]["buffer"].append(exchange)
buffer_len = len(SESSIONS[session_id]["buffer"])
print(f"[add_exchange_internal] Added exchange to {session_id}, buffer now has {buffer_len} items")
# Trigger summarization immediately
try: