cortex rework continued.

This commit is contained in:
serversdwn
2025-12-11 02:50:23 -05:00
parent 8c914906e5
commit 5ed3fd0982
7 changed files with 910 additions and 1113 deletions

18
cortex/intake/__init__.py Normal file
View File

@@ -0,0 +1,18 @@
"""
Intake module - short-term memory summarization.
Runs inside the Cortex container as a pure Python module.
No standalone API server - called internally by Cortex.
"""
from .intake import (
SESSIONS,
add_exchange_internal,
summarize_context,
)
__all__ = [
"SESSIONS",
"add_exchange_internal",
"summarize_context",
]

View File

@@ -1,18 +1,29 @@
import os
import json
from datetime import datetime
from typing import List, Dict, Any, TYPE_CHECKING
from collections import deque
from llm.llm_router import call_llm
# -------------------------------------------------------------------
# Global Short-Term Memory (new Intake)
# -------------------------------------------------------------------
# Module-level short-term-memory store shared by the whole Cortex process.
# Keyed by session_id; add_exchange_internal() creates each entry as
# {"buffer": deque(maxlen=200), "created_at": datetime}.
SESSIONS: dict[str, dict] = {} # session_id → { buffer: deque, created_at: timestamp }
# Diagnostic: Verify module loads only once
print(f"[Intake Module Init] SESSIONS object id: {id(SESSIONS)}, module: {__name__}")
# Rolling per-session history for the L10 / L20 summary tiers.
# NOTE(review): presumably holds previously generated summaries per session —
# confirm against summarize_L10 / summarize_L20 (not visible in this chunk).
L10_HISTORY: Dict[str, list[str]] = {}
L20_HISTORY: Dict[str, list[str]] = {}
from llm.llm_router import call_llm # Use Cortex's shared LLM router
# Static-analysis-only declarations: this branch never executes at runtime,
# so nothing below shadows the real module-level objects defined above.
if TYPE_CHECKING:
    # Only for type hints — do NOT redefine SESSIONS here
    from collections import deque as _deque
    SESSIONS: dict
    L10_HISTORY: dict
    L20_HISTORY: dict
    # Forward declaration so earlier references type-check; the real
    # (stub) implementation of bg_summarize is defined later in this module.
    def bg_summarize(session_id: str) -> None: ...
from llm.llm_router import call_llm # use Cortex's shared router
# ─────────────────────────────
# Config
# ─────────────────────────────
@@ -220,20 +231,24 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None:
# ─────────────────────────────
# Main entrypoint for Cortex
# ─────────────────────────────
async def summarize_context(session_id: str, exchanges: list[dict]) -> dict:
    """
    Internal summarizer that uses Cortex's shared LLM router (no HTTP hop).

    Produces L1 / L5 / L10 / L20 / L30 summaries for one session in a
    single LLM call and always returns a well-formed result dict.

    Args:
        session_id: The conversation/session ID.
        exchanges: Recent conversation buffer for the session; each item
            is a dict like {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}.

    Returns:
        Dict with keys: session_id, exchange_count, L1, L5, L10, L20, L30,
        last_updated (ISO timestamp). On any LLM or JSON-parse failure the
        L1 field carries an "[Error summarizing: ...]" marker and the other
        tiers are empty strings — this function never raises.
    """
    # Build raw conversation text from the buffer.
    convo_lines: list[str] = []
    for ex in exchanges:
        convo_lines.append(f"User: {ex.get('user_msg','')}")
        convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}")
    convo_text = "\n".join(convo_lines)

    # Nothing to summarize: return an empty but well-formed result so
    # callers can treat the shape uniformly.
    if not convo_text.strip():
        return {
            "session_id": session_id,
            "exchange_count": 0,
            "L1": "",
            "L5": "",
            "L10": "",
            "L20": "",
            "L30": "",
            "last_updated": datetime.now().isoformat()
        }

    # Prompt the LLM (internal — no HTTP).
    prompt = f"""
Summarize the conversation below into multiple compression levels.

Conversation:
----------------
{convo_text}
----------------

Output strictly in JSON with keys:
L1 → ultra short summary (1-2 sentences max)
L5 → short summary
L10 → medium summary
L20 → detailed overview
L30 → full detailed summary

JSON only. No text outside JSON.
"""
    try:
        llm_response = await call_llm(
            prompt,
            temperature=0.2
        )

        # Models frequently wrap JSON in markdown fences despite the
        # instruction; strip them so a fenced-but-valid payload still parses
        # instead of falling into the error branch below.
        cleaned = llm_response.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`").strip()
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:]
        summary = json.loads(cleaned)

        return {
            "session_id": session_id,
            "exchange_count": len(exchanges),
            "L1": summary.get("L1", ""),
            "L5": summary.get("L5", ""),
            "L10": summary.get("L10", ""),
            "L20": summary.get("L20", ""),
            "L30": summary.get("L30", ""),
            "last_updated": datetime.now().isoformat()
        }
    except Exception as e:
        # Deliberate best-effort: summarization must never break /reason.
        # Surface the failure in-band via the L1 field.
        return {
            "session_id": session_id,
            "exchange_count": len(exchanges),
            "L1": f"[Error summarizing: {str(e)}]",
            "L5": "",
            "L10": "",
            "L20": "",
            "L30": "",
            "last_updated": datetime.now().isoformat()
        }
# ─────────────────────────────────
# Background summarization stub
# ─────────────────────────────────
def bg_summarize(session_id: str):
    """
    No-op background summarizer.

    Real summarization happens inside /reason via summarize_context();
    this stub exists only so add_exchange_internal() can call it
    without raising NameError.
    """
    notice = f"[Intake] Exchange added for {session_id}. Will summarize on next /reason call."
    print(notice)
# ─────────────────────────────
# Internal entrypoint for Cortex
@@ -283,15 +339,23 @@ def add_exchange_internal(exchange: dict):
exchange["timestamp"] = datetime.now().isoformat()
# DEBUG: Verify we're using the module-level SESSIONS
print(f"[add_exchange_internal] SESSIONS object id: {id(SESSIONS)}, current sessions: {list(SESSIONS.keys())}")
# Ensure session exists
if session_id not in SESSIONS:
SESSIONS[session_id] = {
"buffer": deque(maxlen=200),
"created_at": datetime.now()
}
print(f"[add_exchange_internal] Created new session: {session_id}")
else:
print(f"[add_exchange_internal] Using existing session: {session_id}")
# Append exchange into the rolling buffer
SESSIONS[session_id]["buffer"].append(exchange)
buffer_len = len(SESSIONS[session_id]["buffer"])
print(f"[add_exchange_internal] Added exchange to {session_id}, buffer now has {buffer_len} items")
# Trigger summarization immediately
try: