cortex pipeline stabilized, inner monologue now determines user intent and tone

serversdwn
2025-12-13 04:13:12 -05:00
parent 8554249421
commit fa4dd46cfc
12 changed files with 428 additions and 97 deletions


@@ -234,25 +234,27 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None:
async def summarize_context(session_id: str, exchanges: list[dict]):
"""
Internal summarizer that uses Cortex's LLM router.
Produces L1 / L5 / L10 / L20 / L30 summaries.
Produces cascading summaries based on exchange count:
- L1: Always (most recent activity)
- L2: After 2+ exchanges
- L5: After 5+ exchanges
- L10: After 10+ exchanges
- L20: After 20+ exchanges
- L30: After 30+ exchanges
Args:
session_id: The conversation/session ID
exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}
"""
# Build raw conversation text
convo_lines = []
for ex in exchanges:
convo_lines.append(f"User: {ex.get('user_msg','')}")
convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}")
convo_text = "\n".join(convo_lines)
exchange_count = len(exchanges)
if not convo_text.strip():
if exchange_count == 0:
return {
"session_id": session_id,
"exchange_count": 0,
"L1": "",
"L2": "",
"L5": "",
"L10": "",
"L20": "",
@@ -260,63 +262,54 @@ async def summarize_context(session_id: str, exchanges: list[dict]):
"last_updated": datetime.now().isoformat()
}
# Prompt the LLM (internal — no HTTP)
prompt = f"""
Summarize the conversation below into multiple compression levels.
Conversation:
----------------
{convo_text}
----------------
Output strictly in JSON with keys:
L1 → ultra short summary (1-2 sentences max)
L5 → short summary
L10 → medium summary
L20 → detailed overview
L30 → full detailed summary
JSON only. No text outside JSON.
"""
result = {
"session_id": session_id,
"exchange_count": exchange_count,
"L1": "",
"L2": "",
"L5": "",
"L10": "",
"L20": "",
"L30": "",
"last_updated": datetime.now().isoformat()
}
try:
llm_response = await call_llm(
prompt,
backend=INTAKE_LLM,
temperature=0.2
)
# L1: Always generate (most recent exchanges)
result["L1"] = await summarize_simple(exchanges[-5:])
print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)")
print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}")
print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}")
# L2: After 2+ exchanges
if exchange_count >= 2:
result["L2"] = await summarize_simple(exchanges[-2:])
print(f"[Intake] Generated L2 for {session_id}")
# LLM should return JSON, parse it
if not llm_response or not llm_response.strip():
raise ValueError("Empty response from LLM")
# L5: After 5+ exchanges
if exchange_count >= 5:
result["L5"] = await summarize_simple(exchanges[-10:])
print(f"[Intake] Generated L5 for {session_id}")
summary = json.loads(llm_response)
# L10: After 10+ exchanges (Reality Check)
if exchange_count >= 10:
result["L10"] = await summarize_L10(session_id, exchanges)
print(f"[Intake] Generated L10 for {session_id}")
return {
"session_id": session_id,
"exchange_count": len(exchanges),
"L1": summary.get("L1", ""),
"L5": summary.get("L5", ""),
"L10": summary.get("L10", ""),
"L20": summary.get("L20", ""),
"L30": summary.get("L30", ""),
"last_updated": datetime.now().isoformat()
}
# L20: After 20+ exchanges (Session Overview - merges L10s)
if exchange_count >= 20 and exchange_count % 10 == 0:
result["L20"] = await summarize_L20(session_id)
print(f"[Intake] Generated L20 for {session_id}")
# L30: After 30+ exchanges (Continuity Report - merges L20s)
if exchange_count >= 30 and exchange_count % 10 == 0:
result["L30"] = await summarize_L30(session_id)
print(f"[Intake] Generated L30 for {session_id}")
return result
except Exception as e:
return {
"session_id": session_id,
"exchange_count": len(exchanges),
"L1": f"[Error summarizing: {str(e)}]",
"L5": "",
"L10": "",
"L20": "",
"L30": "",
"last_updated": datetime.now().isoformat()
}
print(f"[Intake] Error during summarization: {e}")
result["L1"] = f"[Error summarizing: {str(e)}]"
return result
# ─────────────────────────────────
# Background summarization stub
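
For reference, a minimal standalone sketch of the cascade thresholds introduced in this commit. It only mirrors the exchange_count checks from summarize_context so the levels that fire at a given count can be eyeballed without running the service; the helper name levels_for is hypothetical and not part of the codebase.

# Sketch of the summary cascade thresholds from summarize_context (this commit).
# levels_for is a hypothetical helper; the thresholds mirror the diff above.

def levels_for(exchange_count: int) -> list[str]:
    """Return the summary levels that would be (re)generated for a given exchange count."""
    if exchange_count == 0:
        return []
    levels = ["L1"]                      # L1: always, from the most recent exchanges
    if exchange_count >= 2:
        levels.append("L2")              # L2: after 2+ exchanges
    if exchange_count >= 5:
        levels.append("L5")              # L5: after 5+ exchanges
    if exchange_count >= 10:
        levels.append("L10")             # L10: reality check after 10+ exchanges
    if exchange_count >= 20 and exchange_count % 10 == 0:
        levels.append("L20")             # L20: session overview, every 10th exchange past 20
    if exchange_count >= 30 and exchange_count % 10 == 0:
        levels.append("L30")             # L30: continuity report, every 10th exchange past 30
    return levels

if __name__ == "__main__":
    for n in (0, 1, 2, 7, 10, 25, 30, 40):
        print(f"{n:>3} exchanges -> {levels_for(n)}")

With this, 25 exchanges yields only L1/L2/L5/L10 (25 is not a multiple of 10), while 30 and 40 also trigger the L20 and L30 merges, matching the modulo guard in the committed code.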