cortex pipeline stabilized; inner monologue now determines user intent and tone
@@ -234,25 +234,27 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None:
 async def summarize_context(session_id: str, exchanges: list[dict]):
     """
     Internal summarizer that uses Cortex's LLM router.
-    Produces L1 / L5 / L10 / L20 / L30 summaries.
+    Produces cascading summaries based on exchange count:
+    - L1: Always (most recent activity)
+    - L2: After 2+ exchanges
+    - L5: After 5+ exchanges
+    - L10: After 10+ exchanges
+    - L20: After 20+ exchanges
+    - L30: After 30+ exchanges
 
     Args:
         session_id: The conversation/session ID
         exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}
     """
 
     # Build raw conversation text
     convo_lines = []
     for ex in exchanges:
         convo_lines.append(f"User: {ex.get('user_msg','')}")
         convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}")
     convo_text = "\n".join(convo_lines)
     exchange_count = len(exchanges)
 
-    if not convo_text.strip():
+    if exchange_count == 0:
         return {
             "session_id": session_id,
             "exchange_count": 0,
             "L1": "",
+            "L2": "",
             "L5": "",
             "L10": "",
             "L20": "",
@@ -260,63 +262,54 @@ async def summarize_context(session_id: str, exchanges: list[dict]):
             "last_updated": datetime.now().isoformat()
         }
 
-    # Prompt the LLM (internal — no HTTP)
-    prompt = f"""
-Summarize the conversation below into multiple compression levels.
-
-Conversation:
-----------------
-{convo_text}
-----------------
-
-Output strictly in JSON with keys:
-L1 → ultra short summary (1–2 sentences max)
-L5 → short summary
-L10 → medium summary
-L20 → detailed overview
-L30 → full detailed summary
-
-JSON only. No text outside JSON.
-"""
+    result = {
+        "session_id": session_id,
+        "exchange_count": exchange_count,
+        "L1": "",
+        "L2": "",
+        "L5": "",
+        "L10": "",
+        "L20": "",
+        "L30": "",
+        "last_updated": datetime.now().isoformat()
+    }
 
     try:
-        llm_response = await call_llm(
-            prompt,
-            backend=INTAKE_LLM,
-            temperature=0.2
-        )
+        # L1: Always generate (most recent exchanges)
+        result["L1"] = await summarize_simple(exchanges[-5:])
+        print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)")
 
-        print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}")
-        print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}")
+        # L2: After 2+ exchanges
+        if exchange_count >= 2:
+            result["L2"] = await summarize_simple(exchanges[-2:])
+            print(f"[Intake] Generated L2 for {session_id}")
 
-        # LLM should return JSON, parse it
-        if not llm_response or not llm_response.strip():
-            raise ValueError("Empty response from LLM")
+        # L5: After 5+ exchanges
+        if exchange_count >= 5:
+            result["L5"] = await summarize_simple(exchanges[-10:])
+            print(f"[Intake] Generated L5 for {session_id}")
 
-        summary = json.loads(llm_response)
+        # L10: After 10+ exchanges (Reality Check)
+        if exchange_count >= 10:
+            result["L10"] = await summarize_L10(session_id, exchanges)
+            print(f"[Intake] Generated L10 for {session_id}")
 
-        return {
-            "session_id": session_id,
-            "exchange_count": len(exchanges),
-            "L1": summary.get("L1", ""),
-            "L5": summary.get("L5", ""),
-            "L10": summary.get("L10", ""),
-            "L20": summary.get("L20", ""),
-            "L30": summary.get("L30", ""),
-            "last_updated": datetime.now().isoformat()
-        }
+        # L20: After 20+ exchanges (Session Overview - merges L10s)
+        if exchange_count >= 20 and exchange_count % 10 == 0:
+            result["L20"] = await summarize_L20(session_id)
+            print(f"[Intake] Generated L20 for {session_id}")
 
+        # L30: After 30+ exchanges (Continuity Report - merges L20s)
+        if exchange_count >= 30 and exchange_count % 10 == 0:
+            result["L30"] = await summarize_L30(session_id)
+            print(f"[Intake] Generated L30 for {session_id}")
 
+        return result
 
     except Exception as e:
-        return {
-            "session_id": session_id,
-            "exchange_count": len(exchanges),
-            "L1": f"[Error summarizing: {str(e)}]",
-            "L5": "",
-            "L10": "",
-            "L20": "",
-            "L30": "",
-            "last_updated": datetime.now().isoformat()
-        }
+        print(f"[Intake] Error during summarization: {e}")
+        result["L1"] = f"[Error summarizing: {str(e)}]"
+        return result
 
 # ─────────────────────────────────
 # Background summarization stub
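
Reviewer note: for orientation, a minimal sketch of how the new cascade might be driven end to end. summarize_context and push_to_neomem are real names from this file; the driver function and the fetch_exchanges accessor are illustrative assumptions, not part of this commit.

import asyncio

# Hypothetical driver (refresh_session_summaries and fetch_exchanges are assumed
# names); summarize_context and push_to_neomem are defined in this file.
async def refresh_session_summaries(session_id: str) -> None:
    exchanges = await fetch_exchanges(session_id)  # assumed: returns list[dict] of exchanges
    summary = await summarize_context(session_id, exchanges)
    for level in ("L1", "L2", "L5", "L10", "L20", "L30"):
        if summary.get(level):  # push only levels the cascade (re)generated this pass
            push_to_neomem(summary[level], session_id, level)

# Usage: asyncio.run(refresh_session_summaries("session-123"))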
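
One behavioral detail worth flagging: because the L20 and L30 branches also require exchange_count % 10 == 0, those levels refresh only when the count lands exactly on a multiple of ten, so if exchanges arrive in batches and the count skips from 28 to 31, that cycle's L20/L30 refresh is silently missed. A small standalone distillation of the gating, mirroring the conditions in the diff above:

def levels_generated(exchange_count: int) -> list[str]:
    # Mirrors the cascade conditions in summarize_context.
    levels = ["L1"]  # always generated
    if exchange_count >= 2:
        levels.append("L2")
    if exchange_count >= 5:
        levels.append("L5")
    if exchange_count >= 10:
        levels.append("L10")
    if exchange_count >= 20 and exchange_count % 10 == 0:
        levels.append("L20")
    if exchange_count >= 30 and exchange_count % 10 == 0:
        levels.append("L30")
    return levels

print(levels_generated(25))  # ['L1', 'L2', 'L5', 'L10'] -- no L20, since 25 % 10 != 0
print(levels_generated(30))  # ['L1', 'L2', 'L5', 'L10', 'L20', 'L30']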