388 lines
13 KiB
Python
388 lines
13 KiB
Python
import os
|
|
import json
|
|
from datetime import datetime
|
|
from typing import List, Dict, Any, TYPE_CHECKING
|
|
from collections import deque
|
|
from llm.llm_router import call_llm
|
|
|
|
# -------------------------------------------------------------------
|
|
# Global Short-Term Memory (new Intake)
|
|
# -------------------------------------------------------------------
|
|
SESSIONS: dict[str, dict] = {} # session_id → { buffer: deque, created_at: timestamp }
|
|
|
|
# Diagnostic: Verify module loads only once
|
|
print(f"[Intake Module Init] SESSIONS object id: {id(SESSIONS)}, module: {__name__}")
|
|
|
|
# L10 / L20 history lives here too
|
|
L10_HISTORY: Dict[str, list[str]] = {}
|
|
L20_HISTORY: Dict[str, list[str]] = {}
|
|
|
|
from llm.llm_router import call_llm # Use Cortex's shared LLM router
|
|
|
|
if TYPE_CHECKING:
|
|
# Only for type hints — do NOT redefine SESSIONS here
|
|
from collections import deque as _deque
|
|
def bg_summarize(session_id: str) -> None: ...
|
|
|
|
# ─────────────────────────────
|
|
# Config
|
|
# ─────────────────────────────
|
|
|
|
INTAKE_LLM = os.getenv("INTAKE_LLM", "PRIMARY").upper()
|
|
|
|
SUMMARY_MAX_TOKENS = int(os.getenv("SUMMARY_MAX_TOKENS", "200"))
|
|
SUMMARY_TEMPERATURE = float(os.getenv("SUMMARY_TEMPERATURE", "0.3"))
|
|
|
|
NEOMEM_API = os.getenv("NEOMEM_API")
|
|
NEOMEM_KEY = os.getenv("NEOMEM_KEY")
|
|
|
|
# ─────────────────────────────
|
|
# Internal history for L10/L20/L30
|
|
# ─────────────────────────────
|
|
|
|
L10_HISTORY: Dict[str, list[str]] = {} # session_id → list of L10 blocks
|
|
L20_HISTORY: Dict[str, list[str]] = {} # session_id → list of merged overviews
|
|
|
|
|
|
# ─────────────────────────────
|
|
# LLM helper (via Cortex router)
|
|
# ─────────────────────────────
|
|
|
|
async def _llm(prompt: str) -> str:
|
|
"""
|
|
Use Cortex's llm_router to run a summary prompt.
|
|
"""
|
|
try:
|
|
text = await call_llm(
|
|
prompt,
|
|
backend=INTAKE_LLM,
|
|
temperature=SUMMARY_TEMPERATURE,
|
|
max_tokens=SUMMARY_MAX_TOKENS,
|
|
)
|
|
return (text or "").strip()
|
|
except Exception as e:
|
|
return f"[Error summarizing: {e}]"
|
|
|
|
|
|
# ─────────────────────────────
|
|
# Formatting helpers
|
|
# ─────────────────────────────
|
|
|
|
def _format_exchanges(exchanges: List[Dict[str, Any]]) -> str:
|
|
"""
|
|
Expect each exchange to look like:
|
|
{ "user_msg": "...", "assistant_msg": "..." }
|
|
"""
|
|
chunks = []
|
|
for e in exchanges:
|
|
user = e.get("user_msg", "")
|
|
assistant = e.get("assistant_msg", "")
|
|
chunks.append(f"User: {user}\nAssistant: {assistant}\n")
|
|
return "\n".join(chunks)
|
|
|
|
|
|
# ─────────────────────────────
|
|
# Base factual summary
|
|
# ─────────────────────────────
|
|
|
|
async def summarize_simple(exchanges: List[Dict[str, Any]]) -> str:
|
|
"""
|
|
Simple factual summary of recent exchanges.
|
|
"""
|
|
if not exchanges:
|
|
return ""
|
|
|
|
text = _format_exchanges(exchanges)
|
|
|
|
prompt = f"""
|
|
Summarize the following conversation between Brian (user) and Lyra (assistant).
|
|
Focus only on factual content. Avoid names, examples, story tone, or invented details.
|
|
|
|
{text}
|
|
|
|
Summary:
|
|
"""
|
|
return await _llm(prompt)
|
|
|
|
|
|
# ─────────────────────────────
|
|
# Multilevel Summaries (L1, L5, L10, L20, L30)
|
|
# ─────────────────────────────
|
|
|
|
async def summarize_L1(buf: List[Dict[str, Any]]) -> str:
|
|
# Last ~5 exchanges
|
|
return await summarize_simple(buf[-5:])
|
|
|
|
|
|
async def summarize_L5(buf: List[Dict[str, Any]]) -> str:
|
|
# Last ~10 exchanges
|
|
return await summarize_simple(buf[-10:])
|
|
|
|
|
|
async def summarize_L10(session_id: str, buf: List[Dict[str, Any]]) -> str:
|
|
# “Reality Check” for last 10 exchanges
|
|
text = _format_exchanges(buf[-10:])
|
|
|
|
prompt = f"""
|
|
You are Lyra Intake performing a short 'Reality Check'.
|
|
Summarize the last block of conversation (up to 10 exchanges)
|
|
in one clear paragraph focusing on tone, intent, and direction.
|
|
|
|
{text}
|
|
|
|
Reality Check:
|
|
"""
|
|
summary = await _llm(prompt)
|
|
|
|
# Track history for this session
|
|
L10_HISTORY.setdefault(session_id, [])
|
|
L10_HISTORY[session_id].append(summary)
|
|
|
|
return summary
|
|
|
|
|
|
async def summarize_L20(session_id: str) -> str:
|
|
"""
|
|
Merge all L10 Reality Checks into a 'Session Overview'.
|
|
"""
|
|
history = L10_HISTORY.get(session_id, [])
|
|
joined = "\n\n".join(history) if history else ""
|
|
|
|
if not joined:
|
|
return ""
|
|
|
|
prompt = f"""
|
|
You are Lyra Intake creating a 'Session Overview'.
|
|
Merge the following Reality Check paragraphs into one short summary
|
|
capturing progress, themes, and the direction of the conversation.
|
|
|
|
{joined}
|
|
|
|
Overview:
|
|
"""
|
|
summary = await _llm(prompt)
|
|
|
|
L20_HISTORY.setdefault(session_id, [])
|
|
L20_HISTORY[session_id].append(summary)
|
|
|
|
return summary
|
|
|
|
|
|
async def summarize_L30(session_id: str) -> str:
|
|
"""
|
|
Merge all L20 session overviews into a 'Continuity Report'.
|
|
"""
|
|
history = L20_HISTORY.get(session_id, [])
|
|
joined = "\n\n".join(history) if history else ""
|
|
|
|
if not joined:
|
|
return ""
|
|
|
|
prompt = f"""
|
|
You are Lyra Intake generating a 'Continuity Report'.
|
|
Condense these session overviews into one high-level reflection,
|
|
noting major themes, persistent goals, and shifts.
|
|
|
|
{joined}
|
|
|
|
Continuity Report:
|
|
"""
|
|
return await _llm(prompt)
|
|
|
|
|
|
# ─────────────────────────────
|
|
# NeoMem push
|
|
# ─────────────────────────────
|
|
|
|
def push_to_neomem(summary: str, session_id: str, level: str) -> None:
|
|
"""
|
|
Fire-and-forget push of a summary into NeoMem.
|
|
"""
|
|
if not NEOMEM_API or not summary:
|
|
return
|
|
|
|
headers = {"Content-Type": "application/json"}
|
|
if NEOMEM_KEY:
|
|
headers["Authorization"] = f"Bearer {NEOMEM_KEY}"
|
|
|
|
payload = {
|
|
"messages": [{"role": "assistant", "content": summary}],
|
|
"user_id": "brian",
|
|
"metadata": {
|
|
"source": "intake",
|
|
"session_id": session_id,
|
|
"level": level,
|
|
},
|
|
}
|
|
|
|
try:
|
|
import requests
|
|
requests.post(
|
|
f"{NEOMEM_API}/memories",
|
|
json=payload,
|
|
headers=headers,
|
|
timeout=20,
|
|
).raise_for_status()
|
|
print(f"🧠 NeoMem updated ({level}) for {session_id}")
|
|
except Exception as e:
|
|
print(f"NeoMem push failed ({level}, {session_id}): {e}")
|
|
|
|
|
|
# ─────────────────────────────
|
|
# Main entrypoint for Cortex
|
|
# ─────────────────────────────
|
|
async def summarize_context(session_id: str, exchanges: list[dict]):
|
|
"""
|
|
Internal summarizer that uses Cortex's LLM router.
|
|
Produces cascading summaries based on exchange count:
|
|
- L1: Always (most recent activity)
|
|
- L2: After 2+ exchanges
|
|
- L5: After 5+ exchanges
|
|
- L10: After 10+ exchanges
|
|
- L20: After 20+ exchanges
|
|
- L30: After 30+ exchanges
|
|
|
|
Args:
|
|
session_id: The conversation/session ID
|
|
exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}
|
|
"""
|
|
|
|
exchange_count = len(exchanges)
|
|
|
|
if exchange_count == 0:
|
|
return {
|
|
"session_id": session_id,
|
|
"exchange_count": 0,
|
|
"L1": "",
|
|
"L2": "",
|
|
"L5": "",
|
|
"L10": "",
|
|
"L20": "",
|
|
"L30": "",
|
|
"last_updated": datetime.now().isoformat()
|
|
}
|
|
|
|
result = {
|
|
"session_id": session_id,
|
|
"exchange_count": exchange_count,
|
|
"L1": "",
|
|
"L2": "",
|
|
"L5": "",
|
|
"L10": "",
|
|
"L20": "",
|
|
"L30": "",
|
|
"last_updated": datetime.now().isoformat()
|
|
}
|
|
|
|
try:
|
|
# L1: Always generate (most recent exchanges)
|
|
result["L1"] = await summarize_simple(exchanges[-5:])
|
|
print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)")
|
|
|
|
# L2: After 2+ exchanges
|
|
if exchange_count >= 2:
|
|
result["L2"] = await summarize_simple(exchanges[-2:])
|
|
print(f"[Intake] Generated L2 for {session_id}")
|
|
|
|
# L5: After 5+ exchanges
|
|
if exchange_count >= 5:
|
|
result["L5"] = await summarize_simple(exchanges[-10:])
|
|
print(f"[Intake] Generated L5 for {session_id}")
|
|
|
|
# L10: After 10+ exchanges (Reality Check)
|
|
if exchange_count >= 10:
|
|
result["L10"] = await summarize_L10(session_id, exchanges)
|
|
print(f"[Intake] Generated L10 for {session_id}")
|
|
|
|
# L20: After 20+ exchanges (Session Overview - merges L10s)
|
|
if exchange_count >= 20 and exchange_count % 10 == 0:
|
|
result["L20"] = await summarize_L20(session_id)
|
|
print(f"[Intake] Generated L20 for {session_id}")
|
|
|
|
# L30: After 30+ exchanges (Continuity Report - merges L20s)
|
|
if exchange_count >= 30 and exchange_count % 10 == 0:
|
|
result["L30"] = await summarize_L30(session_id)
|
|
print(f"[Intake] Generated L30 for {session_id}")
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
print(f"[Intake] Error during summarization: {e}")
|
|
result["L1"] = f"[Error summarizing: {str(e)}]"
|
|
return result
|
|
|
|
# ─────────────────────────────────
|
|
# Background summarization stub
|
|
# ─────────────────────────────────
|
|
def bg_summarize(session_id: str):
|
|
"""
|
|
Placeholder for background summarization.
|
|
Actual summarization happens during /reason via summarize_context().
|
|
|
|
This function exists to prevent NameError when called from add_exchange_internal().
|
|
"""
|
|
print(f"[Intake] Exchange added for {session_id}. Will summarize on next /reason call.")
|
|
|
|
# ─────────────────────────────
|
|
# Internal entrypoint for Cortex
|
|
# ─────────────────────────────
|
|
def get_recent_messages(session_id: str, limit: int = 20) -> list:
|
|
"""
|
|
Get recent raw messages from the session buffer.
|
|
|
|
Args:
|
|
session_id: Session identifier
|
|
limit: Maximum number of messages to return (default 20)
|
|
|
|
Returns:
|
|
List of message dicts with 'role' and 'content' fields
|
|
"""
|
|
if session_id not in SESSIONS:
|
|
return []
|
|
|
|
buffer = SESSIONS[session_id]["buffer"]
|
|
|
|
# Convert buffer to list and get last N messages
|
|
messages = list(buffer)[-limit:]
|
|
|
|
return messages
|
|
|
|
|
|
def add_exchange_internal(exchange: dict):
|
|
"""
|
|
Direct internal call — bypasses FastAPI request handling.
|
|
Cortex uses this to feed user/assistant turns directly
|
|
into Intake's buffer and trigger full summarization.
|
|
"""
|
|
session_id = exchange.get("session_id")
|
|
if not session_id:
|
|
raise ValueError("session_id missing")
|
|
|
|
exchange["timestamp"] = datetime.now().isoformat()
|
|
|
|
# DEBUG: Verify we're using the module-level SESSIONS
|
|
print(f"[add_exchange_internal] SESSIONS object id: {id(SESSIONS)}, current sessions: {list(SESSIONS.keys())}")
|
|
|
|
# Ensure session exists
|
|
if session_id not in SESSIONS:
|
|
SESSIONS[session_id] = {
|
|
"buffer": deque(maxlen=200),
|
|
"created_at": datetime.now()
|
|
}
|
|
print(f"[add_exchange_internal] Created new session: {session_id}")
|
|
else:
|
|
print(f"[add_exchange_internal] Using existing session: {session_id}")
|
|
|
|
# Append exchange into the rolling buffer
|
|
SESSIONS[session_id]["buffer"].append(exchange)
|
|
buffer_len = len(SESSIONS[session_id]["buffer"])
|
|
print(f"[add_exchange_internal] Added exchange to {session_id}, buffer now has {buffer_len} items")
|
|
|
|
# Trigger summarization immediately
|
|
try:
|
|
bg_summarize(session_id)
|
|
except Exception as e:
|
|
print(f"[Internal Intake] Summarization error: {e}")
|
|
|
|
return {"ok": True, "session_id": session_id}
|