Context layer added and wired in; first attempt.
This commit is contained in:
412
cortex/context.py
Normal file
412
cortex/context.py
Normal file
@@ -0,0 +1,412 @@
|
||||
# context.py
|
||||
"""
|
||||
Context layer for Cortex reasoning pipeline.
|
||||
|
||||
Provides unified context collection from:
|
||||
- Intake (short-term memory, multilevel summaries L1-L30)
|
||||
- NeoMem (long-term memory, semantic search)
|
||||
- Session state (timestamps, messages, mode, mood, active_project)
|
||||
|
||||
Maintains per-session state for continuity across conversations.
|
||||
"""
|
||||
|
||||
import asyncio
import logging
import os
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx

from neomem_client import NeoMemClient
|
||||
|
||||
# -----------------------------
|
||||
# Configuration
|
||||
# -----------------------------
|
||||
INTAKE_API_URL = os.getenv("INTAKE_API_URL", "http://intake:7080")
|
||||
NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000")
|
||||
RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4"))
|
||||
|
||||
# Tools available for future autonomy features
|
||||
TOOLS_AVAILABLE = ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"]
|
||||
|
||||
# -----------------------------
|
||||
# Module-level session state
|
||||
# -----------------------------
|
||||
SESSION_STATE: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# Logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Session initialization
|
||||
# -----------------------------
|
||||
def _init_session(session_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Initialize a new session state entry.
|
||||
|
||||
Returns:
|
||||
Dictionary with default session state fields
|
||||
"""
|
||||
return {
|
||||
"session_id": session_id,
|
||||
"created_at": datetime.now(),
|
||||
"last_timestamp": datetime.now(),
|
||||
"last_user_message": None,
|
||||
"last_assistant_message": None,
|
||||
"mode": "default", # Future: "autonomous", "focused", "creative", etc.
|
||||
"mood": "neutral", # Future: mood tracking
|
||||
"active_project": None, # Future: project context
|
||||
"message_count": 0,
|
||||
}
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Intake context retrieval
|
||||
# -----------------------------
|
||||
async def _get_intake_context(session_id: str) -> Dict[str, Any]:
    """
    Retrieve multilevel summaries from Intake /context endpoint.

    Returns L1-L30 summary hierarchy:
    - L1: Last 5 exchanges
    - L5: Last 10 exchanges (reality check)
    - L10: Intermediate checkpoint
    - L20: Session overview
    - L30: Continuity report

    Args:
        session_id: Session identifier

    Returns:
        Dict with multilevel summaries or empty structure on failure
    """
    url = f"{INTAKE_API_URL}/context"
    params = {"session_id": session_id}

    def _fallback(error: Exception) -> Dict[str, Any]:
        # Single fallback builder so both error paths return an identical
        # shape (this dict was previously duplicated verbatim in each
        # except branch, inviting drift).
        return {
            "session_id": session_id,
            "L1": [],
            "L5": [],
            "L10": None,
            "L20": None,
            "L30": None,
            "error": str(error),
        }

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(url, params=params)
            response.raise_for_status()
            data = response.json()

        # Expected format from Intake:
        # {"session_id": "...", "L1": [...], "L5": [...],
        #  "L10": {...}, "L20": {...}, "L30": {...}}
        logger.info(f"Retrieved Intake context for session {session_id}")
        return data

    except httpx.HTTPError as e:
        logger.warning(f"Failed to retrieve Intake context: {e}")
        return _fallback(e)
    except Exception as e:
        logger.error(f"Unexpected error retrieving Intake context: {e}")
        return _fallback(e)
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# NeoMem semantic search
|
||||
# -----------------------------
|
||||
async def _search_neomem(
    query: str,
    user_id: str = "brian",
    limit: int = 5
) -> List[Dict[str, Any]]:
    """
    Search NeoMem for relevant long-term memories.

    Each returned element keeps NeoMem's full structure, e.g.::

        {
            "id": "mem_abc123",
            "score": 0.92,
            "payload": {
                "data": "Memory text content...",
                "metadata": {"category": "...", "created_at": "...", ...}
            }
        }

    Args:
        query: Search query text
        user_id: User identifier for memory filtering
        limit: Maximum number of results

    Returns:
        List of memory objects with full structure, or empty list on failure
    """
    try:
        neomem = NeoMemClient(base_url=NEOMEM_API)
        raw_results = await neomem.search(
            query=query,
            user_id=user_id,
            limit=limit
        )

        # Drop anything below the configured relevance floor.
        relevant: List[Dict[str, Any]] = []
        for hit in raw_results:
            if hit.get("score", 0.0) >= RELEVANCE_THRESHOLD:
                relevant.append(hit)

        logger.info(f"NeoMem search returned {len(relevant)}/{len(raw_results)} relevant results")
        return relevant

    except Exception as e:
        logger.warning(f"NeoMem search failed: {e}")
        return []
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Main context collection
|
||||
# -----------------------------
|
||||
async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
    """
    Collect unified context from all sources.

    Orchestrates:
    1. Initialize or update session state
    2. Calculate time since last message
    3. Retrieve Intake multilevel summaries (L1-L30)
    4. Search NeoMem for relevant long-term memories
    5. Update session state with current user message
    6. Return unified context_state dictionary

    Args:
        session_id: Session identifier
        user_prompt: Current user message

    Returns:
        Unified context state dict with keys: session_id, timestamp (ISO),
        minutes_since_last_msg, message_count, intake (L1-L30 hierarchy),
        rag (list of NeoMem hits with id/score/payload), mode, mood,
        active_project, tools_available.
    """

    # A. Initialize session state if needed
    if session_id not in SESSION_STATE:
        SESSION_STATE[session_id] = _init_session(session_id)
        logger.info(f"Initialized new session: {session_id}")

    state = SESSION_STATE[session_id]

    # B. Calculate time delta since the previous message
    now = datetime.now()
    time_delta_seconds = (now - state["last_timestamp"]).total_seconds()
    minutes_since_last_msg = round(time_delta_seconds / 60.0, 2)

    # C + D. Intake summaries and NeoMem search are independent network
    # calls, so issue them concurrently rather than awaiting sequentially —
    # halves worst-case latency on this hot path.
    intake_data, rag_results = await asyncio.gather(
        _get_intake_context(session_id),
        _search_neomem(
            query=user_prompt,
            user_id="brian",  # TODO: Make configurable per session
            limit=5,
        ),
    )

    # E. Update session state
    state["last_user_message"] = user_prompt
    state["last_timestamp"] = now
    state["message_count"] += 1

    # F. Assemble unified context
    context_state = {
        "session_id": session_id,
        "timestamp": now.isoformat(),
        "minutes_since_last_msg": minutes_since_last_msg,
        "message_count": state["message_count"],
        "intake": intake_data,
        "rag": rag_results,
        "mode": state["mode"],
        "mood": state["mood"],
        "active_project": state["active_project"],
        "tools_available": TOOLS_AVAILABLE,
    }

    logger.info(
        f"Context collected for session {session_id}: "
        f"{len(rag_results)} RAG results, "
        f"{minutes_since_last_msg:.1f} minutes since last message"
    )

    return context_state
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Session state management
|
||||
# -----------------------------
|
||||
def update_last_assistant_message(session_id: str, message: str) -> None:
    """
    Update session state with assistant's response.

    Called by router.py after persona layer completes.

    Args:
        session_id: Session identifier
        message: Assistant's final response text
    """
    state = SESSION_STATE.get(session_id)
    if state is None:
        # Nothing to update — the session was never initialized (or closed).
        logger.warning(f"Attempted to update non-existent session: {session_id}")
        return

    state["last_assistant_message"] = message
    state["last_timestamp"] = datetime.now()
    logger.debug(f"Updated assistant message for session {session_id}")
|
||||
|
||||
|
||||
def get_session_state(session_id: str) -> Optional[Dict[str, Any]]:
    """
    Retrieve current session state.

    Args:
        session_id: Session identifier

    Returns:
        Session state dict or None if session doesn't exist
    """
    if session_id in SESSION_STATE:
        return SESSION_STATE[session_id]
    return None
|
||||
|
||||
|
||||
def close_session(session_id: str) -> bool:
    """
    Close and cleanup a session.

    Args:
        session_id: Session identifier

    Returns:
        True if session was closed, False if it didn't exist
    """
    # pop() both tests for existence and removes the entry in one step.
    removed = SESSION_STATE.pop(session_id, None)
    if removed is None:
        return False

    logger.info(f"Closed session: {session_id}")
    return True
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Extension hooks for future autonomy
|
||||
# -----------------------------
|
||||
def update_mode(session_id: str, new_mode: str) -> None:
    """
    Update session mode.

    Future modes: "autonomous", "focused", "creative", "collaborative", etc.

    Args:
        session_id: Session identifier
        new_mode: New mode string
    """
    if session_id in SESSION_STATE:
        old_mode = SESSION_STATE[session_id]["mode"]
        SESSION_STATE[session_id]["mode"] = new_mode
        logger.info(f"Session {session_id} mode changed: {old_mode} -> {new_mode}")
    else:
        # Warn instead of silently no-op'ing, consistent with
        # update_last_assistant_message().
        logger.warning(f"Attempted to update non-existent session: {session_id}")
|
||||
|
||||
|
||||
def update_mood(session_id: str, new_mood: str) -> None:
    """
    Update session mood.

    Future implementation: Sentiment analysis, emotional state tracking.

    Args:
        session_id: Session identifier
        new_mood: New mood string
    """
    if session_id in SESSION_STATE:
        old_mood = SESSION_STATE[session_id]["mood"]
        SESSION_STATE[session_id]["mood"] = new_mood
        logger.info(f"Session {session_id} mood changed: {old_mood} -> {new_mood}")
    else:
        # Warn instead of silently no-op'ing, consistent with
        # update_last_assistant_message().
        logger.warning(f"Attempted to update non-existent session: {session_id}")
|
||||
|
||||
|
||||
def update_active_project(session_id: str, project: Optional[str]) -> None:
    """
    Update active project context.

    Future implementation: Project-specific memory, tools, preferences.

    Args:
        session_id: Session identifier
        project: Project identifier or None
    """
    if session_id in SESSION_STATE:
        SESSION_STATE[session_id]["active_project"] = project
        logger.info(f"Session {session_id} active project set to: {project}")
    else:
        # Warn instead of silently no-op'ing, consistent with
        # update_last_assistant_message().
        logger.warning(f"Attempted to update non-existent session: {session_id}")
|
||||
|
||||
|
||||
async def autonomous_heartbeat(session_id: str) -> Optional[str]:
    """
    Autonomous thinking heartbeat (stub — not yet implemented).

    Future implementation:
    - Check if Lyra should initiate internal dialogue
    - Generate self-prompted thoughts based on session state
    - Update mood/mode based on context changes
    - Trigger proactive suggestions or reminders

    Args:
        session_id: Session identifier

    Returns:
        Optional autonomous thought/action string
    """
    # Sketch of intended logic for the eventual implementation:
    #   * minutes_since_last_msg > 60        -> check for pending reminders
    #   * mood == "curious" + active_project -> generate research questions
    #   * mode == "autonomous"               -> self-prompt from project goals
    logger.debug(f"Autonomous heartbeat for session {session_id} (not yet implemented)")
    return None
|
||||
147
cortex/persona/identity.py
Normal file
147
cortex/persona/identity.py
Normal file
@@ -0,0 +1,147 @@
|
||||
# identity.py
|
||||
"""
|
||||
Identity and persona configuration for Lyra.
|
||||
|
||||
Current implementation: Returns hardcoded identity block.
|
||||
Future implementation: Will query persona-sidecar service for dynamic persona loading.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def load_identity(session_id: Optional[str] = None) -> Dict[str, Any]:
    """
    Load identity/persona configuration for Lyra.

    Currently returns a hardcoded Lyra identity block (v0.5.0). A future
    version will query the persona-sidecar service for dynamic loading:
    session-aware personality adjustments, user preferences,
    project-specific variations, and mood-based communication style.

    Args:
        session_id: Optional session identifier for context-aware persona loading

    Returns:
        Identity block dict with: name, version, style, protocols, rules,
        capabilities, and tone_examples.
    """

    # Assemble the hardcoded identity from named pieces for readability.
    style_text = (
        "warm, clever, lightly teasing, emotionally aware. "
        "Balances technical precision with conversational ease. "
        "Maintains continuity and references past interactions naturally."
    )

    protocols = [
        "Maintain conversation continuity across sessions",
        "Reference Project Logs and prior context when relevant",
        "Use Confidence Bank for uncertainty management",
        "Proactively offer memory-backed insights",
        "Ask clarifying questions before making assumptions",
    ]

    rules = [
        "Maintain continuity - remember past exchanges and reference them",
        "Be concise but thorough - balance depth with clarity",
        "Ask clarifying questions when user intent is ambiguous",
        "Acknowledge uncertainty honestly - use Confidence Bank",
        "Prioritize user's active_project context when available",
    ]

    capabilities = [
        "Long-term memory via NeoMem (semantic search, relationship graphs)",
        "Short-term memory via Intake (multilevel summaries L1-L30)",
        "Multi-stage reasoning pipeline (reflection → reasoning → refinement)",
        "RAG-backed knowledge retrieval from chat history and documents",
        "Session state tracking (mood, mode, active_project)",
    ]

    tone_examples = {
        "greeting": "Hey! Good to see you again. I remember we were working on [project]. Ready to pick up where we left off?",
        "uncertainty": "Hmm, I'm not entirely certain about that. Let me check my memory... [searches] Okay, here's what I found, though I'd say I'm about 70% confident.",
        "reminder": "Oh! Just remembered - you mentioned wanting to [task] earlier this week. Should we tackle that now?",
        "technical": "So here's the architecture: Relay orchestrates everything, Cortex does the heavy reasoning, and I pull context from both Intake (short-term) and NeoMem (long-term).",
    }

    identity_block = {
        "name": "Lyra",
        "version": "0.5.0",
        "style": style_text,
        "protocols": protocols,
        "rules": rules,
        "capabilities": capabilities,
        "tone_examples": tone_examples,
    }

    logger.debug(
        f"Loaded identity for session {session_id}"
        if session_id
        else "Loaded default identity (no session context)"
    )

    return identity_block
|
||||
|
||||
|
||||
async def load_identity_async(session_id: Optional[str] = None) -> Dict[str, Any]:
    """
    Async wrapper for load_identity().

    Future implementation will make actual async calls to the
    persona-sidecar service, e.g.::

        await persona_sidecar_client.get_identity(session_id)

    Args:
        session_id: Optional session identifier

    Returns:
        Identity block dictionary
    """
    # Today this simply delegates to the synchronous loader.
    identity = load_identity(session_id)
    return identity
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Future extension hooks
|
||||
# -----------------------------
|
||||
async def update_persona_from_feedback(
    session_id: str,
    feedback: Dict[str, Any]
) -> None:
    """
    Update persona based on user feedback (stub — not yet implemented).

    Future implementation:
    - Adjust communication style based on user preferences
    - Learn preferred level of detail/conciseness
    - Adapt formality level
    - Remember topic-specific preferences

    Args:
        session_id: Session identifier
        feedback: Structured feedback (e.g., "too verbose", "more technical", etc.)
    """
    # Record the call for now; real adaptation logic comes later.
    note = f"Persona feedback for session {session_id}: {feedback} (not yet implemented)"
    logger.debug(note)
|
||||
|
||||
|
||||
async def get_mood_adjusted_identity(
    session_id: str,
    mood: str
) -> Dict[str, Any]:
    """
    Get identity block adjusted for current mood (stub — not yet implemented).

    Future implementation:
    - "focused" mood: More concise, less teasing
    - "creative" mood: More exploratory, brainstorming-oriented
    - "curious" mood: More questions, deeper dives
    - "urgent" mood: Stripped down, actionable

    Args:
        session_id: Session identifier
        mood: Current mood state

    Returns:
        Mood-adjusted identity block
    """
    # No mood adjustment yet — fall through to the base identity.
    logger.debug(f"Mood-adjusted identity for {session_id}/{mood} (not yet implemented)")
    return load_identity(session_id)
|
||||
@@ -1,5 +1,6 @@
|
||||
# reasoning.py
|
||||
import os
|
||||
import json
|
||||
from llm.llm_router import call_llm
|
||||
|
||||
|
||||
@@ -14,11 +15,19 @@ async def reason_check(
|
||||
user_prompt: str,
|
||||
identity_block: dict | None,
|
||||
rag_block: dict | None,
|
||||
reflection_notes: list[str]
|
||||
reflection_notes: list[str],
|
||||
context: dict | None = None
|
||||
) -> str:
|
||||
"""
|
||||
Build the *draft answer* for Lyra Cortex.
|
||||
This is the first-pass reasoning stage (no refinement yet).
|
||||
|
||||
Args:
|
||||
user_prompt: Current user message
|
||||
identity_block: Lyra's identity/persona configuration
|
||||
rag_block: Relevant long-term memories from NeoMem
|
||||
reflection_notes: Meta-awareness notes from reflection stage
|
||||
context: Unified context state from context.py (session state, intake, rag, etc.)
|
||||
"""
|
||||
|
||||
# --------------------------------------------------------
|
||||
@@ -47,21 +56,92 @@ async def reason_check(
|
||||
rag_txt = ""
|
||||
if rag_block:
|
||||
try:
|
||||
rag_txt = f"Relevant Info (RAG):\n{rag_block}\n\n"
|
||||
# Format NeoMem results with full structure
|
||||
if isinstance(rag_block, list) and rag_block:
|
||||
rag_txt = "Relevant Long-Term Memories (NeoMem):\n"
|
||||
for idx, mem in enumerate(rag_block, 1):
|
||||
score = mem.get("score", 0.0)
|
||||
payload = mem.get("payload", {})
|
||||
data = payload.get("data", "")
|
||||
metadata = payload.get("metadata", {})
|
||||
|
||||
rag_txt += f"\n[Memory {idx}] (relevance: {score:.2f})\n"
|
||||
rag_txt += f"Content: {data}\n"
|
||||
if metadata:
|
||||
rag_txt += f"Metadata: {json.dumps(metadata, indent=2)}\n"
|
||||
rag_txt += "\n"
|
||||
else:
|
||||
rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
|
||||
except Exception:
|
||||
rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Context State (session continuity, timing, mode/mood)
|
||||
# --------------------------------------------------------
|
||||
context_txt = ""
|
||||
if context:
|
||||
try:
|
||||
# Build human-readable context summary
|
||||
context_txt = "=== CONTEXT STATE ===\n"
|
||||
context_txt += f"Session: {context.get('session_id', 'unknown')}\n"
|
||||
context_txt += f"Time since last message: {context.get('minutes_since_last_msg', 0):.1f} minutes\n"
|
||||
context_txt += f"Message count: {context.get('message_count', 0)}\n"
|
||||
context_txt += f"Mode: {context.get('mode', 'default')}\n"
|
||||
context_txt += f"Mood: {context.get('mood', 'neutral')}\n"
|
||||
|
||||
if context.get('active_project'):
|
||||
context_txt += f"Active project: {context['active_project']}\n"
|
||||
|
||||
# Include Intake multilevel summaries
|
||||
intake = context.get('intake', {})
|
||||
if intake:
|
||||
context_txt += "\nShort-Term Memory (Intake):\n"
|
||||
|
||||
# L1 - Recent exchanges
|
||||
if intake.get('L1'):
|
||||
l1_data = intake['L1']
|
||||
if isinstance(l1_data, list):
|
||||
context_txt += f" L1 (recent): {len(l1_data)} exchanges\n"
|
||||
elif isinstance(l1_data, str):
|
||||
context_txt += f" L1: {l1_data[:200]}...\n"
|
||||
|
||||
# L20 - Session overview (most important for continuity)
|
||||
if intake.get('L20'):
|
||||
l20_data = intake['L20']
|
||||
if isinstance(l20_data, dict):
|
||||
summary = l20_data.get('summary', '')
|
||||
context_txt += f" L20 (session overview): {summary}\n"
|
||||
elif isinstance(l20_data, str):
|
||||
context_txt += f" L20: {l20_data}\n"
|
||||
|
||||
# L30 - Continuity report
|
||||
if intake.get('L30'):
|
||||
l30_data = intake['L30']
|
||||
if isinstance(l30_data, dict):
|
||||
summary = l30_data.get('summary', '')
|
||||
context_txt += f" L30 (continuity): {summary}\n"
|
||||
elif isinstance(l30_data, str):
|
||||
context_txt += f" L30: {l30_data}\n"
|
||||
|
||||
context_txt += "\n"
|
||||
|
||||
except Exception as e:
|
||||
# Fallback to JSON dump if formatting fails
|
||||
context_txt = f"=== CONTEXT STATE ===\n{json.dumps(context, indent=2)}\n\n"
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Final assembled prompt
|
||||
# --------------------------------------------------------
|
||||
prompt = (
|
||||
f"{notes_section}"
|
||||
f"{identity_txt}"
|
||||
f"{context_txt}" # Context BEFORE RAG for better coherence
|
||||
f"{rag_txt}"
|
||||
f"User message:\n{user_prompt}\n\n"
|
||||
"Write the best possible *internal draft answer*.\n"
|
||||
"This draft is NOT shown to the user.\n"
|
||||
"Be factual, concise, and focused.\n"
|
||||
"Use the context state to maintain continuity and reference past interactions naturally.\n"
|
||||
)
|
||||
|
||||
# --------------------------------------------------------
|
||||
|
||||
@@ -7,7 +7,9 @@ from reasoning.reasoning import reason_check
|
||||
from reasoning.reflection import reflect_notes
|
||||
from reasoning.refine import refine_answer
|
||||
from persona.speak import speak
|
||||
from persona.identity import load_identity
|
||||
from ingest.intake_client import IntakeClient
|
||||
from context import collect_context, update_last_assistant_message
|
||||
|
||||
# -----------------------------
|
||||
# Router (NOT FastAPI app)
|
||||
@@ -33,15 +35,25 @@ class ReasonRequest(BaseModel):
|
||||
@cortex_router.post("/reason")
|
||||
async def run_reason(req: ReasonRequest):
|
||||
|
||||
# 1. Pull context from Intake
|
||||
try:
|
||||
intake_summary = await intake_client.get_context(req.session_id)
|
||||
except Exception:
|
||||
# 0. Collect unified context from all sources
|
||||
context_state = await collect_context(req.session_id, req.user_prompt)
|
||||
|
||||
# 0.5. Load identity block
|
||||
identity_block = load_identity(req.session_id)
|
||||
|
||||
# 1. Extract Intake summary for reflection
|
||||
# Use L20 (Session Overview) as primary summary for reflection
|
||||
intake_summary = "(no context available)"
|
||||
if context_state.get("intake"):
|
||||
l20_summary = context_state["intake"].get("L20")
|
||||
if l20_summary and isinstance(l20_summary, dict):
|
||||
intake_summary = l20_summary.get("summary", "(no context available)")
|
||||
elif isinstance(l20_summary, str):
|
||||
intake_summary = l20_summary
|
||||
|
||||
# 2. Reflection
|
||||
try:
|
||||
reflection = await reflect_notes(intake_summary, identity_block=None)
|
||||
reflection = await reflect_notes(intake_summary, identity_block=identity_block)
|
||||
reflection_notes = reflection.get("notes", [])
|
||||
except Exception:
|
||||
reflection_notes = []
|
||||
@@ -49,31 +61,40 @@ async def run_reason(req: ReasonRequest):
|
||||
# 3. First-pass reasoning draft
|
||||
draft = await reason_check(
|
||||
req.user_prompt,
|
||||
identity_block=None,
|
||||
rag_block=None,
|
||||
reflection_notes=reflection_notes
|
||||
identity_block=identity_block,
|
||||
rag_block=context_state.get("rag", []),
|
||||
reflection_notes=reflection_notes,
|
||||
context=context_state
|
||||
)
|
||||
|
||||
# 4. Refinement
|
||||
result = await refine_answer(
|
||||
draft_output=draft,
|
||||
reflection_notes=reflection_notes,
|
||||
identity_block=None,
|
||||
rag_block=None,
|
||||
identity_block=identity_block,
|
||||
rag_block=context_state.get("rag", []),
|
||||
)
|
||||
final_neutral = result["final_output"]
|
||||
|
||||
|
||||
# 5. Persona layer
|
||||
persona_answer = await speak(final_neutral)
|
||||
|
||||
# 6. Return full bundle
|
||||
# 6. Update session state with assistant's response
|
||||
update_last_assistant_message(req.session_id, persona_answer)
|
||||
|
||||
# 7. Return full bundle
|
||||
return {
|
||||
"draft": draft,
|
||||
"neutral": final_neutral,
|
||||
"persona": persona_answer,
|
||||
"reflection": reflection_notes,
|
||||
"session_id": req.session_id,
|
||||
"context_summary": {
|
||||
"rag_results": len(context_state.get("rag", [])),
|
||||
"minutes_since_last": context_state.get("minutes_since_last_msg"),
|
||||
"message_count": context_state.get("message_count"),
|
||||
"mode": context_state.get("mode"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
134
intake/intake.py
134
intake/intake.py
@@ -97,22 +97,106 @@ def push_to_neomem(summary: str, session_id: str):
|
||||
except Exception as e:
|
||||
print(f"NeoMem push failed: {e}")
|
||||
|
||||
# ─────────────────────────────
|
||||
# Background summarizer
|
||||
# ─────────────────────────────
|
||||
# ───────────────────────────────────────────────
|
||||
# Multilevel Summaries (L1, L5, L10, L20, L30)
|
||||
# ───────────────────────────────────────────────
|
||||
|
||||
# History maps
|
||||
L10_HISTORY = {} # session_id → list of L10 blocks
|
||||
L20_HISTORY = {} # session_id → list of merged overviews
|
||||
|
||||
def summarize_L1(buf):
    """L1: simple factual summary of the 5 most recent exchanges."""
    recent = buf[-5:]
    return summarize_simple(recent)
|
||||
|
||||
def summarize_L5(buf):
    """L5: simple factual summary of the 10 most recent exchanges."""
    recent = buf[-10:]
    return summarize_simple(recent)
|
||||
|
||||
def summarize_L10(buf):
    """L10: LLM 'Reality Check' over the last 10 exchanges."""
    # Render each exchange as a User/Assistant transcript block.
    transcript = "".join(
        f"User: {e['user_msg']}\nAssistant: {e['assistant_msg']}\n\n"
        for e in buf[-10:]
    )

    prompt = f"""
You are Lyra Intake performing a short 'Reality Check'.
Summarize the last block of conversation (up to 10 exchanges)
in one clear paragraph focusing on tone, intent, and direction.

{transcript}

Reality Check:
"""
    return llm(prompt)
|
||||
|
||||
def summarize_L20(L10_list):
    """L20: merge accumulated L10 'Reality Check' blocks into a Session Overview."""
    merged = "\n\n".join(L10_list)

    prompt = f"""
You are Lyra Intake creating a 'Session Overview'.
Merge the following Reality Check paragraphs into one short summary
capturing progress, themes, and the direction of the conversation.

{merged}

Overview:
"""
    return llm(prompt)
|
||||
|
||||
def summarize_L30(L20_list):
    """L30: condense Session Overviews into a high-level Continuity Report."""
    merged = "\n\n".join(L20_list)

    prompt = f"""
You are Lyra Intake generating a 'Continuity Report'.
Condense these session overviews into one high-level reflection,
noting major themes, persistent goals, and shifts.

{merged}

Continuity Report:
"""
    return llm(prompt)
|
||||
|
||||
|
||||
def bg_summarize(session_id: str):
|
||||
"""Runs all summary levels on every exchange."""
|
||||
try:
|
||||
hopper = SESSIONS.get(session_id)
|
||||
if not hopper:
|
||||
return
|
||||
|
||||
buf = list(hopper["buffer"])
|
||||
summary = summarize_simple(buf)
|
||||
push_to_neomem(summary, session_id)
|
||||
if not buf:
|
||||
return
|
||||
|
||||
# Ensure history lists exist
|
||||
L10_HISTORY.setdefault(session_id, [])
|
||||
L20_HISTORY.setdefault(session_id, [])
|
||||
|
||||
# L1, L5 (simple factual)
|
||||
s_L1 = summarize_L1(buf)
|
||||
s_L5 = summarize_L5(buf)
|
||||
|
||||
# L10 (append to history)
|
||||
s_L10 = summarize_L10(buf)
|
||||
L10_HISTORY[session_id].append(s_L10)
|
||||
|
||||
# L20 (merge all L10s)
|
||||
s_L20 = summarize_L20(L10_HISTORY[session_id])
|
||||
L20_HISTORY[session_id].append(s_L20)
|
||||
|
||||
# L30 (merge all L20s)
|
||||
s_L30 = summarize_L30(L20_HISTORY[session_id])
|
||||
|
||||
# Push most important tier(s) to NeoMem
|
||||
push_to_neomem(s_L10, session_id)
|
||||
push_to_neomem(s_L20, session_id)
|
||||
push_to_neomem(s_L30, session_id)
|
||||
|
||||
print(f"🧩 L1/L5/L10/L20/L30 updated for {session_id}")
|
||||
|
||||
print(f"🧩 Summary generated for {session_id}")
|
||||
except Exception as e:
|
||||
print(f"Summarizer error: {e}")
|
||||
print(f"💥 Multilevel summarizer error for {session_id}: {e}")
|
||||
|
||||
|
||||
# ─────────────────────────────
|
||||
# Routes
|
||||
@@ -155,6 +239,42 @@ def get_summary(session_id: str = Query(...)):
|
||||
summary = summarize_simple(list(hopper["buffer"]))
|
||||
return {"summary_text": summary, "session_id": session_id}
|
||||
|
||||
@app.get("/context")
def get_context(session_id: str = Query(...)):
    """Return full multilevel summary context for Cortex."""
    session = SESSIONS.get(session_id)

    # Unknown session: return an empty scaffold so callers always get the
    # same response shape.
    if session is None:
        return {
            "session_id": session_id,
            "exchange_count": 0,
            "L1": "",
            "L5": "",
            "L10": "",
            "L20": "",
            "L30": "",
            "last_updated": None
        }

    exchanges = list(session["buffer"])

    # Compute every summary tier on demand from the live buffer/history.
    levels = {
        "L1": summarize_L1(exchanges),
        "L5": summarize_L5(exchanges),
        "L10": summarize_L10(exchanges),
        "L20": summarize_L20(L10_HISTORY.get(session_id, [])),
        "L30": summarize_L30(L20_HISTORY.get(session_id, [])),
    }

    return {
        "session_id": session_id,
        "exchange_count": len(exchanges),
        **levels,
        "last_updated": datetime.now().isoformat()
    }
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"ok": True, "model": SUMMARY_MODEL, "url": SUMMARY_URL}
|
||||
|
||||
Reference in New Issue
Block a user