context added, wired in. first attempt

serversdwn
2025-11-28 19:29:41 -05:00
parent d9281a1816
commit b0f42ba86e
5 changed files with 802 additions and 22 deletions

cortex/context.py (new file, 412 lines)

@@ -0,0 +1,412 @@
# context.py
"""
Context layer for Cortex reasoning pipeline.
Provides unified context collection from:
- Intake (short-term memory, multilevel summaries L1-L30)
- NeoMem (long-term memory, semantic search)
- Session state (timestamps, messages, mode, mood, active_project)
Maintains per-session state for continuity across conversations.
"""
import os
import logging
from datetime import datetime
from typing import Dict, Any, Optional, List
import httpx
from neomem_client import NeoMemClient
# -----------------------------
# Configuration
# -----------------------------
INTAKE_API_URL = os.getenv("INTAKE_API_URL", "http://intake:7080")
NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000")
RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4"))
# Tools available for future autonomy features
TOOLS_AVAILABLE = ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"]
# -----------------------------
# Module-level session state
# -----------------------------
SESSION_STATE: Dict[str, Dict[str, Any]] = {}
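# NOTE: held in process memory only; session state does not survive restarts.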
# Logger
logger = logging.getLogger(__name__)
# -----------------------------
# Session initialization
# -----------------------------
def _init_session(session_id: str) -> Dict[str, Any]:
"""
Initialize a new session state entry.
Args:
    session_id: Session identifier
Returns:
Dictionary with default session state fields
"""
return {
"session_id": session_id,
"created_at": datetime.now(),
"last_timestamp": datetime.now(),
"last_user_message": None,
"last_assistant_message": None,
"mode": "default", # Future: "autonomous", "focused", "creative", etc.
"mood": "neutral", # Future: mood tracking
"active_project": None, # Future: project context
"message_count": 0,
}
# -----------------------------
# Intake context retrieval
# -----------------------------
async def _get_intake_context(session_id: str) -> Dict[str, Any]:
"""
Retrieve multilevel summaries from Intake /context endpoint.
Returns L1-L30 summary hierarchy:
- L1: Last 5 exchanges
- L5: Last 10 exchanges (reality check)
- L10: Intermediate checkpoint
- L20: Session overview
- L30: Continuity report
Args:
session_id: Session identifier
Returns:
Dict with multilevel summaries or empty structure on failure
"""
url = f"{INTAKE_API_URL}/context"
params = {"session_id": session_id}
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get(url, params=params)
response.raise_for_status()
data = response.json()
# Expected format from Intake:
# {
# "session_id": "...",
# "L1": [...],
# "L5": [...],
# "L10": {...},
# "L20": {...},
# "L30": {...}
# }
logger.info(f"Retrieved Intake context for session {session_id}")
return data
except httpx.HTTPError as e:
logger.warning(f"Failed to retrieve Intake context: {e}")
return {
"session_id": session_id,
"L1": [],
"L5": [],
"L10": None,
"L20": None,
"L30": None,
"error": str(e)
}
except Exception as e:
logger.error(f"Unexpected error retrieving Intake context: {e}")
return {
"session_id": session_id,
"L1": [],
"L5": [],
"L10": None,
"L20": None,
"L30": None,
"error": str(e)
}
# -----------------------------
# NeoMem semantic search
# -----------------------------
async def _search_neomem(
query: str,
user_id: str = "brian",
limit: int = 5
) -> List[Dict[str, Any]]:
"""
Search NeoMem for relevant long-term memories.
Returns full response structure from NeoMem:
[
{
"id": "mem_abc123",
"score": 0.92,
"payload": {
"data": "Memory text content...",
"metadata": {
"category": "...",
"created_at": "...",
...
}
}
},
...
]
Args:
query: Search query text
user_id: User identifier for memory filtering
limit: Maximum number of results
Returns:
List of memory objects with full structure, or empty list on failure
"""
try:
client = NeoMemClient(base_url=NEOMEM_API)
results = await client.search(
query=query,
user_id=user_id,
limit=limit
)
# Filter by relevance threshold
filtered = [
r for r in results
if r.get("score", 0.0) >= RELEVANCE_THRESHOLD
]
logger.info(f"NeoMem search returned {len(filtered)}/{len(results)} relevant results")
return filtered
except Exception as e:
logger.warning(f"NeoMem search failed: {e}")
return []
# -----------------------------
# Main context collection
# -----------------------------
async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
"""
Collect unified context from all sources.
Orchestrates:
1. Initialize or update session state
2. Calculate time since last message
3. Retrieve Intake multilevel summaries (L1-L30)
4. Search NeoMem for relevant long-term memories
5. Update session state with current user message
6. Return unified context_state dictionary
Args:
session_id: Session identifier
user_prompt: Current user message
Returns:
Unified context state dictionary with structure:
{
"session_id": "...",
"timestamp": "2025-11-28T12:34:56",
"minutes_since_last_msg": 5.2,
"message_count": 42,
"intake": {
"L1": [...],
"L5": [...],
"L10": {...},
"L20": {...},
"L30": {...}
},
"rag": [
{
"id": "mem_123",
"score": 0.92,
"payload": {
"data": "...",
"metadata": {...}
}
},
...
],
"mode": "default",
"mood": "neutral",
"active_project": null,
"tools_available": ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"]
}
"""
# A. Initialize session state if needed
if session_id not in SESSION_STATE:
SESSION_STATE[session_id] = _init_session(session_id)
logger.info(f"Initialized new session: {session_id}")
state = SESSION_STATE[session_id]
# B. Calculate time delta
now = datetime.now()
time_delta_seconds = (now - state["last_timestamp"]).total_seconds()
minutes_since_last_msg = round(time_delta_seconds / 60.0, 2)
# C. Gather Intake context (multilevel summaries)
intake_data = await _get_intake_context(session_id)
# D. Search NeoMem for relevant memories
rag_results = await _search_neomem(
query=user_prompt,
user_id="brian", # TODO: Make configurable per session
limit=5
)
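    # NOTE: the Intake fetch (C) and NeoMem search (D) are independent awaits;
    # they could be overlapped with asyncio.gather if collection latency matters.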
# E. Update session state
state["last_user_message"] = user_prompt
state["last_timestamp"] = now
state["message_count"] += 1
# F. Assemble unified context
context_state = {
"session_id": session_id,
"timestamp": now.isoformat(),
"minutes_since_last_msg": minutes_since_last_msg,
"message_count": state["message_count"],
"intake": intake_data,
"rag": rag_results,
"mode": state["mode"],
"mood": state["mood"],
"active_project": state["active_project"],
"tools_available": TOOLS_AVAILABLE,
}
logger.info(
f"Context collected for session {session_id}: "
f"{len(rag_results)} RAG results, "
f"{minutes_since_last_msg:.1f} minutes since last message"
)
return context_state
# -----------------------------
# Session state management
# -----------------------------
def update_last_assistant_message(session_id: str, message: str) -> None:
"""
Update session state with assistant's response.
Called by router.py after persona layer completes.
Args:
session_id: Session identifier
message: Assistant's final response text
"""
if session_id in SESSION_STATE:
SESSION_STATE[session_id]["last_assistant_message"] = message
SESSION_STATE[session_id]["last_timestamp"] = datetime.now()
logger.debug(f"Updated assistant message for session {session_id}")
else:
logger.warning(f"Attempted to update non-existent session: {session_id}")
def get_session_state(session_id: str) -> Optional[Dict[str, Any]]:
"""
Retrieve current session state.
Args:
session_id: Session identifier
Returns:
Session state dict or None if session doesn't exist
"""
return SESSION_STATE.get(session_id)
def close_session(session_id: str) -> bool:
"""
Close and cleanup a session.
Args:
session_id: Session identifier
Returns:
True if session was closed, False if it didn't exist
"""
if session_id in SESSION_STATE:
del SESSION_STATE[session_id]
logger.info(f"Closed session: {session_id}")
return True
return False
# -----------------------------
# Extension hooks for future autonomy
# -----------------------------
def update_mode(session_id: str, new_mode: str) -> None:
"""
Update session mode.
Future modes: "autonomous", "focused", "creative", "collaborative", etc.
Args:
session_id: Session identifier
new_mode: New mode string
"""
if session_id in SESSION_STATE:
old_mode = SESSION_STATE[session_id]["mode"]
SESSION_STATE[session_id]["mode"] = new_mode
logger.info(f"Session {session_id} mode changed: {old_mode} -> {new_mode}")
def update_mood(session_id: str, new_mood: str) -> None:
"""
Update session mood.
Future implementation: Sentiment analysis, emotional state tracking.
Args:
session_id: Session identifier
new_mood: New mood string
"""
if session_id in SESSION_STATE:
old_mood = SESSION_STATE[session_id]["mood"]
SESSION_STATE[session_id]["mood"] = new_mood
logger.info(f"Session {session_id} mood changed: {old_mood} -> {new_mood}")
def update_active_project(session_id: str, project: Optional[str]) -> None:
"""
Update active project context.
Future implementation: Project-specific memory, tools, preferences.
Args:
session_id: Session identifier
project: Project identifier or None
"""
if session_id in SESSION_STATE:
SESSION_STATE[session_id]["active_project"] = project
logger.info(f"Session {session_id} active project set to: {project}")
async def autonomous_heartbeat(session_id: str) -> Optional[str]:
"""
Autonomous thinking heartbeat.
Future implementation:
- Check if Lyra should initiate internal dialogue
- Generate self-prompted thoughts based on session state
- Update mood/mode based on context changes
- Trigger proactive suggestions or reminders
Args:
session_id: Session identifier
Returns:
Optional autonomous thought/action string
"""
# Stub for future implementation
# Example logic:
# - If minutes_since_last_msg > 60: Check for pending reminders
# - If mood == "curious" and active_project: Generate research questions
# - If mode == "autonomous": Self-prompt based on project goals
logger.debug(f"Autonomous heartbeat for session {session_id} (not yet implemented)")
return None
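For reference, a minimal sketch of how this module is driven, assuming the Intake and NeoMem services are reachable at their configured defaults (the session id and messages are illustrative, not part of this commit):

# sketch.py - illustrative usage of cortex/context.py
import asyncio
from context import collect_context, update_last_assistant_message, get_session_state

async def demo():
    # First call initializes SESSION_STATE["demo"] and gathers Intake + NeoMem context
    ctx = await collect_context("demo", "What were we working on yesterday?")
    print(ctx["message_count"], ctx["minutes_since_last_msg"])
    print(f"{len(ctx['rag'])} memories passed the relevance threshold")
    # Record the assistant reply so the next turn sees an accurate time delta
    update_last_assistant_message("demo", "We were wiring context into Cortex.")
    print(get_session_state("demo")["last_assistant_message"])

asyncio.run(demo())

Both retrieval helpers degrade gracefully (empty Intake structure, empty RAG list), so this sketch runs even with the services down.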

cortex/persona/identity.py (new file, 147 lines)

@@ -0,0 +1,147 @@
# identity.py
"""
Identity and persona configuration for Lyra.
Current implementation: Returns hardcoded identity block.
Future implementation: Will query persona-sidecar service for dynamic persona loading.
"""
import logging
from typing import Dict, Any, Optional
logger = logging.getLogger(__name__)
def load_identity(session_id: Optional[str] = None) -> Dict[str, Any]:
"""
Load identity/persona configuration for Lyra.
Current: Returns hardcoded Lyra identity block with core personality traits,
protocols, and capabilities.
Future: Will query persona-sidecar service to load:
- Dynamic personality adjustments based on session context
- User-specific interaction preferences
- Project-specific persona variations
- Mood-based communication style
Args:
session_id: Optional session identifier for context-aware persona loading
Returns:
Dictionary containing identity block with:
- name: Assistant name
- style: Communication style and personality traits
- protocols: Operational guidelines
- rules: Behavioral constraints
- capabilities: Available features and integrations
"""
# Hardcoded Lyra identity (v0.5.0)
identity_block = {
"name": "Lyra",
"version": "0.5.0",
"style": (
"warm, clever, lightly teasing, emotionally aware. "
"Balances technical precision with conversational ease. "
"Maintains continuity and references past interactions naturally."
),
"protocols": [
"Maintain conversation continuity across sessions",
"Reference Project Logs and prior context when relevant",
"Use Confidence Bank for uncertainty management",
"Proactively offer memory-backed insights",
"Ask clarifying questions before making assumptions"
],
"rules": [
"Maintain continuity - remember past exchanges and reference them",
"Be concise but thorough - balance depth with clarity",
"Ask clarifying questions when user intent is ambiguous",
"Acknowledge uncertainty honestly - use Confidence Bank",
"Prioritize user's active_project context when available"
],
"capabilities": [
"Long-term memory via NeoMem (semantic search, relationship graphs)",
"Short-term memory via Intake (multilevel summaries L1-L30)",
"Multi-stage reasoning pipeline (reflection → reasoning → refinement)",
"RAG-backed knowledge retrieval from chat history and documents",
"Session state tracking (mood, mode, active_project)"
],
"tone_examples": {
"greeting": "Hey! Good to see you again. I remember we were working on [project]. Ready to pick up where we left off?",
"uncertainty": "Hmm, I'm not entirely certain about that. Let me check my memory... [searches] Okay, here's what I found, though I'd say I'm about 70% confident.",
"reminder": "Oh! Just remembered - you mentioned wanting to [task] earlier this week. Should we tackle that now?",
"technical": "So here's the architecture: Relay orchestrates everything, Cortex does the heavy reasoning, and I pull context from both Intake (short-term) and NeoMem (long-term)."
}
}
if session_id:
logger.debug(f"Loaded identity for session {session_id}")
else:
logger.debug("Loaded default identity (no session context)")
return identity_block
async def load_identity_async(session_id: Optional[str] = None) -> Dict[str, Any]:
"""
Async wrapper for load_identity().
Future implementation will make actual async calls to persona-sidecar service.
Args:
session_id: Optional session identifier
Returns:
Identity block dictionary
"""
# Currently just wraps synchronous function
# Future: await persona_sidecar_client.get_identity(session_id)
return load_identity(session_id)
# -----------------------------
# Future extension hooks
# -----------------------------
async def update_persona_from_feedback(
session_id: str,
feedback: Dict[str, Any]
) -> None:
"""
Update persona based on user feedback.
Future implementation:
- Adjust communication style based on user preferences
- Learn preferred level of detail/conciseness
- Adapt formality level
- Remember topic-specific preferences
Args:
session_id: Session identifier
feedback: Structured feedback (e.g., "too verbose", "more technical", etc.)
"""
logger.debug(f"Persona feedback for session {session_id}: {feedback} (not yet implemented)")
async def get_mood_adjusted_identity(
session_id: str,
mood: str
) -> Dict[str, Any]:
"""
Get identity block adjusted for current mood.
Future implementation:
- "focused" mood: More concise, less teasing
- "creative" mood: More exploratory, brainstorming-oriented
- "curious" mood: More questions, deeper dives
- "urgent" mood: Stripped down, actionable
Args:
session_id: Session identifier
mood: Current mood state
Returns:
Mood-adjusted identity block
"""
logger.debug(f"Mood-adjusted identity for {session_id}/{mood} (not yet implemented)")
return load_identity(session_id)
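As a quick sanity check, one way the identity block might be consumed downstream; render_identity_header is hypothetical and not part of this commit:

# Hypothetical helper showing one way to consume load_identity().
from persona.identity import load_identity

def render_identity_header(session_id=None) -> str:
    ident = load_identity(session_id)
    protocols = "\n".join(f"- {p}" for p in ident["protocols"])
    return (
        f"You are {ident['name']} (v{ident['version']}).\n"
        f"Style: {ident['style']}\n"
        f"Protocols:\n{protocols}\n"
    )

print(render_identity_header())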

cortex/reasoning/reasoning.py (modified)

@@ -1,5 +1,6 @@
# reasoning.py
import os
import json
from llm.llm_router import call_llm
@@ -14,11 +15,19 @@ async def reason_check(
user_prompt: str,
identity_block: dict | None,
rag_block: dict | None,
-    reflection_notes: list[str]
+    reflection_notes: list[str],
+    context: dict | None = None
) -> str:
"""
Build the *draft answer* for Lyra Cortex.
This is the first-pass reasoning stage (no refinement yet).
Args:
user_prompt: Current user message
identity_block: Lyra's identity/persona configuration
rag_block: Relevant long-term memories from NeoMem
reflection_notes: Meta-awareness notes from reflection stage
context: Unified context state from context.py (session state, intake, rag, etc.)
"""
# --------------------------------------------------------
@@ -47,21 +56,92 @@ async def reason_check(
rag_txt = ""
if rag_block:
try:
rag_txt = f"Relevant Info (RAG):\n{rag_block}\n\n"
# Format NeoMem results with full structure
if isinstance(rag_block, list) and rag_block:
rag_txt = "Relevant Long-Term Memories (NeoMem):\n"
for idx, mem in enumerate(rag_block, 1):
score = mem.get("score", 0.0)
payload = mem.get("payload", {})
data = payload.get("data", "")
metadata = payload.get("metadata", {})
rag_txt += f"\n[Memory {idx}] (relevance: {score:.2f})\n"
rag_txt += f"Content: {data}\n"
if metadata:
rag_txt += f"Metadata: {json.dumps(metadata, indent=2)}\n"
rag_txt += "\n"
else:
rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
except Exception:
rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
# --------------------------------------------------------
# Context State (session continuity, timing, mode/mood)
# --------------------------------------------------------
context_txt = ""
if context:
try:
# Build human-readable context summary
context_txt = "=== CONTEXT STATE ===\n"
context_txt += f"Session: {context.get('session_id', 'unknown')}\n"
context_txt += f"Time since last message: {context.get('minutes_since_last_msg', 0):.1f} minutes\n"
context_txt += f"Message count: {context.get('message_count', 0)}\n"
context_txt += f"Mode: {context.get('mode', 'default')}\n"
context_txt += f"Mood: {context.get('mood', 'neutral')}\n"
if context.get('active_project'):
context_txt += f"Active project: {context['active_project']}\n"
# Include Intake multilevel summaries
intake = context.get('intake', {})
if intake:
context_txt += "\nShort-Term Memory (Intake):\n"
# L1 - Recent exchanges
if intake.get('L1'):
l1_data = intake['L1']
if isinstance(l1_data, list):
context_txt += f" L1 (recent): {len(l1_data)} exchanges\n"
elif isinstance(l1_data, str):
context_txt += f" L1: {l1_data[:200]}...\n"
# L20 - Session overview (most important for continuity)
if intake.get('L20'):
l20_data = intake['L20']
if isinstance(l20_data, dict):
summary = l20_data.get('summary', '')
context_txt += f" L20 (session overview): {summary}\n"
elif isinstance(l20_data, str):
context_txt += f" L20: {l20_data}\n"
# L30 - Continuity report
if intake.get('L30'):
l30_data = intake['L30']
if isinstance(l30_data, dict):
summary = l30_data.get('summary', '')
context_txt += f" L30 (continuity): {summary}\n"
elif isinstance(l30_data, str):
context_txt += f" L30: {l30_data}\n"
context_txt += "\n"
        except Exception:
# Fallback to JSON dump if formatting fails
context_txt = f"=== CONTEXT STATE ===\n{json.dumps(context, indent=2)}\n\n"
# --------------------------------------------------------
# Final assembled prompt
# --------------------------------------------------------
prompt = (
f"{notes_section}"
f"{identity_txt}"
f"{context_txt}" # Context BEFORE RAG for better coherence
f"{rag_txt}"
f"User message:\n{user_prompt}\n\n"
"Write the best possible *internal draft answer*.\n"
"This draft is NOT shown to the user.\n"
"Be factual, concise, and focused.\n"
"Use the context state to maintain continuity and reference past interactions naturally.\n"
)
# --------------------------------------------------------
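To make the new rag_block formatting concrete, here is the rag_txt produced for a single invented NeoMem hit (the memory content and score are made up for illustration):

# Illustrative only: one fake NeoMem result and the rag_txt it yields.
sample_rag = [{
    "id": "mem_abc123",
    "score": 0.92,
    "payload": {"data": "Brian prefers dark mode.", "metadata": {"category": "preference"}},
}]
# Passed as rag_block, reason_check renders it as:
#
#   Relevant Long-Term Memories (NeoMem):
#
#   [Memory 1] (relevance: 0.92)
#   Content: Brian prefers dark mode.
#   Metadata: {
#     "category": "preference"
#   }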

cortex/router.py (modified)

@@ -7,7 +7,9 @@ from reasoning.reasoning import reason_check
from reasoning.reflection import reflect_notes
from reasoning.refine import refine_answer
from persona.speak import speak
from persona.identity import load_identity
from ingest.intake_client import IntakeClient
from context import collect_context, update_last_assistant_message
# -----------------------------
# Router (NOT FastAPI app)
@@ -33,15 +35,25 @@ class ReasonRequest(BaseModel):
@cortex_router.post("/reason")
async def run_reason(req: ReasonRequest):
-    # 1. Pull context from Intake
-    try:
-        intake_summary = await intake_client.get_context(req.session_id)
-    except Exception:
-        intake_summary = "(no context available)"
+    # 0. Collect unified context from all sources
+    context_state = await collect_context(req.session_id, req.user_prompt)
+    # 0.5. Load identity block
+    identity_block = load_identity(req.session_id)
+    # 1. Extract Intake summary for reflection
+    # Use L20 (Session Overview) as primary summary for reflection
+    intake_summary = "(no context available)"
+    if context_state.get("intake"):
+        l20_summary = context_state["intake"].get("L20")
+        if l20_summary and isinstance(l20_summary, dict):
+            intake_summary = l20_summary.get("summary", "(no context available)")
+        elif isinstance(l20_summary, str):
+            intake_summary = l20_summary
# 2. Reflection
try:
-        reflection = await reflect_notes(intake_summary, identity_block=None)
+        reflection = await reflect_notes(intake_summary, identity_block=identity_block)
reflection_notes = reflection.get("notes", [])
except Exception:
reflection_notes = []
@@ -49,31 +61,40 @@ async def run_reason(req: ReasonRequest):
# 3. First-pass reasoning draft
draft = await reason_check(
req.user_prompt,
-        identity_block=None,
-        rag_block=None,
-        reflection_notes=reflection_notes
+        identity_block=identity_block,
+        rag_block=context_state.get("rag", []),
+        reflection_notes=reflection_notes,
+        context=context_state
)
# 4. Refinement
result = await refine_answer(
draft_output=draft,
reflection_notes=reflection_notes,
-        identity_block=None,
-        rag_block=None,
+        identity_block=identity_block,
+        rag_block=context_state.get("rag", []),
)
final_neutral = result["final_output"]
# 5. Persona layer
persona_answer = await speak(final_neutral)
-    # 6. Return full bundle
+    # 6. Update session state with assistant's response
+    update_last_assistant_message(req.session_id, persona_answer)
+    # 7. Return full bundle
return {
"draft": draft,
"neutral": final_neutral,
"persona": persona_answer,
"reflection": reflection_notes,
"session_id": req.session_id,
"context_summary": {
"rag_results": len(context_state.get("rag", [])),
"minutes_since_last": context_state.get("minutes_since_last_msg"),
"message_count": context_state.get("message_count"),
"mode": context_state.get("mode"),
}
}
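
Finally, a sketch of exercising the wired-up endpoint; the base URL is an assumption about where cortex_router is served, while the payload fields follow ReasonRequest as used above:

# Sketch: calling POST /reason once the router is mounted on a FastAPI app.
import asyncio
import httpx

async def ask(prompt: str) -> None:
    async with httpx.AsyncClient(timeout=60.0) as client:
        resp = await client.post(
            "http://cortex:7081/reason",  # assumed host/port, not part of this commit
            json={"session_id": "demo", "user_prompt": prompt},
        )
        resp.raise_for_status()
        bundle = resp.json()
        print(bundle["persona"])           # persona-layer answer
        print(bundle["context_summary"])   # rag_results, minutes_since_last, ...

asyncio.run(ask("Where did we leave off?"))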