# reasoning.py

import os
import json

from llm.llm_router import call_llm


# ============================================================
# Select which backend this module should use
# ============================================================
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
GLOBAL_TEMP = float(os.getenv("LLM_TEMPERATURE", "0.7"))

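# Example environment configuration (illustrative only; the variable names
# above are what the module reads, the values here are assumptions):
#   CORTEX_LLM=PRIMARY
#   LLM_TEMPERATURE=0.7
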
async def reason_check(
    user_prompt: str,
    identity_block: dict | None,
    rag_block: list[dict] | dict | None,
    reflection_notes: list[str],
    context: dict | None = None
) -> str:
"""
|
|
Build the *draft answer* for Lyra Cortex.
|
|
This is the first-pass reasoning stage (no refinement yet).
|
|
|
|
Args:
|
|
user_prompt: Current user message
|
|
identity_block: Lyra's identity/persona configuration
|
|
rag_block: Relevant long-term memories from NeoMem
|
|
reflection_notes: Meta-awareness notes from reflection stage
|
|
context: Unified context state from context.py (session state, intake, rag, etc.)
|
|
"""
|
|
|
|
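    # Expected input shapes (illustrative assumption, inferred from how this
    # function reads the values below; not a documented NeoMem/context contract):
    #   rag_block -> [{"score": 0.91, "payload": {"data": "...", "metadata": {...}}}, ...]
    #   context   -> {"session_id": "...", "minutes_since_last_msg": 2.5,
    #                 "message_count": 4, "mode": "default", "mood": "neutral",
    #                 "intake": {"L1": [...], "L20": {"summary": "..."},
    #                            "L30": {"summary": "..."}}}
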
    # --------------------------------------------------------
    # Build Reflection Notes block
    # --------------------------------------------------------
    notes_section = ""
    if reflection_notes:
        notes_section = "Reflection Notes (internal, never show to user):\n"
        for note in reflection_notes:
            notes_section += f"- {note}\n"
        notes_section += "\n"

    # --------------------------------------------------------
    # Identity block (constraints, boundaries, rules)
    # --------------------------------------------------------
    identity_txt = ""
    if identity_block:
        try:
            identity_txt = f"Identity Rules:\n{identity_block}\n\n"
        except Exception:
            identity_txt = f"Identity Rules:\n{str(identity_block)}\n\n"

    # --------------------------------------------------------
    # RAG block (optional factual grounding)
    # --------------------------------------------------------
    rag_txt = ""
    if rag_block:
        try:
            # Format NeoMem results with full structure
            if isinstance(rag_block, list) and rag_block:
                rag_txt = "Relevant Long-Term Memories (NeoMem):\n"
                for idx, mem in enumerate(rag_block, 1):
                    score = mem.get("score", 0.0)
                    payload = mem.get("payload", {})
                    data = payload.get("data", "")
                    metadata = payload.get("metadata", {})

                    rag_txt += f"\n[Memory {idx}] (relevance: {score:.2f})\n"
                    rag_txt += f"Content: {data}\n"
                    if metadata:
                        rag_txt += f"Metadata: {json.dumps(metadata, indent=2)}\n"
                    rag_txt += "\n"
            else:
                rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
        except Exception:
            rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"

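    # For reference, a single formatted memory entry rendered by the loop above
    # looks roughly like this (illustrative values only):
    #
    #   [Memory 1] (relevance: 0.91)
    #   Content: User prefers concise, direct answers.
    #   Metadata: {
    #     "source": "chat"
    #   }
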
    # --------------------------------------------------------
    # Context State (session continuity, timing, mode/mood)
    # --------------------------------------------------------
    context_txt = ""
    if context:
        try:
            # Build human-readable context summary
            context_txt = "=== CONTEXT STATE ===\n"
            context_txt += f"Session: {context.get('session_id', 'unknown')}\n"
            context_txt += f"Time since last message: {context.get('minutes_since_last_msg', 0):.1f} minutes\n"
            context_txt += f"Message count: {context.get('message_count', 0)}\n"
            context_txt += f"Mode: {context.get('mode', 'default')}\n"
            context_txt += f"Mood: {context.get('mood', 'neutral')}\n"

            if context.get('active_project'):
                context_txt += f"Active project: {context['active_project']}\n"

            # Include Intake multilevel summaries
            intake = context.get('intake', {})
            if intake:
                context_txt += "\nShort-Term Memory (Intake):\n"

                # L1 - Recent exchanges
                if intake.get('L1'):
                    l1_data = intake['L1']
                    if isinstance(l1_data, list):
                        context_txt += f" L1 (recent): {len(l1_data)} exchanges\n"
                    elif isinstance(l1_data, str):
                        context_txt += f" L1: {l1_data[:200]}...\n"

                # L20 - Session overview (most important for continuity)
                if intake.get('L20'):
                    l20_data = intake['L20']
                    if isinstance(l20_data, dict):
                        summary = l20_data.get('summary', '')
                        context_txt += f" L20 (session overview): {summary}\n"
                    elif isinstance(l20_data, str):
                        context_txt += f" L20: {l20_data}\n"

                # L30 - Continuity report
                if intake.get('L30'):
                    l30_data = intake['L30']
                    if isinstance(l30_data, dict):
                        summary = l30_data.get('summary', '')
                        context_txt += f" L30 (continuity): {summary}\n"
                    elif isinstance(l30_data, str):
                        context_txt += f" L30: {l30_data}\n"

            context_txt += "\n"

        except Exception:
            # Fallback to JSON dump if formatting fails
            context_txt = f"=== CONTEXT STATE ===\n{json.dumps(context, indent=2)}\n\n"

    # --------------------------------------------------------
    # Final assembled prompt
    # --------------------------------------------------------
    prompt = (
        f"{notes_section}"
        f"{identity_txt}"
        f"{context_txt}"  # Context BEFORE RAG for better coherence
        f"{rag_txt}"
        f"User message:\n{user_prompt}\n\n"
        "Write the best possible *internal draft answer*.\n"
        "This draft is NOT shown to the user.\n"
        "Be factual, concise, and focused.\n"
        "Use the context state to maintain continuity and reference past interactions naturally.\n"
    )

    # --------------------------------------------------------
    # Call the LLM using the module-specific backend
    # --------------------------------------------------------
    draft = await call_llm(
        prompt,
        backend=CORTEX_LLM,
        temperature=GLOBAL_TEMP,
    )

    return draft
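

# ------------------------------------------------------------
# Illustrative usage (sketch only): shows the call shape this module
# expects. The rag_block/context payloads below are assumptions inferred
# from how reason_check() reads them, not a documented contract, and
# running this requires a backend reachable through llm.llm_router.
# ------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    _example_rag = [
        {
            "score": 0.91,
            "payload": {
                "data": "User prefers concise, direct answers.",
                "metadata": {"source": "chat", "topic": "preferences"},
            },
        }
    ]
    _example_context = {
        "session_id": "demo-session",
        "minutes_since_last_msg": 2.5,
        "message_count": 4,
        "mode": "default",
        "mood": "neutral",
    }

    _draft = asyncio.run(
        reason_check(
            "Summarise what we agreed on so far.",
            identity_block={"name": "Lyra", "style": "direct"},
            rag_block=_example_rag,
            reflection_notes=["User is resuming an earlier topic."],
            context=_example_context,
        )
    )
    print(_draft)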