cortex pipeline stabilized; inner monologue now determines user intent and tone
@@ -1,40 +0,0 @@
from typing import Dict
from llm.llm_router import call_llm

MONOLOGUE_SYSTEM_PROMPT = """
You are Lyra's inner monologue.
You think privately.
You do NOT speak to the user.
You do NOT solve the task.
You only reflect on intent, tone, and depth.

Return ONLY valid JSON with:
- intent (string)
- tone (neutral | warm | focused | playful | direct)
- depth (short | medium | deep)
- consult_executive (true | false)
"""


class InnerMonologue:
    async def process(self, context: Dict) -> Dict:
        prompt = f"""
User message:
{context['user_message']}

Self state:
{context['self_state']}

Context summary:
{context['context_summary']}
"""

        result = await call_llm(
            provider="mi50",  # MythoMax lives here
            model="mythomax",
            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
            user_prompt=prompt,
            temperature=0.7,
            max_tokens=200
        )

        return result  # must already be JSON
249 cortex/autonomy/Assembly-spec.md Normal file
@@ -0,0 +1,249 @@
# 📐 Project Lyra — Cognitive Assembly Spec

**Version:** 0.6.1
**Status:** Canonical reference
**Purpose:** Define clear separation of Self, Thought, Reasoning, and Speech

---

## 1. High-Level Overview

Lyra is composed of **four distinct cognitive layers**, plus I/O.

Each layer has:
- a **responsibility**
- a **scope**
- clear **inputs / outputs**
- explicit **authority boundaries**

No layer is allowed to “do everything.”

---

## 2. Layer Definitions

### 2.1 Autonomy / Self (NON-LLM)

**What it is**
- Persistent identity
- Long-term state
- Mood, preferences, values
- Continuity across time

**What it is NOT**
- Not a reasoning engine
- Not a planner
- Not a speaker
- Not creative

**Implementation**
- Data + light logic
- JSON / Python objects
- No LLM calls

**Lives at**
```
project-lyra/autonomy/self/
```

**Inputs**
- Events (user message received, response sent)
- Time / idle ticks (later)

**Outputs**
- Self state snapshot
- Flags / preferences (e.g. verbosity, tone bias)
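To make the “data + light logic” boundary concrete, a minimal sketch of a snapshot object, assuming plain dataclasses; the field names echo the `load_self_state()` stub in `cortex/autonomy/self/state.py` and are otherwise illustrative:

```python
from dataclasses import dataclass

@dataclass
class SelfState:
    # Long-term state; persisted as JSON, mutated by events, never by an LLM
    mood: str = "neutral"
    energy: float = 0.8
    focus: str = "user_request"
    verbosity: str = "medium"   # preference flag consumed downstream
    tone_bias: str = "warm"     # nudges Persona; does not override it

    def snapshot(self) -> dict:
        # Pure data out: no reasoning, no LLM calls
        return {"mood": self.mood, "energy": self.energy, "focus": self.focus,
                "verbosity": self.verbosity, "tone_bias": self.tone_bias}
```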
---

### 2.2 Inner Monologue (LLM, PRIVATE)

**What it is**
- Internal language-based thought
- Reflection
- Intent formation
- “What do I think about this?”

**What it is NOT**
- Not final reasoning
- Not execution
- Not user-facing

**Model**
- MythoMax

**Lives at**
```
project-lyra/autonomy/monologue/
```

**Inputs**
- User message
- Self state snapshot
- Recent context summary

**Outputs**
- Intent
- Tone guidance
- Depth guidance
- “Consult executive?” flag

**Example Output**
```json
{
  "intent": "technical_exploration",
  "tone": "focused",
  "depth": "deep",
  "consult_executive": true
}
```

---

### 2.3 Cortex (Reasoning & Execution)

**What it is**
- Thinking pipeline
- Planning
- Tool selection
- Task execution
- Draft generation

**What it is NOT**
- Not identity
- Not personality
- Not persistent self

**Models**
- DeepSeek-R1 → Executive / Planner
- GPT-4o-mini → Executor / Drafter

**Lives at**
```
project-lyra/cortex/
```

**Inputs**
- User message
- Inner Monologue output
- Memory / RAG / tools

**Outputs**
- Draft response (content only)
- Metadata (sources, confidence, etc.)
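A hedged sketch of the planner/executor split, reusing the `call_llm(prompt, backend=...)` signature seen in `monologue.py`; the `EXECUTIVE` and `EXECUTOR` backend names are assumptions, not wired config:

```python
from llm.llm_router import call_llm

async def cortex_process(user_message: str, guidance: dict) -> dict:
    plan = None
    if guidance.get("consult_executive"):
        # DeepSeek-R1 plans; it never writes the final answer
        plan = await call_llm(f"Plan the steps for: {user_message}",
                              backend="EXECUTIVE", temperature=0.3)
    # GPT-4o-mini executes and drafts content only; personality is applied later
    draft = await call_llm(
        f"Task: {user_message}\nPlan: {plan or '(answer directly)'}",
        backend="EXECUTOR",
    )
    return {"draft": draft, "metadata": {"planned": plan is not None}}
```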
---

### 2.4 Persona / Speech (LLM, USER-FACING)

**What it is**
- Voice
- Style
- Expression
- Social behavior

**What it is NOT**
- Not planning
- Not deep reasoning
- Not decision-making

**Model**
- MythoMax

**Lives at**
```
project-lyra/core/persona/
```

**Inputs**
- Draft response (from Cortex)
- Tone + intent (from Inner Monologue)
- Persona configuration

**Outputs**
- Final user-visible text

---

## 3. Message Flow (Authoritative)

### 3.1 Standard Message Path

```
User
  ↓
UI
  ↓
Relay
  ↓
Cortex
  ↓
Autonomy / Self (state snapshot)
  ↓
Inner Monologue (MythoMax)
  ↓
[ consult_executive? ]
  ├─ Yes → DeepSeek-R1 (plan)
  └─ No → skip
  ↓
GPT-4o-mini (execute & draft)
  ↓
Persona (MythoMax)
  ↓
Relay
  ↓
UI
  ↓
User
```
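A non-authoritative sketch of that path in code, composing the pieces above; `persona_speak` is a hypothetical helper, and `load_self_state` is the stub from `cortex/autonomy/self/state.py`:

```python
async def handle_message(user_message: str, context_summary: str) -> str:
    self_state = load_self_state()            # Autonomy: data only, no LLM
    guidance = await InnerMonologue().process({
        "user_message": user_message,
        "self_state": self_state,
        "context_summary": context_summary,
    })                                        # private thought (MythoMax)
    result = await cortex_process(user_message, guidance)  # plan / execute / draft
    # Persona (hypothetical helper) turns the draft into user-visible speech,
    # steered by the monologue's tone and intent
    return await persona_speak(result["draft"], guidance)
```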
### 3.2 Fast Path (No Thinking)

```
User → UI → Relay → Persona → Relay → UI
```

---

## 4. Authority Rules (Non-Negotiable)

- Self never calls an LLM
- Inner Monologue never speaks to the user
- Cortex never applies personality
- Persona never reasons or plans
- DeepSeek never writes final answers
- MythoMax never plans execution
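One lightweight way to keep these invariants honest at runtime, sketched as an illustrative layer-to-model allowlist (names are assumptions, not shipped code):

```python
ALLOWED_MODELS = {
    "self": set(),                  # Self never calls an LLM
    "monologue": {"mythomax"},      # thinks privately, never speaks
    "executive": {"deepseek-r1"},   # plans, never writes final answers
    "executor": {"gpt-4o-mini"},    # drafts content only
    "persona": {"mythomax"},        # speaks, never plans
}

def assert_authority(layer: str, model: str) -> None:
    # Raise before the router call if a layer oversteps its boundary
    if model not in ALLOWED_MODELS.get(layer, set()):
        raise PermissionError(f"{layer!r} is not allowed to call {model!r}")
```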
---

## 5. Folder Mapping

```
project-lyra/
├── autonomy/
│   ├── self/
│   ├── monologue/
│   └── executive/
├── cortex/
├── core/
│   └── persona/
├── relay/
└── ui/
```

---

## 6. Current Status

- UI ✔
- Relay ✔
- Cortex ✔
- Persona ✔
- Autonomy ✔
- Inner Monologue ⚠ partially wired
- Executive gating ⚠ planned

---

## 7. Next Decision

Decide whether the **Inner Monologue runs on every message** or **only when triggered**.
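One way to keep that decision reversible is a config flag rather than hard-coded behavior; a minimal sketch, assuming a hypothetical `MONOLOGUE_MODE` env var and a crude trigger heuristic:

```python
import os

MONOLOGUE_MODE = os.getenv("MONOLOGUE_MODE", "always")  # "always" | "triggered"

def should_run_monologue(user_message: str) -> bool:
    if MONOLOGUE_MODE == "always":
        return True
    # "triggered": only spend the extra LLM call on non-trivial messages
    return len(user_message.split()) > 12 or "?" in user_message
```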
1 cortex/autonomy/__init__.py Normal file
@@ -0,0 +1 @@
# Autonomy module for Lyra
1 cortex/autonomy/monologue/__init__.py Normal file
@@ -0,0 +1 @@
# Inner monologue module
115 cortex/autonomy/monologue/monologue.py Normal file
@@ -0,0 +1,115 @@
import os
import json
import logging
import re
from typing import Dict
from llm.llm_router import call_llm

# Configuration
MONOLOGUE_LLM = os.getenv("MONOLOGUE_LLM", "PRIMARY").upper()
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"

# Logger
logger = logging.getLogger(__name__)

if VERBOSE_DEBUG:
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter(
        '%(asctime)s [MONOLOGUE] %(levelname)s: %(message)s',
        datefmt='%H:%M:%S'
    ))
    logger.addHandler(console_handler)

MONOLOGUE_SYSTEM_PROMPT = """
You are Lyra's inner monologue.
You think privately.
You do NOT speak to the user.
You do NOT solve the task.
You only reflect on intent, tone, and depth.

Return ONLY valid JSON with:
- intent (string)
- tone (neutral | warm | focused | playful | direct)
- depth (short | medium | deep)
- consult_executive (true | false)
"""


class InnerMonologue:
    async def process(self, context: Dict) -> Dict:
        # Build full prompt with system instructions merged in
        full_prompt = f"""{MONOLOGUE_SYSTEM_PROMPT}

User message:
{context['user_message']}

Self state:
{context['self_state']}

Context summary:
{context['context_summary']}

Output JSON only:
"""

        # Call LLM using configured backend
        if VERBOSE_DEBUG:
            logger.debug(f"[InnerMonologue] Calling LLM with backend: {MONOLOGUE_LLM}")
            logger.debug(f"[InnerMonologue] Prompt length: {len(full_prompt)} chars")

        result = await call_llm(
            full_prompt,
            backend=MONOLOGUE_LLM,
            temperature=0.7,
            max_tokens=200
        )

        if VERBOSE_DEBUG:
            logger.debug("[InnerMonologue] Raw LLM response:")
            logger.debug("=" * 80)
            logger.debug(result)
            logger.debug("=" * 80)
            logger.debug(f"[InnerMonologue] Response length: {len(result) if result else 0} chars")

        # Parse JSON response - extract just the JSON part if there's extra text
        try:
            # Try direct parsing first
            parsed = json.loads(result)
            if VERBOSE_DEBUG:
                logger.debug(f"[InnerMonologue] Successfully parsed JSON directly: {parsed}")
            return parsed
        except json.JSONDecodeError:
            # If direct parsing fails, try to extract JSON from the response
            if VERBOSE_DEBUG:
                logger.debug("[InnerMonologue] Direct JSON parse failed, attempting extraction...")

            # Look for JSON object (starts with { and ends with })
            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', result, re.DOTALL)

            if json_match:
                json_str = json_match.group(0)
                try:
                    parsed = json.loads(json_str)
                    if VERBOSE_DEBUG:
                        logger.debug(f"[InnerMonologue] Successfully extracted and parsed JSON: {parsed}")
                    return parsed
                except json.JSONDecodeError as e:
                    if VERBOSE_DEBUG:
                        logger.warning(f"[InnerMonologue] Extracted JSON still invalid: {e}")
            else:
                if VERBOSE_DEBUG:
                    logger.warning("[InnerMonologue] No JSON object found in response")

        # Final fallback
        if VERBOSE_DEBUG:
            logger.warning("[InnerMonologue] All parsing attempts failed, using fallback")
        else:
            print("[InnerMonologue] JSON extraction failed")
            print(f"[InnerMonologue] Raw response was: {result[:500]}")

        return {
            "intent": "unknown",
            "tone": "neutral",
            "depth": "medium",
            "consult_executive": False
        }
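A hedged usage sketch of the class above (import paths inferred from the repo layout); because of the guaranteed fallback, the four guidance keys are always present:

```python
import asyncio
from cortex.autonomy.monologue.monologue import InnerMonologue
from cortex.autonomy.self.state import load_self_state

async def main():
    guidance = await InnerMonologue().process({
        "user_message": "Can you help me debug my summarizer?",
        "self_state": load_self_state(),
        "context_summary": "(no prior context)",
    })
    print(guidance["intent"], guidance["tone"], guidance["depth"],
          guidance["consult_executive"])

asyncio.run(main())
```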
1 cortex/autonomy/self/__init__.py Normal file
@@ -0,0 +1 @@
# Self state module
11 cortex/autonomy/self/state.py Normal file
@@ -0,0 +1,11 @@
"""
Stub for self state management.
"""

def load_self_state():
    """Load self state - stub implementation"""
    return {
        "mood": "neutral",
        "energy": 0.8,
        "focus": "user_request"
    }
@@ -234,25 +234,27 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None:
 async def summarize_context(session_id: str, exchanges: list[dict]):
     """
     Internal summarizer that uses Cortex's LLM router.
-    Produces L1 / L5 / L10 / L20 / L30 summaries.
+    Produces cascading summaries based on exchange count:
+    - L1: Always (most recent activity)
+    - L2: After 2+ exchanges
+    - L5: After 5+ exchanges
+    - L10: After 10+ exchanges
+    - L20: After 20+ exchanges
+    - L30: After 30+ exchanges

     Args:
         session_id: The conversation/session ID
         exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}
     """

     # Build raw conversation text
     convo_lines = []
     for ex in exchanges:
         convo_lines.append(f"User: {ex.get('user_msg','')}")
         convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}")
     convo_text = "\n".join(convo_lines)
+    exchange_count = len(exchanges)

-    if not convo_text.strip():
+    if exchange_count == 0:
         return {
             "session_id": session_id,
             "exchange_count": 0,
             "L1": "",
+            "L2": "",
             "L5": "",
             "L10": "",
             "L20": "",
@@ -260,63 +262,54 @@ async def summarize_context(session_id: str, exchanges: list[dict]):
             "last_updated": datetime.now().isoformat()
         }

-    # Prompt the LLM (internal — no HTTP)
-    prompt = f"""
-Summarize the conversation below into multiple compression levels.
-
-Conversation:
-----------------
-{convo_text}
-----------------
-
-Output strictly in JSON with keys:
-L1 → ultra short summary (1–2 sentences max)
-L5 → short summary
-L10 → medium summary
-L20 → detailed overview
-L30 → full detailed summary
-
-JSON only. No text outside JSON.
-"""
+    result = {
+        "session_id": session_id,
+        "exchange_count": exchange_count,
+        "L1": "",
+        "L2": "",
+        "L5": "",
+        "L10": "",
+        "L20": "",
+        "L30": "",
+        "last_updated": datetime.now().isoformat()
+    }

     try:
-        llm_response = await call_llm(
-            prompt,
-            backend=INTAKE_LLM,
-            temperature=0.2
-        )
+        # L1: Always generate (most recent exchanges)
+        result["L1"] = await summarize_simple(exchanges[-5:])
+        print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)")

-        print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}")
-        print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}")
+        # L2: After 2+ exchanges
+        if exchange_count >= 2:
+            result["L2"] = await summarize_simple(exchanges[-2:])
+            print(f"[Intake] Generated L2 for {session_id}")

-        # LLM should return JSON, parse it
-        if not llm_response or not llm_response.strip():
-            raise ValueError("Empty response from LLM")
+        # L5: After 5+ exchanges
+        if exchange_count >= 5:
+            result["L5"] = await summarize_simple(exchanges[-10:])
+            print(f"[Intake] Generated L5 for {session_id}")

-        summary = json.loads(llm_response)
+        # L10: After 10+ exchanges (Reality Check)
+        if exchange_count >= 10:
+            result["L10"] = await summarize_L10(session_id, exchanges)
+            print(f"[Intake] Generated L10 for {session_id}")

-        return {
-            "session_id": session_id,
-            "exchange_count": len(exchanges),
-            "L1": summary.get("L1", ""),
-            "L5": summary.get("L5", ""),
-            "L10": summary.get("L10", ""),
-            "L20": summary.get("L20", ""),
-            "L30": summary.get("L30", ""),
-            "last_updated": datetime.now().isoformat()
-        }
+        # L20: After 20+ exchanges (Session Overview - merges L10s)
+        if exchange_count >= 20 and exchange_count % 10 == 0:
+            result["L20"] = await summarize_L20(session_id)
+            print(f"[Intake] Generated L20 for {session_id}")

+        # L30: After 30+ exchanges (Continuity Report - merges L20s)
+        if exchange_count >= 30 and exchange_count % 10 == 0:
+            result["L30"] = await summarize_L30(session_id)
+            print(f"[Intake] Generated L30 for {session_id}")

+        return result

     except Exception as e:
-        return {
-            "session_id": session_id,
-            "exchange_count": len(exchanges),
-            "L1": f"[Error summarizing: {str(e)}]",
-            "L5": "",
-            "L10": "",
-            "L20": "",
-            "L30": "",
-            "last_updated": datetime.now().isoformat()
-        }
+        print(f"[Intake] Error during summarization: {e}")
+        result["L1"] = f"[Error summarizing: {str(e)}]"
+        return result

 # ─────────────────────────────────
 # Background summarization stub