cortex pipeline stabilized; inner monologue now determines user intent and tone
@@ -1,40 +0,0 @@
from typing import Dict
from llm.llm_router import call_llm

MONOLOGUE_SYSTEM_PROMPT = """
You are Lyra's inner monologue.
You think privately.
You do NOT speak to the user.
You do NOT solve the task.
You only reflect on intent, tone, and depth.

Return ONLY valid JSON with:
- intent (string)
- tone (neutral | warm | focused | playful | direct)
- depth (short | medium | deep)
- consult_executive (true | false)
"""


class InnerMonologue:
    async def process(self, context: Dict) -> Dict:
        prompt = f"""
User message:
{context['user_message']}

Self state:
{context['self_state']}

Context summary:
{context['context_summary']}
"""

        result = await call_llm(
            provider="mi50",  # MythoMax lives here
            model="mythomax",
            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
            user_prompt=prompt,
            temperature=0.7,
            max_tokens=200
        )

        return result  # must already be JSON
249 cortex/autonomy/Assembly-spec.md Normal file
@@ -0,0 +1,249 @@
# 📐 Project Lyra — Cognitive Assembly Spec

**Version:** 0.6.1
**Status:** Canonical reference
**Purpose:** Define clear separation of Self, Thought, Reasoning, and Speech

---

## 1. High-Level Overview

Lyra is composed of **four distinct cognitive layers**, plus I/O.

Each layer has:
- a **responsibility**
- a **scope**
- clear **inputs / outputs**
- explicit **authority boundaries**

No layer is allowed to “do everything.”

---

## 2. Layer Definitions

### 2.1 Autonomy / Self (NON-LLM)

**What it is**
- Persistent identity
- Long-term state
- Mood, preferences, values
- Continuity across time

**What it is NOT**
- Not a reasoning engine
- Not a planner
- Not a speaker
- Not creative

**Implementation**
- Data + light logic
- JSON / Python objects
- No LLM calls

**Lives at**
```
project-lyra/autonomy/self/
```

**Inputs**
- Events (user message received, response sent)
- Time / idle ticks (later)

**Outputs**
- Self state snapshot
- Flags / preferences (e.g. verbosity, tone bias)
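To make the “data + light logic” boundary concrete, a minimal sketch of a snapshot object, assuming plain dataclasses; the field names echo the `load_self_state()` stub in `cortex/autonomy/self/state.py` and are otherwise illustrative:

```python
from dataclasses import dataclass

@dataclass
class SelfState:
    # Long-term state; persisted as JSON, mutated by events, never by an LLM
    mood: str = "neutral"
    energy: float = 0.8
    focus: str = "user_request"
    verbosity: str = "medium"   # preference flag consumed downstream
    tone_bias: str = "warm"     # nudges Persona; does not override it

    def snapshot(self) -> dict:
        # Pure data out: no reasoning, no LLM calls
        return {"mood": self.mood, "energy": self.energy, "focus": self.focus,
                "verbosity": self.verbosity, "tone_bias": self.tone_bias}
```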
---

### 2.2 Inner Monologue (LLM, PRIVATE)

**What it is**
- Internal language-based thought
- Reflection
- Intent formation
- “What do I think about this?”

**What it is NOT**
- Not final reasoning
- Not execution
- Not user-facing

**Model**
- MythoMax

**Lives at**
```
project-lyra/autonomy/monologue/
```

**Inputs**
- User message
- Self state snapshot
- Recent context summary

**Outputs**
- Intent
- Tone guidance
- Depth guidance
- “Consult executive?” flag

**Example Output**
```json
{
  "intent": "technical_exploration",
  "tone": "focused",
  "depth": "deep",
  "consult_executive": true
}
```

---

### 2.3 Cortex (Reasoning & Execution)

**What it is**
- Thinking pipeline
- Planning
- Tool selection
- Task execution
- Draft generation

**What it is NOT**
- Not identity
- Not personality
- Not persistent self

**Models**
- DeepSeek-R1 → Executive / Planner
- GPT-4o-mini → Executor / Drafter

**Lives at**
```
project-lyra/cortex/
```

**Inputs**
- User message
- Inner Monologue output
- Memory / RAG / tools

**Outputs**
- Draft response (content only)
- Metadata (sources, confidence, etc.)
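A hedged sketch of the planner/executor split, reusing the `call_llm(prompt, backend=...)` signature seen in `monologue.py`; the `EXECUTIVE` and `EXECUTOR` backend names are assumptions, not wired config:

```python
from llm.llm_router import call_llm

async def cortex_process(user_message: str, guidance: dict) -> dict:
    plan = None
    if guidance.get("consult_executive"):
        # DeepSeek-R1 plans; it never writes the final answer
        plan = await call_llm(f"Plan the steps for: {user_message}",
                              backend="EXECUTIVE", temperature=0.3)
    # GPT-4o-mini executes and drafts content only; personality is applied later
    draft = await call_llm(
        f"Task: {user_message}\nPlan: {plan or '(answer directly)'}",
        backend="EXECUTOR",
    )
    return {"draft": draft, "metadata": {"planned": plan is not None}}
```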
---

### 2.4 Persona / Speech (LLM, USER-FACING)

**What it is**
- Voice
- Style
- Expression
- Social behavior

**What it is NOT**
- Not planning
- Not deep reasoning
- Not decision-making

**Model**
- MythoMax

**Lives at**
```
project-lyra/core/persona/
```

**Inputs**
- Draft response (from Cortex)
- Tone + intent (from Inner Monologue)
- Persona configuration

**Outputs**
- Final user-visible text

---

## 3. Message Flow (Authoritative)

### 3.1 Standard Message Path

```
User
  ↓
UI
  ↓
Relay
  ↓
Cortex
  ↓
Autonomy / Self (state snapshot)
  ↓
Inner Monologue (MythoMax)
  ↓
[ consult_executive? ]
  ├─ Yes → DeepSeek-R1 (plan)
  └─ No → skip
  ↓
GPT-4o-mini (execute & draft)
  ↓
Persona (MythoMax)
  ↓
Relay
  ↓
UI
  ↓
User
```
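A non-authoritative sketch of that path in code, composing the pieces above; `persona_speak` is a hypothetical helper, and `load_self_state` is the stub from `cortex/autonomy/self/state.py`:

```python
async def handle_message(user_message: str, context_summary: str) -> str:
    self_state = load_self_state()            # Autonomy: data only, no LLM
    guidance = await InnerMonologue().process({
        "user_message": user_message,
        "self_state": self_state,
        "context_summary": context_summary,
    })                                        # private thought (MythoMax)
    result = await cortex_process(user_message, guidance)  # plan / execute / draft
    # Persona (hypothetical helper) turns the draft into user-visible speech,
    # steered by the monologue's tone and intent
    return await persona_speak(result["draft"], guidance)
```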
### 3.2 Fast Path (No Thinking)

```
User → UI → Relay → Persona → Relay → UI
```

---

## 4. Authority Rules (Non-Negotiable)

- Self never calls an LLM
- Inner Monologue never speaks to the user
- Cortex never applies personality
- Persona never reasons or plans
- DeepSeek never writes final answers
- MythoMax never plans execution
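One lightweight way to keep these invariants honest at runtime, sketched as an illustrative layer-to-model allowlist (names are assumptions, not shipped code):

```python
ALLOWED_MODELS = {
    "self": set(),                  # Self never calls an LLM
    "monologue": {"mythomax"},      # thinks privately, never speaks
    "executive": {"deepseek-r1"},   # plans, never writes final answers
    "executor": {"gpt-4o-mini"},    # drafts content only
    "persona": {"mythomax"},        # speaks, never plans
}

def assert_authority(layer: str, model: str) -> None:
    # Raise before the router call if a layer oversteps its boundary
    if model not in ALLOWED_MODELS.get(layer, set()):
        raise PermissionError(f"{layer!r} is not allowed to call {model!r}")
```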
---

## 5. Folder Mapping

```
project-lyra/
├── autonomy/
│   ├── self/
│   ├── monologue/
│   └── executive/
├── cortex/
├── core/
│   └── persona/
├── relay/
└── ui/
```

---

## 6. Current Status

- UI ✔
- Relay ✔
- Cortex ✔
- Persona ✔
- Autonomy ✔
- Inner Monologue ⚠ partially wired
- Executive gating ⚠ planned

---

## 7. Next Decision

Decide whether the **Inner Monologue runs on every message** or **only when triggered**.
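One way to keep that decision reversible is a config flag rather than hard-coded behavior; a minimal sketch, assuming a hypothetical `MONOLOGUE_MODE` env var and a crude trigger heuristic:

```python
import os

MONOLOGUE_MODE = os.getenv("MONOLOGUE_MODE", "always")  # "always" | "triggered"

def should_run_monologue(user_message: str) -> bool:
    if MONOLOGUE_MODE == "always":
        return True
    # "triggered": only spend the extra LLM call on non-trivial messages
    return len(user_message.split()) > 12 or "?" in user_message
```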
1 cortex/autonomy/__init__.py Normal file
@@ -0,0 +1 @@
# Autonomy module for Lyra
1 cortex/autonomy/monologue/__init__.py Normal file
@@ -0,0 +1 @@
# Inner monologue module
115 cortex/autonomy/monologue/monologue.py Normal file
@@ -0,0 +1,115 @@
import os
import json
import logging
import re
from typing import Dict
from llm.llm_router import call_llm

# Configuration
MONOLOGUE_LLM = os.getenv("MONOLOGUE_LLM", "PRIMARY").upper()
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"

# Logger
logger = logging.getLogger(__name__)

if VERBOSE_DEBUG:
    logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter(
        '%(asctime)s [MONOLOGUE] %(levelname)s: %(message)s',
        datefmt='%H:%M:%S'
    ))
    logger.addHandler(console_handler)

MONOLOGUE_SYSTEM_PROMPT = """
You are Lyra's inner monologue.
You think privately.
You do NOT speak to the user.
You do NOT solve the task.
You only reflect on intent, tone, and depth.

Return ONLY valid JSON with:
- intent (string)
- tone (neutral | warm | focused | playful | direct)
- depth (short | medium | deep)
- consult_executive (true | false)
"""


class InnerMonologue:
    async def process(self, context: Dict) -> Dict:
        # Build full prompt with system instructions merged in
        full_prompt = f"""{MONOLOGUE_SYSTEM_PROMPT}

User message:
{context['user_message']}

Self state:
{context['self_state']}

Context summary:
{context['context_summary']}

Output JSON only:
"""

        # Call LLM using configured backend
        if VERBOSE_DEBUG:
            logger.debug(f"[InnerMonologue] Calling LLM with backend: {MONOLOGUE_LLM}")
            logger.debug(f"[InnerMonologue] Prompt length: {len(full_prompt)} chars")

        result = await call_llm(
            full_prompt,
            backend=MONOLOGUE_LLM,
            temperature=0.7,
            max_tokens=200
        )

        if VERBOSE_DEBUG:
            logger.debug("[InnerMonologue] Raw LLM response:")
            logger.debug("=" * 80)
            logger.debug(result)
            logger.debug("=" * 80)
            logger.debug(f"[InnerMonologue] Response length: {len(result) if result else 0} chars")

        # Parse JSON response - extract just the JSON part if there's extra text
        try:
            # Try direct parsing first
            parsed = json.loads(result)
            if VERBOSE_DEBUG:
                logger.debug(f"[InnerMonologue] Successfully parsed JSON directly: {parsed}")
            return parsed
        except json.JSONDecodeError:
            # If direct parsing fails, try to extract JSON from the response
            if VERBOSE_DEBUG:
                logger.debug("[InnerMonologue] Direct JSON parse failed, attempting extraction...")

            # Look for JSON object (starts with { and ends with })
            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', result, re.DOTALL)

            if json_match:
                json_str = json_match.group(0)
                try:
                    parsed = json.loads(json_str)
                    if VERBOSE_DEBUG:
                        logger.debug(f"[InnerMonologue] Successfully extracted and parsed JSON: {parsed}")
                    return parsed
                except json.JSONDecodeError as e:
                    if VERBOSE_DEBUG:
                        logger.warning(f"[InnerMonologue] Extracted JSON still invalid: {e}")
            else:
                if VERBOSE_DEBUG:
                    logger.warning("[InnerMonologue] No JSON object found in response")

        # Final fallback
        if VERBOSE_DEBUG:
            logger.warning("[InnerMonologue] All parsing attempts failed, using fallback")
        else:
            print("[InnerMonologue] JSON extraction failed")
            print(f"[InnerMonologue] Raw response was: {result[:500]}")

        return {
            "intent": "unknown",
            "tone": "neutral",
            "depth": "medium",
            "consult_executive": False
        }
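A hedged usage sketch of the class above (import paths inferred from the repo layout); because of the guaranteed fallback, the four guidance keys are always present:

```python
import asyncio
from cortex.autonomy.monologue.monologue import InnerMonologue
from cortex.autonomy.self.state import load_self_state

async def main():
    guidance = await InnerMonologue().process({
        "user_message": "Can you help me debug my summarizer?",
        "self_state": load_self_state(),
        "context_summary": "(no prior context)",
    })
    print(guidance["intent"], guidance["tone"], guidance["depth"],
          guidance["consult_executive"])

asyncio.run(main())
```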
1 cortex/autonomy/self/__init__.py Normal file
@@ -0,0 +1 @@
# Self state module
11 cortex/autonomy/self/state.py Normal file
@@ -0,0 +1,11 @@
"""
Stub for self state management.
"""

def load_self_state():
    """Load self state - stub implementation"""
    return {
        "mood": "neutral",
        "energy": 0.8,
        "focus": "user_request"
    }
@@ -234,25 +234,27 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None:
 async def summarize_context(session_id: str, exchanges: list[dict]):
     """
     Internal summarizer that uses Cortex's LLM router.
-    Produces L1 / L5 / L10 / L20 / L30 summaries.
+    Produces cascading summaries based on exchange count:
+    - L1: Always (most recent activity)
+    - L2: After 2+ exchanges
+    - L5: After 5+ exchanges
+    - L10: After 10+ exchanges
+    - L20: After 20+ exchanges
+    - L30: After 30+ exchanges

     Args:
         session_id: The conversation/session ID
         exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}
     """

     # Build raw conversation text
     convo_lines = []
     for ex in exchanges:
         convo_lines.append(f"User: {ex.get('user_msg','')}")
         convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}")
     convo_text = "\n".join(convo_lines)
+    exchange_count = len(exchanges)

-    if not convo_text.strip():
+    if exchange_count == 0:
         return {
             "session_id": session_id,
             "exchange_count": 0,
             "L1": "",
+            "L2": "",
             "L5": "",
             "L10": "",
             "L20": "",
@@ -260,63 +262,54 @@ async def summarize_context(session_id: str, exchanges: list[dict]):
             "last_updated": datetime.now().isoformat()
         }

-    # Prompt the LLM (internal — no HTTP)
-    prompt = f"""
-Summarize the conversation below into multiple compression levels.
-
-Conversation:
-----------------
-{convo_text}
-----------------
-
-Output strictly in JSON with keys:
-L1 → ultra short summary (1–2 sentences max)
-L5 → short summary
-L10 → medium summary
-L20 → detailed overview
-L30 → full detailed summary
-
-JSON only. No text outside JSON.
-"""
+    result = {
+        "session_id": session_id,
+        "exchange_count": exchange_count,
+        "L1": "",
+        "L2": "",
+        "L5": "",
+        "L10": "",
+        "L20": "",
+        "L30": "",
+        "last_updated": datetime.now().isoformat()
+    }

     try:
-        llm_response = await call_llm(
-            prompt,
-            backend=INTAKE_LLM,
-            temperature=0.2
-        )
+        # L1: Always generate (most recent exchanges)
+        result["L1"] = await summarize_simple(exchanges[-5:])
+        print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)")

-        print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}")
-        print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}")
+        # L2: After 2+ exchanges
+        if exchange_count >= 2:
+            result["L2"] = await summarize_simple(exchanges[-2:])
+            print(f"[Intake] Generated L2 for {session_id}")

-        # LLM should return JSON, parse it
-        if not llm_response or not llm_response.strip():
-            raise ValueError("Empty response from LLM")
+        # L5: After 5+ exchanges
+        if exchange_count >= 5:
+            result["L5"] = await summarize_simple(exchanges[-10:])
+            print(f"[Intake] Generated L5 for {session_id}")

-        summary = json.loads(llm_response)
+        # L10: After 10+ exchanges (Reality Check)
+        if exchange_count >= 10:
+            result["L10"] = await summarize_L10(session_id, exchanges)
+            print(f"[Intake] Generated L10 for {session_id}")

-        return {
-            "session_id": session_id,
-            "exchange_count": len(exchanges),
-            "L1": summary.get("L1", ""),
-            "L5": summary.get("L5", ""),
-            "L10": summary.get("L10", ""),
-            "L20": summary.get("L20", ""),
-            "L30": summary.get("L30", ""),
-            "last_updated": datetime.now().isoformat()
-        }
+        # L20: After 20+ exchanges (Session Overview - merges L10s)
+        if exchange_count >= 20 and exchange_count % 10 == 0:
+            result["L20"] = await summarize_L20(session_id)
+            print(f"[Intake] Generated L20 for {session_id}")

+        # L30: After 30+ exchanges (Continuity Report - merges L20s)
+        if exchange_count >= 30 and exchange_count % 10 == 0:
+            result["L30"] = await summarize_L30(session_id)
+            print(f"[Intake] Generated L30 for {session_id}")

+        return result

     except Exception as e:
-        return {
-            "session_id": session_id,
-            "exchange_count": len(exchanges),
-            "L1": f"[Error summarizing: {str(e)}]",
-            "L5": "",
-            "L10": "",
-            "L20": "",
-            "L30": "",
-            "last_updated": datetime.now().isoformat()
-        }
+        print(f"[Intake] Error during summarization: {e}")
+        result["L1"] = f"[Error summarizing: {str(e)}]"
+        return result

 # ─────────────────────────────────
 # Background summarization stub