From fa4dd46cfcb765d5fb1471e2e9329dc6b6ec70fe Mon Sep 17 00:00:00 2001
From: serversdwn
Date: Sat, 13 Dec 2025 04:13:12 -0500
Subject: [PATCH] cortex pipeline stabilized, inner monologue is now determining user intent and tone

---
 autonomy/Assembly-spec.md              |   0
 autonomy/autonomy_core.py              |   0
 autonomy/inner_self.py                 |   0
 autonomy/monologue/monologue.py        |  40 ---
 cortex/autonomy/Assembly-spec.md       | 249 ++++++++++++++++++
 cortex/autonomy/__init__.py            |   1 +
 cortex/autonomy/monologue/__init__.py  |   1 +
 cortex/autonomy/monologue/monologue.py | 115 ++++++++
 cortex/autonomy/self/__init__.py       |   1 +
 .../autonomy}/self/self_state.json     |   0
 cortex/autonomy/self/state.py          |  11 +
 cortex/intake/intake.py                | 107 ++++----
 12 files changed, 428 insertions(+), 97 deletions(-)
 delete mode 100644 autonomy/Assembly-spec.md
 delete mode 100644 autonomy/autonomy_core.py
 delete mode 100644 autonomy/inner_self.py
 delete mode 100644 autonomy/monologue/monologue.py
 create mode 100644 cortex/autonomy/Assembly-spec.md
 create mode 100644 cortex/autonomy/__init__.py
 create mode 100644 cortex/autonomy/monologue/__init__.py
 create mode 100644 cortex/autonomy/monologue/monologue.py
 create mode 100644 cortex/autonomy/self/__init__.py
 rename {autonomy => cortex/autonomy}/self/self_state.json (100%)
 create mode 100644 cortex/autonomy/self/state.py

diff --git a/autonomy/Assembly-spec.md b/autonomy/Assembly-spec.md
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/autonomy_core.py b/autonomy/autonomy_core.py
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/inner_self.py b/autonomy/inner_self.py
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/monologue/monologue.py b/autonomy/monologue/monologue.py
deleted file mode 100644
index 63534b8..0000000
--- a/autonomy/monologue/monologue.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from typing import Dict
-from llm.llm_router import call_llm
-
-MONOLOGUE_SYSTEM_PROMPT = """
-You are Lyra's inner monologue.
-You think privately.
-You do NOT speak to the user.
-You do NOT solve the task.
-You only reflect on intent, tone, and depth.
-
-Return ONLY valid JSON with:
-- intent (string)
-- tone (neutral | warm | focused | playful | direct)
-- depth (short | medium | deep)
-- consult_executive (true | false)
-"""
-
-class InnerMonologue:
-    async def process(self, context: Dict) -> Dict:
-        prompt = f"""
-User message:
-{context['user_message']}
-
-Self state:
-{context['self_state']}
-
-Context summary:
-{context['context_summary']}
-"""
-
-        result = await call_llm(
-            provider="mi50", # MythoMax lives here
-            model="mythomax",
-            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
-            user_prompt=prompt,
-            temperature=0.7,
-            max_tokens=200
-        )
-
-        return result # must already be JSON
diff --git a/cortex/autonomy/Assembly-spec.md b/cortex/autonomy/Assembly-spec.md
new file mode 100644
index 0000000..25e7442
--- /dev/null
+++ b/cortex/autonomy/Assembly-spec.md
@@ -0,0 +1,249 @@
+# 📝 Project Lyra — Cognitive Assembly Spec
+**Version:** 0.6.1
+**Status:** Canonical reference
+**Purpose:** Define clear separation of Self, Thought, Reasoning, and Speech
+
+---
+
+## 1. High-Level Overview
+
+Lyra is composed of **four distinct cognitive layers**, plus I/O.
+
+Each layer has:
+- a **responsibility**
+- a **scope**
+- clear **inputs / outputs**
+- explicit **authority boundaries**
+
+No layer is allowed to “do everything.”
+
+---
+
+## 2. Layer Definitions
+
+### 2.1 Autonomy / Self (NON-LLM)
+
+**What it is**
+- Persistent identity
+- Long-term state
+- Mood, preferences, values
+- Continuity across time
+
+**What it is NOT**
+- Not a reasoning engine
+- Not a planner
+- Not a speaker
+- Not creative
+
+**Implementation**
+- Data + light logic
+- JSON / Python objects
+- No LLM calls
+
+**Lives at**
+```
+project-lyra/autonomy/self/
+```
+
+**Inputs**
+- Events (user message received, response sent)
+- Time / idle ticks (later)
+
+**Outputs**
+- Self state snapshot
+- Flags / preferences (e.g. verbosity, tone bias)
+
+---
+
+### 2.2 Inner Monologue (LLM, PRIVATE)
+
+**What it is**
+- Internal language-based thought
+- Reflection
+- Intent formation
+- “What do I think about this?”
+
+**What it is NOT**
+- Not final reasoning
+- Not execution
+- Not user-facing
+
+**Model**
+- MythoMax
+
+**Lives at**
+```
+project-lyra/autonomy/monologue/
+```
+
+**Inputs**
+- User message
+- Self state snapshot
+- Recent context summary
+
+**Outputs**
+- Intent
+- Tone guidance
+- Depth guidance
+- “Consult executive?” flag
+
+**Example Output**
+```json
+{
+  "intent": "technical_exploration",
+  "tone": "focused",
+  "depth": "deep",
+  "consult_executive": true
+}
+```
+
+---
+
+### 2.3 Cortex (Reasoning & Execution)
+
+**What it is**
+- Thinking pipeline
+- Planning
+- Tool selection
+- Task execution
+- Draft generation
+
+**What it is NOT**
+- Not identity
+- Not personality
+- Not persistent self
+
+**Models**
+- DeepSeek-R1 → Executive / Planner
+- GPT-4o-mini → Executor / Drafter
+
+**Lives at**
+```
+project-lyra/cortex/
+```
+
+**Inputs**
+- User message
+- Inner Monologue output
+- Memory / RAG / tools
+
+**Outputs**
+- Draft response (content only)
+- Metadata (sources, confidence, etc.)
+
+---
+
+### 2.4 Persona / Speech (LLM, USER-FACING)
+
+**What it is**
+- Voice
+- Style
+- Expression
+- Social behavior
+
+**What it is NOT**
+- Not planning
+- Not deep reasoning
+- Not decision-making
+
+**Model**
+- MythoMax
+
+**Lives at**
+```
+project-lyra/core/persona/
+```
+
+**Inputs**
+- Draft response (from Cortex)
+- Tone + intent (from Inner Monologue)
+- Persona configuration
+
+**Outputs**
+- Final user-visible text
+
+---
+
+## 3. Message Flow (Authoritative)
+
+### 3.1 Standard Message Path
+
+```
+User
+ ↓
+UI
+ ↓
+Relay
+ ↓
+Cortex
+ ↓
+Autonomy / Self (state snapshot)
+ ↓
+Inner Monologue (MythoMax)
+ ↓
+[ consult_executive? ]
+ ├─ Yes → DeepSeek-R1 (plan)
+ └─ No → skip
+ ↓
+GPT-4o-mini (execute & draft)
+ ↓
+Persona (MythoMax)
+ ↓
+Relay
+ ↓
+UI
+ ↓
+User
+```
+
+### 3.2 Fast Path (No Thinking)
+
+```
+User → UI → Relay → Persona → Relay → UI
+```
+
+---
+
+## 4. Authority Rules (Non-Negotiable)
+
+- Self never calls an LLM
+- Inner Monologue never speaks to the user
+- Cortex never applies personality
+- Persona never reasons or plans
+- DeepSeek never writes final answers
+- MythoMax never plans execution
+
+---
+
+## 5. Folder Mapping
+
+```
+project-lyra/
+├── autonomy/
+│   ├── self/
+│   ├── monologue/
+│   └── executive/
+├── cortex/
+├── core/
+│   └── persona/
+├── relay/
+└── ui/
+```
+
+---
+
+## 6. Current Status
+
+- UI ✔
+- Relay ✔
+- Cortex ✔
+- Persona ✔
+- Autonomy ✔
+- Inner Monologue ⚠ partially wired
+- Executive gating ⚠ planned
+
+---
+
+## 7. Next Decision
+
+Decide whether **Inner Monologue runs every message** or **only when triggered**.
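Editorial aside (not part of the patch): a minimal sketch of how the standard message path in Β§3.1 of the spec above could be wired together. `load_self_state` and `InnerMonologue.process` exist in this patch; `run_executive`, `run_executor`, and `apply_persona` are hypothetical stand-ins for the DeepSeek-R1, GPT-4o-mini, and Persona calls, not final APIs.

```python
from typing import Optional

from cortex.autonomy.self.state import load_self_state
from cortex.autonomy.monologue.monologue import InnerMonologue


# Hypothetical stand-ins for layers this patch does not wire up yet.
async def run_executive(message: str, thought: dict) -> dict:
    return {"plan": "todo"}  # DeepSeek-R1 would plan here


async def run_executor(message: str, thought: dict, plan: Optional[dict]) -> str:
    return "draft response"  # GPT-4o-mini would execute and draft here


async def apply_persona(draft: str, thought: dict) -> str:
    return draft  # MythoMax would apply voice and style here


async def handle_message(user_message: str, context_summary: str) -> str:
    # 1. Snapshot persistent self state (non-LLM, data only).
    self_state = load_self_state()

    # 2. Private reflection: intent, tone, depth, consult_executive.
    thought = await InnerMonologue().process({
        "user_message": user_message,
        "self_state": self_state,
        "context_summary": context_summary,
    })

    # 3. Optional planning pass, gated by the monologue's flag.
    plan = None
    if thought.get("consult_executive"):
        plan = await run_executive(user_message, thought)

    # 4. Draft content, then voice it through the persona layer.
    draft = await run_executor(user_message, thought, plan)
    return await apply_persona(draft, thought)
```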
diff --git a/cortex/autonomy/__init__.py b/cortex/autonomy/__init__.py
new file mode 100644
index 0000000..49f54e0
--- /dev/null
+++ b/cortex/autonomy/__init__.py
@@ -0,0 +1 @@
+# Autonomy module for Lyra
diff --git a/cortex/autonomy/monologue/__init__.py b/cortex/autonomy/monologue/__init__.py
new file mode 100644
index 0000000..8cd4fb8
--- /dev/null
+++ b/cortex/autonomy/monologue/__init__.py
@@ -0,0 +1 @@
+# Inner monologue module
diff --git a/cortex/autonomy/monologue/monologue.py b/cortex/autonomy/monologue/monologue.py
new file mode 100644
index 0000000..a03e5f5
--- /dev/null
+++ b/cortex/autonomy/monologue/monologue.py
@@ -0,0 +1,115 @@
+import os
+import json
+import logging
+import re
+from typing import Dict
+from llm.llm_router import call_llm
+
+# Configuration
+MONOLOGUE_LLM = os.getenv("MONOLOGUE_LLM", "PRIMARY").upper()
+VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
+
+# Logger
+logger = logging.getLogger(__name__)
+
+if VERBOSE_DEBUG:
+    logger.setLevel(logging.DEBUG)
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(logging.Formatter(
+        '%(asctime)s [MONOLOGUE] %(levelname)s: %(message)s',
+        datefmt='%H:%M:%S'
+    ))
+    logger.addHandler(console_handler)
+
+MONOLOGUE_SYSTEM_PROMPT = """
+You are Lyra's inner monologue.
+You think privately.
+You do NOT speak to the user.
+You do NOT solve the task.
+You only reflect on intent, tone, and depth.
+
+Return ONLY valid JSON with:
+- intent (string)
+- tone (neutral | warm | focused | playful | direct)
+- depth (short | medium | deep)
+- consult_executive (true | false)
+"""
+
+class InnerMonologue:
+    async def process(self, context: Dict) -> Dict:
+        # Build full prompt with system instructions merged in
+        full_prompt = f"""{MONOLOGUE_SYSTEM_PROMPT}
+
+User message:
+{context['user_message']}
+
+Self state:
+{context['self_state']}
+
+Context summary:
+{context['context_summary']}
+
+Output JSON only:
+"""
+
+        # Call LLM using configured backend
+        if VERBOSE_DEBUG:
+            logger.debug(f"[InnerMonologue] Calling LLM with backend: {MONOLOGUE_LLM}")
+            logger.debug(f"[InnerMonologue] Prompt length: {len(full_prompt)} chars")
+
+        result = await call_llm(
+            full_prompt,
+            backend=MONOLOGUE_LLM,
+            temperature=0.7,
+            max_tokens=200
+        )
+
+        if VERBOSE_DEBUG:
+            logger.debug("[InnerMonologue] Raw LLM response:")
+            logger.debug(f"{'='*80}")
+            logger.debug(result)
+            logger.debug(f"{'='*80}")
+            logger.debug(f"[InnerMonologue] Response length: {len(result) if result else 0} chars")
+
+        # Parse JSON response - extract just the JSON part if there's extra text
+        try:
+            # Try direct parsing first
+            parsed = json.loads(result)
+            if VERBOSE_DEBUG:
+                logger.debug(f"[InnerMonologue] Successfully parsed JSON directly: {parsed}")
+            return parsed
+        except json.JSONDecodeError:
+            # If direct parsing fails, try to extract JSON from the response
+            if VERBOSE_DEBUG:
+                logger.debug("[InnerMonologue] Direct JSON parse failed, attempting extraction...")
+
+            # Look for a JSON object (handles one level of nested braces)
+            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', result, re.DOTALL)
+
+            if json_match:
+                json_str = json_match.group(0)
+                try:
+                    parsed = json.loads(json_str)
+                    if VERBOSE_DEBUG:
+                        logger.debug(f"[InnerMonologue] Successfully extracted and parsed JSON: {parsed}")
+                    return parsed
+                except json.JSONDecodeError as e:
+                    if VERBOSE_DEBUG:
+                        logger.warning(f"[InnerMonologue] Extracted JSON still invalid: {e}")
+            else:
+                if VERBOSE_DEBUG:
+                    logger.warning("[InnerMonologue] No JSON object found in response")
response") + + # Final fallback + if VERBOSE_DEBUG: + logger.warning(f"[InnerMonologue] All parsing attempts failed, using fallback") + else: + print(f"[InnerMonologue] JSON extraction failed") + print(f"[InnerMonologue] Raw response was: {result[:500]}") + + return { + "intent": "unknown", + "tone": "neutral", + "depth": "medium", + "consult_executive": False + } diff --git a/cortex/autonomy/self/__init__.py b/cortex/autonomy/self/__init__.py new file mode 100644 index 0000000..60c47c7 --- /dev/null +++ b/cortex/autonomy/self/__init__.py @@ -0,0 +1 @@ +# Self state module diff --git a/autonomy/self/self_state.json b/cortex/autonomy/self/self_state.json similarity index 100% rename from autonomy/self/self_state.json rename to cortex/autonomy/self/self_state.json diff --git a/cortex/autonomy/self/state.py b/cortex/autonomy/self/state.py new file mode 100644 index 0000000..ab9c626 --- /dev/null +++ b/cortex/autonomy/self/state.py @@ -0,0 +1,11 @@ +""" +Stub for self state management. +""" + +def load_self_state(): + """Load self state - stub implementation""" + return { + "mood": "neutral", + "energy": 0.8, + "focus": "user_request" + } diff --git a/cortex/intake/intake.py b/cortex/intake/intake.py index f5d9cba..ce0e592 100644 --- a/cortex/intake/intake.py +++ b/cortex/intake/intake.py @@ -234,25 +234,27 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None: async def summarize_context(session_id: str, exchanges: list[dict]): """ Internal summarizer that uses Cortex's LLM router. - Produces L1 / L5 / L10 / L20 / L30 summaries. + Produces cascading summaries based on exchange count: + - L1: Always (most recent activity) + - L2: After 2+ exchanges + - L5: After 5+ exchanges + - L10: After 10+ exchanges + - L20: After 20+ exchanges + - L30: After 30+ exchanges Args: session_id: The conversation/session ID exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...} """ - # Build raw conversation text - convo_lines = [] - for ex in exchanges: - convo_lines.append(f"User: {ex.get('user_msg','')}") - convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}") - convo_text = "\n".join(convo_lines) + exchange_count = len(exchanges) - if not convo_text.strip(): + if exchange_count == 0: return { "session_id": session_id, "exchange_count": 0, "L1": "", + "L2": "", "L5": "", "L10": "", "L20": "", @@ -260,63 +262,54 @@ async def summarize_context(session_id: str, exchanges: list[dict]): "last_updated": datetime.now().isoformat() } - # Prompt the LLM (internal β€” no HTTP) - prompt = f""" -Summarize the conversation below into multiple compression levels. - -Conversation: ----------------- -{convo_text} ----------------- - -Output strictly in JSON with keys: -L1 β†’ ultra short summary (1–2 sentences max) -L5 β†’ short summary -L10 β†’ medium summary -L20 β†’ detailed overview -L30 β†’ full detailed summary - -JSON only. No text outside JSON. 
-""" + result = { + "session_id": session_id, + "exchange_count": exchange_count, + "L1": "", + "L2": "", + "L5": "", + "L10": "", + "L20": "", + "L30": "", + "last_updated": datetime.now().isoformat() + } try: - llm_response = await call_llm( - prompt, - backend=INTAKE_LLM, - temperature=0.2 - ) + # L1: Always generate (most recent exchanges) + result["L1"] = await summarize_simple(exchanges[-5:]) + print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)") - print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}") - print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}") + # L2: After 2+ exchanges + if exchange_count >= 2: + result["L2"] = await summarize_simple(exchanges[-2:]) + print(f"[Intake] Generated L2 for {session_id}") - # LLM should return JSON, parse it - if not llm_response or not llm_response.strip(): - raise ValueError("Empty response from LLM") + # L5: After 5+ exchanges + if exchange_count >= 5: + result["L5"] = await summarize_simple(exchanges[-10:]) + print(f"[Intake] Generated L5 for {session_id}") - summary = json.loads(llm_response) + # L10: After 10+ exchanges (Reality Check) + if exchange_count >= 10: + result["L10"] = await summarize_L10(session_id, exchanges) + print(f"[Intake] Generated L10 for {session_id}") - return { - "session_id": session_id, - "exchange_count": len(exchanges), - "L1": summary.get("L1", ""), - "L5": summary.get("L5", ""), - "L10": summary.get("L10", ""), - "L20": summary.get("L20", ""), - "L30": summary.get("L30", ""), - "last_updated": datetime.now().isoformat() - } + # L20: After 20+ exchanges (Session Overview - merges L10s) + if exchange_count >= 20 and exchange_count % 10 == 0: + result["L20"] = await summarize_L20(session_id) + print(f"[Intake] Generated L20 for {session_id}") + + # L30: After 30+ exchanges (Continuity Report - merges L20s) + if exchange_count >= 30 and exchange_count % 10 == 0: + result["L30"] = await summarize_L30(session_id) + print(f"[Intake] Generated L30 for {session_id}") + + return result except Exception as e: - return { - "session_id": session_id, - "exchange_count": len(exchanges), - "L1": f"[Error summarizing: {str(e)}]", - "L5": "", - "L10": "", - "L20": "", - "L30": "", - "last_updated": datetime.now().isoformat() - } + print(f"[Intake] Error during summarization: {e}") + result["L1"] = f"[Error summarizing: {str(e)}]" + return result # ───────────────────────────────── # Background summarization stub