Update to v0.9.1 #1

Merged
serversdown merged 44 commits from dev into main 2026-01-18 02:46:25 -05:00
12 changed files with 428 additions and 97 deletions
Showing only changes of commit fa4dd46cfc

View File

@@ -1,40 +0,0 @@
from typing import Dict

from llm.llm_router import call_llm

MONOLOGUE_SYSTEM_PROMPT = """
You are Lyra's inner monologue.
You think privately.
You do NOT speak to the user.
You do NOT solve the task.
You only reflect on intent, tone, and depth.
Return ONLY valid JSON with:
- intent (string)
- tone (neutral | warm | focused | playful | direct)
- depth (short | medium | deep)
- consult_executive (true | false)
"""


class InnerMonologue:
    async def process(self, context: Dict) -> Dict:
        prompt = f"""
User message:
{context['user_message']}
Self state:
{context['self_state']}
Context summary:
{context['context_summary']}
"""
        result = await call_llm(
            provider="mi50",  # MythoMax lives here
            model="mythomax",
            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
            user_prompt=prompt,
            temperature=0.7,
            max_tokens=200
        )
        return result  # must already be JSON

View File

@@ -0,0 +1,249 @@
# 📐 Project Lyra — Cognitive Assembly Spec
**Version:** 0.6.1
**Status:** Canonical reference
**Purpose:** Define clear separation of Self, Thought, Reasoning, and Speech
---
## 1. High-Level Overview
Lyra is composed of **four distinct cognitive layers**, plus I/O.
Each layer has:
- a **responsibility**
- a **scope**
- clear **inputs / outputs**
- explicit **authority boundaries**
No layer is allowed to “do everything.”
---
## 2. Layer Definitions
### 2.1 Autonomy / Self (NON-LLM)
**What it is**
- Persistent identity
- Long-term state
- Mood, preferences, values
- Continuity across time
**What it is NOT**
- Not a reasoning engine
- Not a planner
- Not a speaker
- Not creative
**Implementation**
- Data + light logic
- JSON / Python objects
- No LLM calls
**Lives at**
```
project-lyra/autonomy/self/
```
**Inputs**
- Events (user message received, response sent)
- Time / idle ticks (later)
**Outputs**
- Self state snapshot
- Flags / preferences (e.g. verbosity, tone bias)
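
A minimal sketch of what a snapshot could look like, reusing the `mood` / `energy` / `focus` fields from the current `self_state` stub; the preference flags shown here are illustrative, not fixed by this spec:

```python
# Hypothetical self state snapshot. Fields beyond mood/energy/focus
# (verbosity, tone_bias) are assumptions for illustration only.
from dataclasses import dataclass, asdict

@dataclass
class SelfState:
    mood: str = "neutral"
    energy: float = 0.8
    focus: str = "user_request"
    verbosity: str = "medium"   # preference flag consumed downstream
    tone_bias: str = "warm"     # preference flag consumed by Inner Monologue

    def snapshot(self) -> dict:
        # Plain data out, no LLM calls, per the authority rules below.
        return asdict(self)
```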
---
### 2.2 Inner Monologue (LLM, PRIVATE)
**What it is**
- Internal language-based thought
- Reflection
- Intent formation
- “What do I think about this?”
**What it is NOT**
- Not final reasoning
- Not execution
- Not user-facing
**Model**
- MythoMax
**Lives at**
```
project-lyra/autonomy/monologue/
```
**Inputs**
- User message
- Self state snapshot
- Recent context summary
**Outputs**
- Intent
- Tone guidance
- Depth guidance
- “Consult executive?” flag
**Example Output**
```json
{
"intent": "technical_exploration",
"tone": "focused",
"depth": "deep",
"consult_executive": true
}
```
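
Downstream consumers will likely want to clamp whatever the model emits to this schema. A hypothetical normalizer (the default values are assumptions, not spec):

```python
# Illustrative only: coerce a raw monologue dict into the four required fields.
ALLOWED_TONES = {"neutral", "warm", "focused", "playful", "direct"}
ALLOWED_DEPTHS = {"short", "medium", "deep"}

def normalize_monologue(raw: dict) -> dict:
    return {
        "intent": str(raw.get("intent", "unknown")),
        "tone": raw.get("tone") if raw.get("tone") in ALLOWED_TONES else "neutral",
        "depth": raw.get("depth") if raw.get("depth") in ALLOWED_DEPTHS else "medium",
        "consult_executive": bool(raw.get("consult_executive", False)),
    }
```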
---
### 2.3 Cortex (Reasoning & Execution)
**What it is**
- Thinking pipeline
- Planning
- Tool selection
- Task execution
- Draft generation
**What it is NOT**
- Not identity
- Not personality
- Not persistent self
**Models**
- DeepSeek-R1 → Executive / Planner
- GPT-4o-mini → Executor / Drafter
**Lives at**
```
project-lyra/cortex/
```
**Inputs**
- User message
- Inner Monologue output
- Memory / RAG / tools
**Outputs**
- Draft response (content only)
- Metadata (sources, confidence, etc.)
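
The spec only fixes the split between content and metadata; as an illustration, a Cortex result could be typed roughly like this (the concrete field names are assumptions):

```python
# Illustrative shape only - the spec requires a content-only draft plus
# metadata, not these exact types or field names.
from typing import List, TypedDict

class CortexResult(TypedDict):
    draft: str            # content only, no personality applied
    sources: List[str]    # e.g. memory / RAG references
    confidence: float     # 0.0 - 1.0, executor's own estimate
```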
---
### 2.4 Persona / Speech (LLM, USER-FACING)
**What it is**
- Voice
- Style
- Expression
- Social behavior
**What it is NOT**
- Not planning
- Not deep reasoning
- Not decision-making
**Model**
- MythoMax
**Lives at**
```
project-lyra/core/persona/
```
**Inputs**
- Draft response (from Cortex)
- Tone + intent (from Inner Monologue)
- Persona configuration
**Outputs**
- Final user-visible text
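
As a hedged sketch of how this stage might be wired, assuming the same `call_llm` router interface used by the Inner Monologue (the `PERSONA` backend name and prompt wording are placeholders):

```python
# Hypothetical wiring - the real persona module may differ; the call_llm
# signature mirrors the router usage seen in inner_monologue.py.
from llm.llm_router import call_llm

async def speak(draft: str, tone: str, intent: str, persona_cfg: dict) -> str:
    prompt = (
        f"You are {persona_cfg.get('name', 'Lyra')}.\n"
        f"Rewrite the draft below in your own voice. Tone: {tone}. Intent: {intent}.\n"
        "Do not add new facts, plans, or reasoning - style only.\n\n"
        f"Draft:\n{draft}"
    )
    return await call_llm(prompt, backend="PERSONA", temperature=0.8, max_tokens=600)
```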
---
## 3. Message Flow (Authoritative)
### 3.1 Standard Message Path
```
User
 ↓
UI
 ↓
Relay
 ↓
Cortex
 ↓
Autonomy / Self (state snapshot)
 ↓
Inner Monologue (MythoMax)
 ↓
[ consult_executive? ]
 ├─ Yes → DeepSeek-R1 (plan)
 └─ No  → skip
 ↓
GPT-4o-mini (execute & draft)
 ↓
Persona (MythoMax)
 ↓
Relay
 ↓
UI
 ↓
User
```
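
A rough sketch of the gating step in this path; the three stage callables are placeholders for modules that are not all implemented yet (see §6), so they are injected rather than imported:

```python
# Placeholder routing sketch. plan_with_executive / execute_draft /
# apply_persona stand in for the executive, executor, and persona stages.
from typing import Awaitable, Callable, Optional

async def route(thought: dict,
                user_msg: str,
                plan_with_executive: Callable[..., Awaitable[str]],
                execute_draft: Callable[..., Awaitable[str]],
                apply_persona: Callable[..., Awaitable[str]]) -> str:
    plan: Optional[str] = None
    if thought.get("consult_executive"):
        plan = await plan_with_executive(user_msg, thought)   # DeepSeek-R1 plans
    draft = await execute_draft(user_msg, thought, plan)      # GPT-4o-mini drafts
    return await apply_persona(draft, thought["tone"], thought["intent"])  # MythoMax speaks
```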
### 3.2 Fast Path (No Thinking)
```
User → UI → Relay → Persona → Relay → UI
```
---
## 4. Authority Rules (Non-Negotiable)
- Self never calls an LLM
- Inner Monologue never speaks to the user
- Cortex never applies personality
- Persona never reasons or plans
- DeepSeek never writes final answers
- MythoMax never plans execution
---
## 5. Folder Mapping
```
project-lyra/
├── autonomy/
│ ├── self/
│ ├── monologue/
│ └── executive/
├── cortex/
├── core/
│ └── persona/
├── relay/
└── ui/
```
---
## 6. Current Status
- UI ✔
- Relay ✔
- Cortex ✔
- Persona ✔
- Autonomy ✔
- Inner Monologue ⚠ partially wired
- Executive gating ⚠ planned
---
## 7. Next Decision
Decide whether **Inner Monologue runs every message** or **only when triggered**.

View File

@@ -0,0 +1 @@
# Autonomy module for Lyra

View File

@@ -0,0 +1 @@
# Inner monologue module

View File

@@ -0,0 +1,115 @@
import os
import json
import logging
from typing import Dict
from llm.llm_router import call_llm
# Configuration
MONOLOGUE_LLM = os.getenv("MONOLOGUE_LLM", "PRIMARY").upper()
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
# Logger
logger = logging.getLogger(__name__)
if VERBOSE_DEBUG:
logger.setLevel(logging.DEBUG)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
'%(asctime)s [MONOLOGUE] %(levelname)s: %(message)s',
datefmt='%H:%M:%S'
))
logger.addHandler(console_handler)
MONOLOGUE_SYSTEM_PROMPT = """
You are Lyra's inner monologue.
You think privately.
You do NOT speak to the user.
You do NOT solve the task.
You only reflect on intent, tone, and depth.
Return ONLY valid JSON with:
- intent (string)
- tone (neutral | warm | focused | playful | direct)
- depth (short | medium | deep)
- consult_executive (true | false)
"""
class InnerMonologue:
async def process(self, context: Dict) -> Dict:
# Build full prompt with system instructions merged in
full_prompt = f"""{MONOLOGUE_SYSTEM_PROMPT}
User message:
{context['user_message']}
Self state:
{context['self_state']}
Context summary:
{context['context_summary']}
Output JSON only:
"""
# Call LLM using configured backend
if VERBOSE_DEBUG:
logger.debug(f"[InnerMonologue] Calling LLM with backend: {MONOLOGUE_LLM}")
logger.debug(f"[InnerMonologue] Prompt length: {len(full_prompt)} chars")
result = await call_llm(
full_prompt,
backend=MONOLOGUE_LLM,
temperature=0.7,
max_tokens=200
)
if VERBOSE_DEBUG:
logger.debug(f"[InnerMonologue] Raw LLM response:")
logger.debug(f"{'='*80}")
logger.debug(result)
logger.debug(f"{'='*80}")
logger.debug(f"[InnerMonologue] Response length: {len(result) if result else 0} chars")
# Parse JSON response - extract just the JSON part if there's extra text
try:
# Try direct parsing first
parsed = json.loads(result)
if VERBOSE_DEBUG:
logger.debug(f"[InnerMonologue] Successfully parsed JSON directly: {parsed}")
return parsed
except json.JSONDecodeError:
# If direct parsing fails, try to extract JSON from the response
if VERBOSE_DEBUG:
logger.debug(f"[InnerMonologue] Direct JSON parse failed, attempting extraction...")
# Look for JSON object (starts with { and ends with })
import re
json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', result, re.DOTALL)
if json_match:
json_str = json_match.group(0)
try:
parsed = json.loads(json_str)
if VERBOSE_DEBUG:
logger.debug(f"[InnerMonologue] Successfully extracted and parsed JSON: {parsed}")
return parsed
except json.JSONDecodeError as e:
if VERBOSE_DEBUG:
logger.warning(f"[InnerMonologue] Extracted JSON still invalid: {e}")
else:
if VERBOSE_DEBUG:
logger.warning(f"[InnerMonologue] No JSON object found in response")
# Final fallback
if VERBOSE_DEBUG:
logger.warning(f"[InnerMonologue] All parsing attempts failed, using fallback")
else:
print(f"[InnerMonologue] JSON extraction failed")
print(f"[InnerMonologue] Raw response was: {result[:500]}")
return {
"intent": "unknown",
"tone": "neutral",
"depth": "medium",
"consult_executive": False
}

View File

@@ -0,0 +1 @@
# Self state module

View File

@@ -0,0 +1,11 @@
"""
Stub for self state management.
"""
def load_self_state():
"""Load self state - stub implementation"""
return {
"mood": "neutral",
"energy": 0.8,
"focus": "user_request"
}

View File

@@ -234,25 +234,27 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None:
async def summarize_context(session_id: str, exchanges: list[dict]):
    """
    Internal summarizer that uses Cortex's LLM router.
    Produces L1 / L5 / L10 / L20 / L30 summaries.
    Produces cascading summaries based on exchange count:
    - L1: Always (most recent activity)
    - L2: After 2+ exchanges
    - L5: After 5+ exchanges
    - L10: After 10+ exchanges
    - L20: After 20+ exchanges
    - L30: After 30+ exchanges
    Args:
        session_id: The conversation/session ID
        exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}
    """
    # Build raw conversation text
    convo_lines = []
    for ex in exchanges:
        convo_lines.append(f"User: {ex.get('user_msg','')}")
        convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}")
    convo_text = "\n".join(convo_lines)
    exchange_count = len(exchanges)
    if not convo_text.strip():
    if exchange_count == 0:
        return {
            "session_id": session_id,
            "exchange_count": 0,
            "L1": "",
            "L2": "",
            "L5": "",
            "L10": "",
            "L20": "",
@@ -260,63 +262,54 @@ async def summarize_context(session_id: str, exchanges: list[dict]):
"last_updated": datetime.now().isoformat()
}
# Prompt the LLM (internal — no HTTP)
prompt = f"""
Summarize the conversation below into multiple compression levels.
Conversation:
----------------
{convo_text}
----------------
Output strictly in JSON with keys:
L1 → ultra short summary (12 sentences max)
L5 → short summary
L10 → medium summary
L20 → detailed overview
L30 → full detailed summary
JSON only. No text outside JSON.
"""
result = {
"session_id": session_id,
"exchange_count": exchange_count,
"L1": "",
"L2": "",
"L5": "",
"L10": "",
"L20": "",
"L30": "",
"last_updated": datetime.now().isoformat()
}
try:
llm_response = await call_llm(
prompt,
backend=INTAKE_LLM,
temperature=0.2
)
# L1: Always generate (most recent exchanges)
result["L1"] = await summarize_simple(exchanges[-5:])
print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)")
print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}")
print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}")
# L2: After 2+ exchanges
if exchange_count >= 2:
result["L2"] = await summarize_simple(exchanges[-2:])
print(f"[Intake] Generated L2 for {session_id}")
# LLM should return JSON, parse it
if not llm_response or not llm_response.strip():
raise ValueError("Empty response from LLM")
# L5: After 5+ exchanges
if exchange_count >= 5:
result["L5"] = await summarize_simple(exchanges[-10:])
print(f"[Intake] Generated L5 for {session_id}")
summary = json.loads(llm_response)
# L10: After 10+ exchanges (Reality Check)
if exchange_count >= 10:
result["L10"] = await summarize_L10(session_id, exchanges)
print(f"[Intake] Generated L10 for {session_id}")
return {
"session_id": session_id,
"exchange_count": len(exchanges),
"L1": summary.get("L1", ""),
"L5": summary.get("L5", ""),
"L10": summary.get("L10", ""),
"L20": summary.get("L20", ""),
"L30": summary.get("L30", ""),
"last_updated": datetime.now().isoformat()
}
# L20: After 20+ exchanges (Session Overview - merges L10s)
if exchange_count >= 20 and exchange_count % 10 == 0:
result["L20"] = await summarize_L20(session_id)
print(f"[Intake] Generated L20 for {session_id}")
# L30: After 30+ exchanges (Continuity Report - merges L20s)
if exchange_count >= 30 and exchange_count % 10 == 0:
result["L30"] = await summarize_L30(session_id)
print(f"[Intake] Generated L30 for {session_id}")
return result
except Exception as e:
return {
"session_id": session_id,
"exchange_count": len(exchanges),
"L1": f"[Error summarizing: {str(e)}]",
"L5": "",
"L10": "",
"L20": "",
"L30": "",
"last_updated": datetime.now().isoformat()
}
print(f"[Intake] Error during summarization: {e}")
result["L1"] = f"[Error summarizing: {str(e)}]"
return result
# ─────────────────────────────────
# Background summarization stub