chore: add sessions to gitignore

feat: SQLite-backed memory with brute-force cosine recall
- lyra.memory.remember(session_id, role, content) embeds and stores
- lyra.memory.recent(session_id, n) returns the last N from a session
- lyra.memory.recall(query, k, session_id=None) returns top-k by cosine similarity across the chosen scope (all sessions by default)
- Embeddings live in the exchanges.embedding BLOB column as float32 bytes
- Connection reopens automatically if LYRA_DB_PATH changes (test-friendly)
2026-05-29 18:23:29 -04:00 · 2026-05-16 06:35:52 +00:00 · 2026-05-16 06:10:48 +00:00 · 2026-05-16 06:01:08 +00:00 · 2026-05-16 05:57:07 +00:00 · 2025-12-29 01:59:14 -05:00
255 changed files with 851 additions and 47828 deletions
@@ -1,87 +1,11 @@
-# ====================================
-# 🌌 GLOBAL LYRA CONFIG
-# ====================================
-LOCAL_TZ_LABEL=America/New_York
-DEFAULT_SESSION_ID=default
-
-
-# ====================================
-# 🤖 LLM BACKEND OPTIONS
-# ====================================
-# Services choose which backend to use from these options
-# Primary: vLLM on MI50 GPU
-LLM_PRIMARY_PROVIDER=vllm
-LLM_PRIMARY_URL=http://10.0.0.43:8000
-LLM_PRIMARY_MODEL=/model
-
-# Secondary: Ollama on 3090 GPU
-LLM_SECONDARY_PROVIDER=ollama
-LLM_SECONDARY_URL=http://10.0.0.3:11434
-LLM_SECONDARY_MODEL=qwen2.5:7b-instruct-q4_K_M
-
-# Cloud: OpenAI
-LLM_CLOUD_PROVIDER=openai_chat
-LLM_CLOUD_URL=https://api.openai.com/v1
-LLM_CLOUD_MODEL=gpt-4o-mini
-OPENAI_API_KEY=sk-proj-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-
-# Local Fallback: llama.cpp or LM Studio
-LLM_FALLBACK_PROVIDER=openai_completions
-LLM_FALLBACK_URL=http://10.0.0.41:11435
-LLM_FALLBACK_MODEL=llama-3.2-8b-instruct
-
-# Global LLM controls
-LLM_TEMPERATURE=0.7
-
-
-# ====================================
-# 🗄️ DATABASE CONFIGURATION
-# ====================================
-# Postgres (pgvector for NeoMem)
-POSTGRES_USER=neomem
-POSTGRES_PASSWORD=change_me_in_production
-POSTGRES_DB=neomem
-POSTGRES_HOST=neomem-postgres
-POSTGRES_PORT=5432
-
-# Neo4j Graph Database
-NEO4J_URI=bolt://neomem-neo4j:7687
-NEO4J_USERNAME=neo4j
-NEO4J_PASSWORD=change_me_in_production
-NEO4J_AUTH=neo4j/change_me_in_production
-
-
-# ====================================
-# 🧠 MEMORY SERVICES (NEOMEM)
-# ====================================
-NEOMEM_API=http://neomem-api:7077
-NEOMEM_API_KEY=generate_secure_random_token_here
-NEOMEM_HISTORY_DB=postgresql://neomem:change_me_in_production@neomem-postgres:5432/neomem
-
-# Embeddings configuration (used by NeoMem)
-EMBEDDER_PROVIDER=openai
-EMBEDDER_MODEL=text-embedding-3-small
-
-
-# ====================================
-# 🔌 INTERNAL SERVICE URLS
-# ====================================
-# Using container names for Docker network communication
-INTAKE_API_URL=http://intake:7080
-CORTEX_API=http://cortex:7081
-CORTEX_URL=http://cortex:7081/reflect
-CORTEX_URL_INGEST=http://cortex:7081/ingest
-RAG_API_URL=http://rag:7090
-RELAY_URL=http://relay:7078
-
-# Persona service (optional)
-PERSONA_URL=http://persona-sidecar:7080/current
-
-
-# ====================================
-# 🔧 FEATURE FLAGS
-# ====================================
-CORTEX_ENABLED=true
-MEMORY_ENABLED=true
-PERSONA_ENABLED=false
-DEBUG_PROMPT=true
+# Local backend (Ollama) — used by default for most calls.
+LOCAL_BASE_URL=http://localhost:11434
+LOCAL_MODEL=qwen2.5:7b-instruct
+
+# Cloud backend (OpenAI) — used for harder reasoning and embeddings.
+OPENAI_API_KEY=
+CLOUD_MODEL=gpt-4o-mini
+EMBED_MODEL=text-embedding-3-small
+
+# Path to the SQLite database file where Lyra stores her memory.
+LYRA_DB_PATH=data/lyra.db
@@ -1,132 +0,0 @@
-# ============================================================================
-# CORTEX LOGGING CONFIGURATION
-# ============================================================================
-# This file contains all logging-related environment variables for the
-# Cortex reasoning pipeline. Copy this to your .env file and adjust as needed.
-#
-# Log Detail Levels:
-#   minimal  - Only errors and critical events
-#   summary  - Stage completion + errors (DEFAULT - RECOMMENDED FOR PRODUCTION)
-#   detailed - Include raw LLM outputs, RAG results, timing breakdowns
-#   verbose  - Everything including intermediate states, full JSON dumps
-#
-# Quick Start:
-#   - For debugging weak links: LOG_DETAIL_LEVEL=detailed
-#   - For finding performance bottlenecks: LOG_DETAIL_LEVEL=detailed + VERBOSE_DEBUG=true
-#   - For production: LOG_DETAIL_LEVEL=summary
-#   - For silent mode: LOG_DETAIL_LEVEL=minimal
-# ============================================================================
-
-# -----------------------------
-# Primary Logging Level
-# -----------------------------
-# Controls overall verbosity across all components
-LOG_DETAIL_LEVEL=detailed
-
-# Legacy verbose debug flag (kept for compatibility)
-# When true, enables maximum logging including raw data dumps
-VERBOSE_DEBUG=false
-
-# -----------------------------
-# LLM Logging
-# -----------------------------
-# Enable raw LLM response logging (only works with detailed/verbose levels)
-# Shows full JSON responses from each LLM backend call
-# Set to "true" to see exact LLM outputs for debugging weak links
-LOG_RAW_LLM_RESPONSES=true
-
-# -----------------------------
-# Context Logging
-# -----------------------------
-# Show full raw intake data (L1-L30 summaries) in logs
-# WARNING: Very verbose, use only for deep debugging
-LOG_RAW_CONTEXT_DATA=false
-
-# -----------------------------
-# Loop Detection & Protection
-# -----------------------------
-# Enable duplicate message detection to prevent processing loops
-ENABLE_DUPLICATE_DETECTION=true
-
-# Maximum number of messages to keep in session history (prevents unbounded growth)
-# Older messages are trimmed automatically
-MAX_MESSAGE_HISTORY=100
-
-# Session TTL in hours - sessions inactive longer than this are auto-expired
-SESSION_TTL_HOURS=24
-
-# -----------------------------
-# NeoMem / RAG Logging
-# -----------------------------
-# Relevance score threshold for NeoMem results
-RELEVANCE_THRESHOLD=0.4
-
-# Enable NeoMem long-term memory retrieval
-NEOMEM_ENABLED=false
-
-# -----------------------------
-# Autonomous Features
-# -----------------------------
-# Enable autonomous tool invocation (RAG, WEB, WEATHER, CODEBRAIN)
-ENABLE_AUTONOMOUS_TOOLS=true
-
-# Confidence threshold for autonomous tool invocation (0.0 - 1.0)
-AUTONOMOUS_TOOL_CONFIDENCE_THRESHOLD=0.6
-
-# Enable proactive monitoring and suggestions
-ENABLE_PROACTIVE_MONITORING=true
-
-# Minimum priority for proactive suggestions to be included (0.0 - 1.0)
-PROACTIVE_SUGGESTION_MIN_PRIORITY=0.6
-
-# ============================================================================
-# EXAMPLE LOGGING OUTPUT AT DIFFERENT LEVELS
-# ============================================================================
-#
-# LOG_DETAIL_LEVEL=summary (RECOMMENDED):
-# ────────────────────────────────────────────────────────────────────────────
-# ✅ [LLM] PRIMARY | 14:23:45.123 | Reply: Based on your question about...
-# 📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 3 results
-# 🧠 Monologue | question | Tone: curious
-# ✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-# 📤 Output: 342 characters
-# ────────────────────────────────────────────────────────────────────────────
-#
-# LOG_DETAIL_LEVEL=detailed (FOR DEBUGGING):
-# ────────────────────────────────────────────────────────────────────────────
-# 🚀 PIPELINE START | Session: abc123 | 14:23:45.123
-# 📝 User: What is the meaning of life?
-# ────────────────────────────────────────────────────────────────────────────
-# 🧠 LLM CALL | Backend: PRIMARY | 14:23:45.234
-# ────────────────────────────────────────────────────────────────────────────
-# 📝 Prompt: You are Lyra, a thoughtful AI assistant...
-# 💬 Reply: Based on philosophical perspectives, the meaning...
-# ╭─ RAW RESPONSE ────────────────────────────────────────────────────────────
-# │ {
-# │   "choices": [
-# │     {
-# │       "message": {
-# │         "content": "Based on philosophical perspectives..."
-# │       }
-# │     }
-# │   ]
-# │ }
-# ╰───────────────────────────────────────────────────────────────────────────
-#
-# ✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-# ⏱️  Stage Timings:
-#    context        :   150ms ( 12.0%)
-#    identity       :    10ms (  0.8%)
-#    monologue      :   200ms ( 16.0%)
-#    reasoning      :   450ms ( 36.0%)
-#    refinement     :   300ms ( 24.0%)
-#    persona        :   140ms ( 11.2%)
-# ────────────────────────────────────────────────────────────────────────────
-#
-# LOG_DETAIL_LEVEL=verbose (MAXIMUM DEBUG):
-# Same as detailed but includes:
-# - Full 50+ line raw JSON dumps
-# - Complete intake data structures
-# - All intermediate processing states
-# - Detailed traceback on errors
-# ============================================================================
@@ -1,83 +1,37 @@
-# =============================
-# 📦 General
-# =============================
+# Python
 __pycache__/
-*.pyc
-*.log
-/.vscode/
-.vscode/
-# =============================
-# 🔐 Environment files (NEVER commit secrets!)
-# =============================
-# Ignore all .env files
+*.py[cod]
+*.egg-info/
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+build/
+dist/
+
+# Virtual environments
+.venv/
+venv/
+env/
+
+# Env files (never commit secrets)
 .env
 .env.local
 .env.*.local
-**/.env
-**/.env.local
-
-# BUT track .env.example templates (safe to commit)
 !.env.example
-!**/.env.example

-# Ignore backup directory
-.env-backups/
-
-# =============================
-# 🐳 Docker volumes (HUGE)
-# =============================
-volumes/
-*/volumes/
-
-# =============================
-# 📚 Databases & vector stores
-# =============================
-postgres_data/
-neo4j_data/
-*/postgres_data/
-*/neo4j_data/
-rag/chromadb/
-rag/*.sqlite3
-rag/chatlogs/
-rag/lyra-chatlogs/
-
-# =============================
-# 🤖 Model weights (big)
-# =============================
-models/
-*.gguf
-*.bin
-*.pt
-*.safetensors
-
-# =============================
-# 📦 Node modules (installed via npm)
-# =============================
-node_modules/
-core/relay/node_modules/
-
-# =============================
-# 💬 Runtime data & sessions
-# =============================
-# Session files (contain user conversation data)
-core/relay/sessions/
-**/sessions/
-*.jsonl
-
-# Log directories
-logs/
-**/logs/
-*-logs/
-intake-logs/
-
-# Database files (generated at runtime)
+# Local data
+data/
 *.db
 *.sqlite
 *.sqlite3
-neomem_history/
-**/neomem_history/

-# Temporary and cache files
-.cache/
-*.tmp
-*.temp
+# IDE / OS
+.vscode/
+.idea/
+.DS_Store
+
+# Logs
+*.log
+
+# Lyra runtime data (relay session files)
+/core/relay/sessions/
@@ -1,91 +0,0 @@
-# Deprecated Files - Safe to Delete
-
-This file lists all deprecated files that can be safely deleted after verification.
-
-## Files Marked for Deletion
-
-### Docker Compose Files
-
-#### `/core/docker-compose.yml.DEPRECATED`
- **Status**: DEPRECATED
- **Reason**: All services consolidated into main `/docker-compose.yml`
- **Replaced by**: `/docker-compose.yml` (relay service now has complete config)
- **Safe to delete**: Yes, after verifying main docker-compose works
-
-### Environment Files
-
-All original `.env` files have been consolidated. Backups exist in `.env-backups/` directory.
-
-#### Previously Deleted (Already Done)
- ✅ `/core/.env` - Deleted (redundant with root .env)
-
-### Experimental/Orphaned Files
-
-#### `/core/env experiments/` (entire directory)
- **Status**: User will handle separately
- **Contains**: `.env`, `.env.local`, `.env.openai`
- **Action**: User to review and clean up
-
-## Verification Steps Before Deleting
-
-Before deleting the deprecated files, verify:
-
-1. **Test main docker-compose.yml works:**
-   ```bash
-   cd /home/serversdown/project-lyra
-   docker-compose down
-   docker-compose up -d
-   docker-compose ps  # All services should be running
-   ```
-
-2. **Verify relay service has correct config:**
-   ```bash
-   docker exec relay env | grep -E "LLM_|NEOMEM_|OPENAI"
-   docker exec relay ls -la /app/sessions  # Sessions volume mounted
-   ```
-
-3. **Test relay functionality:**
-   - Send a test message through relay
-   - Verify memory storage works
-   - Confirm LLM backend connections work
-
-## Deletion Commands
-
-After successful verification, run:
-
-```bash
-cd /home/serversdown/project-lyra
-
-# Delete deprecated docker-compose file
-rm core/docker-compose.yml.DEPRECATED
-
-# Optionally clean up backup directory after confirming everything works
-# (Keep backups for at least a few days/weeks)
-# rm -rf .env-backups/
-```
-
-## Files to Keep
-
-These files should **NOT** be deleted:
-
- ✅ `.env` (root) - Single source of truth
- ✅ `.env.example` (root) - Security template (commit to git)
- ✅ `cortex/.env` - Service-specific config
- ✅ `cortex/.env.example` - Security template (commit to git)
- ✅ `neomem/.env` - Service-specific config
- ✅ `neomem/.env.example` - Security template (commit to git)
- ✅ `intake/.env` - Service-specific config
- ✅ `intake/.env.example` - Security template (commit to git)
- ✅ `rag/.env.example` - Security template (commit to git)
- ✅ `docker-compose.yml` - Main orchestration file
- ✅ `ENVIRONMENT_VARIABLES.md` - Documentation
- ✅ `.gitignore` - Git configuration
-
-## Backup Information
-
-All original `.env` files backed up to:
- Location: `/home/serversdown/project-lyra/.env-backups/`
- Timestamp: `20251126_025334`
- Files: 6 original .env files
-
-Keep backups until you're confident the new setup is stable (recommended: 2-4 weeks).
@@ -1,178 +0,0 @@
-# Logging System Migration Complete
-
-## ✅ What Changed
-
-The old `VERBOSE_DEBUG` logging system has been completely replaced with the new structured `LOG_DETAIL_LEVEL` system.
-
-### Files Modified
-
-1. **[.env](.env)** - Removed `VERBOSE_DEBUG`, cleaned up duplicate `LOG_DETAIL_LEVEL` settings
-2. **[cortex/.env](cortex/.env)** - Removed `VERBOSE_DEBUG` from cortex config
-3. **[cortex/router.py](cortex/router.py)** - Replaced `VERBOSE_DEBUG` checks with `LOG_DETAIL_LEVEL`
-4. **[cortex/context.py](cortex/context.py)** - Replaced `VERBOSE_DEBUG` with `LOG_DETAIL_LEVEL`, removed verbose file logging setup
-
-## 🎯 New Logging Configuration
-
-### Single Environment Variable
-
-Set `LOG_DETAIL_LEVEL` in your `.env` file:
-
-```bash
-LOG_DETAIL_LEVEL=detailed
-```
-
-### Logging Levels
-
-| Level | Lines/Message | What You See |
-|-------|---------------|--------------|
-| **minimal** | 1-2 | Only errors and critical events |
-| **summary** | 5-7 | Pipeline completion, errors, warnings (production mode) |
-| **detailed** | 30-50 | LLM outputs, timing breakdowns, context (debugging mode) |
-| **verbose** | 100+ | Everything including raw JSON dumps (deep debugging) |
-
-## 📊 What You Get at Each Level
-
-### Summary Mode (Production)
-```
-📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 3 results
-🧠 Monologue | question | Tone: curious
-
-====================================================================================================
-✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-====================================================================================================
-📤 Output: 342 characters
-====================================================================================================
-```
-
-### Detailed Mode (Debugging - RECOMMENDED)
-```
-====================================================================================================
-🚀 PIPELINE START | Session: abc123 | 14:23:45.123
-====================================================================================================
-📝 User: What is the meaning of life?
-────────────────────────────────────────────────────────────────────────────────────────────────────
-
-────────────────────────────────────────────────────────────────────────────────────────────────────
-🧠 LLM CALL | Backend: PRIMARY | 14:23:45.234
-────────────────────────────────────────────────────────────────────────────────────────────────────
-📝 Prompt: You are Lyra, analyzing the user's question...
-💬 Reply: Based on the context provided, here's my analysis...
-────────────────────────────────────────────────────────────────────────────────────────────────────
-
-📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 3 results
-────────────────────────────────────────────────────────────────────────────────────────────────────
-[CONTEXT] Session abc123 | User: What is the meaning of life?
-────────────────────────────────────────────────────────────────────────────────────────────────────
-  Mode: default | Mood: neutral | Project: None
-  Tools: RAG, WEB, WEATHER, CODEBRAIN, POKERBRAIN
-
-  ╭─ INTAKE SUMMARIES ────────────────────────────────────────────────
-  │ L1  : Last message discussed philosophy...
-  │ L5  : Recent 5 messages covered existential topics...
-  │ L10 : Past 10 messages showed curiosity pattern...
-  ╰───────────────────────────────────────────────────────────────────
-
-  ╭─ RAG RESULTS (3) ──────────────────────────────────────────────
-  │ [1] 0.923 | Previous discussion about purpose...
-  │ [2] 0.891 | Note about existential philosophy...
-  │ [3] 0.867 | Memory of Viktor Frankl discussion...
-  ╰───────────────────────────────────────────────────────────────────
-────────────────────────────────────────────────────────────────────────────────────────────────────
-
-🧠 Monologue | question | Tone: curious
-
-====================================================================================================
-✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-====================================================================================================
-⏱️  Stage Timings:
-   context        :   150ms ( 12.0%)
-   identity       :    10ms (  0.8%)
-   monologue      :   200ms ( 16.0%)
-   tools          :     0ms (  0.0%)
-   reflection     :    50ms (  4.0%)
-   reasoning      :   450ms ( 36.0%)  ← BOTTLENECK!
-   refinement     :   300ms ( 24.0%)
-   persona        :   140ms ( 11.2%)
-   learning       :    50ms (  4.0%)
-📤 Output: 342 characters
-====================================================================================================
-```
-
-### Verbose Mode (Maximum Debug)
-Same as detailed, plus:
- Full raw JSON responses from LLMs (50-line boxes)
- Complete intake data structures
- Stack traces on errors
-
-## 🚀 How to Use
-
-### For Finding Weak Links (Your Use Case)
-```bash
-# In .env:
-LOG_DETAIL_LEVEL=detailed
-
-# Restart services:
-docker-compose restart cortex relay
-```
-
-You'll now see:
- ✅ Which LLM backend is used
- ✅ What prompts are sent to each LLM
- ✅ What each LLM responds with
- ✅ Timing breakdown showing which stage is slow
- ✅ Context being used (RAG, intake summaries)
- ✅ Clean, hierarchical structure
-
-### For Production
-```bash
-LOG_DETAIL_LEVEL=summary
-```
-
-### For Deep Debugging
-```bash
-LOG_DETAIL_LEVEL=verbose
-```
-
-## 🔍 Finding Performance Bottlenecks
-
-With `detailed` mode, look for:
-
-1. **Slow stages in timing breakdown:**
-   ```
-   reasoning      :  3450ms ( 76.0%)  ← THIS IS YOUR BOTTLENECK!
-   ```
-
-2. **Backend failures:**
-   ```
-   ⚠️  [LLM] PRIMARY failed | 14:23:45.234 | Connection timeout
-   ✅ [LLM] SECONDARY | Reply: Based on...  ← Fell back to secondary
-   ```
-
-3. **Loop detection:**
-   ```
-   ⚠️  DUPLICATE MESSAGE DETECTED | Session: abc123
-   🔁 LOOP DETECTED - Returning cached context
-   ```
-
-## 📁 Removed Features
-
-The following old logging features have been removed:
-
- ❌ `VERBOSE_DEBUG` environment variable (replaced with `LOG_DETAIL_LEVEL`)
- ❌ File logging to `/app/logs/cortex_verbose_debug.log` (use `docker logs` instead)
- ❌ Separate verbose handlers in Python logging
- ❌ Per-module verbose flags
-
-## ✨ New Features
-
- ✅ Single unified logging configuration
- ✅ Hierarchical, scannable output
- ✅ Collapsible data sections (boxes)
- ✅ Stage timing always shown in detailed mode
- ✅ Performance profiling built-in
- ✅ Loop detection and warnings
- ✅ Clean error formatting
-
---
-
-**The logging is now clean, concise, and gives you exactly what you need to find weak links!** 🎯
@@ -1,176 +0,0 @@
-# Cortex Logging Quick Reference
-
-## 🎯 TL;DR
-
-**Finding weak links in the LLM chain?**
-```bash
-export LOG_DETAIL_LEVEL=detailed
-export VERBOSE_DEBUG=true
-```
-
-**Production use?**
-```bash
-export LOG_DETAIL_LEVEL=summary
-```
-
---
-
-## 📊 Log Levels Comparison
-
-| Level | Output Lines/Message | Use Case | Raw LLM Output? |
-|-------|---------------------|----------|-----------------|
-| **minimal** | 1-2 | Silent production | ❌ No |
-| **summary** | 5-7 | Production (DEFAULT) | ❌ No |
-| **detailed** | 30-50 | Debugging, finding bottlenecks | ✅ Parsed only |
-| **verbose** | 100+ | Deep debugging, seeing raw data | ✅ Full JSON |
-
---
-
-## 🔍 Common Debugging Tasks
-
-### See Raw LLM Outputs
-```bash
-export LOG_DETAIL_LEVEL=verbose
-```
-Look for:
-```
-╭─ RAW RESPONSE ────────────────────────────────────
-│ { "choices": [ { "message": { "content": "..." } } ] }
-╰───────────────────────────────────────────────────
-```
-
-### Find Performance Bottlenecks
-```bash
-export LOG_DETAIL_LEVEL=detailed
-```
-Look for:
-```
-⏱️  Stage Timings:
-   reasoning      :  3450ms ( 76.0%)  ← SLOW!
-```
-
-### Check Which RAG Memories Are Used
-```bash
-export LOG_DETAIL_LEVEL=detailed
-```
-Look for:
-```
-╭─ RAG RESULTS (5) ──────────────────────────────
-│ [1] 0.923 | Memory content...
-```
-
-### Detect Loops
-```bash
-export ENABLE_DUPLICATE_DETECTION=true  # (default)
-```
-Look for:
-```
-⚠️  DUPLICATE MESSAGE DETECTED
-🔁 LOOP DETECTED - Returning cached context
-```
-
-### See All Backend Failures
-```bash
-export LOG_DETAIL_LEVEL=summary  # or higher
-```
-Look for:
-```
-⚠️  [LLM] PRIMARY failed | Connection timeout
-⚠️  [LLM] SECONDARY failed | Model not found
-✅ [LLM] CLOUD | Reply: Based on...
-```
-
---
-
-## 🛠️ Environment Variables Cheat Sheet
-
-```bash
-# Verbosity Control
-LOG_DETAIL_LEVEL=detailed        # minimal | summary | detailed | verbose
-VERBOSE_DEBUG=false              # true = maximum verbosity (legacy)
-
-# Raw Data Visibility
-LOG_RAW_CONTEXT_DATA=false       # Show full intake L1-L30 dumps
-
-# Loop Protection
-ENABLE_DUPLICATE_DETECTION=true  # Detect duplicate messages
-MAX_MESSAGE_HISTORY=100          # Trim history after N messages
-SESSION_TTL_HOURS=24             # Expire sessions after N hours
-
-# Features
-NEOMEM_ENABLED=false             # Enable long-term memory
-ENABLE_AUTONOMOUS_TOOLS=true     # Enable tool invocation
-ENABLE_PROACTIVE_MONITORING=true # Enable suggestions
-```
-
---
-
-## 📋 Sample Output
-
-### Summary Mode (Default - Production)
-```
-✅ [LLM] PRIMARY | 14:23:45.123 | Reply: Based on your question...
-📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 5 results
-🧠 Monologue | question | Tone: curious
-✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-📤 Output: 342 characters
-```
-
-### Detailed Mode (Debugging)
-```
-════════════════════════════════════════════════════════════════════════════
-🚀 PIPELINE START | Session: abc123 | 14:23:45.123
-════════════════════════════════════════════════════════════════════════════
-📝 User: What is the meaning of life?
-────────────────────────────────────────────────────────────────────────────
-
-────────────────────────────────────────────────────────────────────────────
-🧠 LLM CALL | Backend: PRIMARY | 14:23:45.234
-────────────────────────────────────────────────────────────────────────────
-📝 Prompt: You are Lyra, a thoughtful AI assistant...
-💬 Reply: Based on philosophical perspectives...
-
-📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 5 results
-  ╭─ RAG RESULTS (5) ──────────────────────────────
-  │ [1] 0.923 | Previous philosophy discussion...
-  │ [2] 0.891 | Existential note...
-  ╰────────────────────────────────────────────────
-
-════════════════════════════════════════════════════════════════════════════
-✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-════════════════════════════════════════════════════════════════════════════
-⏱️  Stage Timings:
-   context        :   150ms ( 12.0%)
-   reasoning      :   450ms ( 36.0%)  ← Largest component
-   persona        :   140ms ( 11.2%)
-📤 Output: 342 characters
-════════════════════════════════════════════════════════════════════════════
-```
-
---
-
-## ⚡ Quick Troubleshooting
-
-| Symptom | Check | Fix |
-|---------|-------|-----|
-| **Logs too verbose** | Current level | Set `LOG_DETAIL_LEVEL=summary` |
-| **Can't see LLM outputs** | Current level | Set `LOG_DETAIL_LEVEL=detailed` or `verbose` |
-| **Repeating operations** | Loop warnings | Check for `🔁 LOOP DETECTED` messages |
-| **Slow responses** | Stage timings | Look for stages >1000ms in detailed mode |
-| **Missing RAG data** | NEOMEM_ENABLED | Set `NEOMEM_ENABLED=true` |
-| **Out of memory** | Message history | Lower `MAX_MESSAGE_HISTORY` |
-
---
-
-## 📁 Key Files
-
- **[.env.logging.example](.env.logging.example)** - Full configuration guide
- **[LOGGING_REFACTOR_SUMMARY.md](LOGGING_REFACTOR_SUMMARY.md)** - Detailed explanation
- **[cortex/utils/logging_utils.py](cortex/utils/logging_utils.py)** - Logging utilities
- **[cortex/context.py](cortex/context.py)** - Context + loop protection
- **[cortex/router.py](cortex/router.py)** - Pipeline stages
- **[core/relay/lib/llm.js](core/relay/lib/llm.js)** - LLM backend logging
-
---
-
-**Need more detail? See [LOGGING_REFACTOR_SUMMARY.md](LOGGING_REFACTOR_SUMMARY.md)**
@@ -1,352 +0,0 @@
-# Cortex Logging Refactor Summary
-
-## 🎯 Problem Statement
-
-The cortex chat loop had severe logging issues that made debugging impossible:
-
-1. **Massive verbosity**: 100+ log lines per chat message
-2. **Raw LLM dumps**: Full JSON responses pretty-printed on every call (1000s of lines)
-3. **Repeated data**: NeoMem results logged 71 times individually
-4. **No structure**: Scattered emoji logs with no hierarchy
-5. **Impossible to debug**: Couldn't tell if loops were happening or just verbose logging
-6. **No loop protection**: Unbounded message history growth, no session cleanup, no duplicate detection
-
-## ✅ What Was Fixed
-
-### 1. **Structured Hierarchical Logging**
-
-**Before:**
-```
-🔍 RAW LLM RESPONSE: {
-  "id": "chatcmpl-123",
-  "object": "chat.completion",
-  "created": 1234567890,
-  "model": "gpt-4",
-  "choices": [
-    {
-      "index": 0,
-      "message": {
-        "role": "assistant",
-        "content": "Here is a very long response that goes on for hundreds of lines..."
-      }
-    }
-  ],
-  "usage": {
-    "prompt_tokens": 123,
-    "completion_tokens": 456,
-    "total_tokens": 579
-  }
-}
-🧠 Trying backend: PRIMARY (http://localhost:8000)
-✅ Success via PRIMARY
-[STAGE 0] Collecting unified context...
-[STAGE 0] Context collected - 5 RAG results
-[COLLECT_CONTEXT] Intake data retrieved:
-{
-  "L1": [...],
-  "L5": [...],
-  "L10": {...},
-  "L20": {...},
-  "L30": {...}
-}
-[COLLECT_CONTEXT] NeoMem search returned 71 results
-  [1] Score: 0.923 - Memory content here...
-  [2] Score: 0.891 - More memory content...
-  [3] Score: 0.867 - Even more content...
-  ... (68 more lines)
-```
-
-**After (summary mode - DEFAULT):**
-```
-✅ [LLM] PRIMARY | 14:23:45.123 | Reply: Based on your question about...
-📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 5 results
-🧠 Monologue | question | Tone: curious
-✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-📤 Output: 342 characters
-```
-
-**After (detailed mode - for debugging):**
-```
-════════════════════════════════════════════════════════════════════════════════════════════════════
-🚀 PIPELINE START | Session: abc123 | 14:23:45.123
-════════════════════════════════════════════════════════════════════════════════════════════════════
-📝 User: What is the meaning of life?
-────────────────────────────────────────────────────────────────────────────────────────────────────
-
-────────────────────────────────────────────────────────────────────────────────────────────────────
-🧠 LLM CALL | Backend: PRIMARY | 14:23:45.234
-────────────────────────────────────────────────────────────────────────────────────────────────────
-📝 Prompt: You are Lyra, a thoughtful AI assistant...
-💬 Reply: Based on philosophical perspectives, the meaning...
-
-📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 5 results
-────────────────────────────────────────────────────────────────────────────────────────────────────
-[CONTEXT] Session abc123 | User: What is the meaning of life?
-────────────────────────────────────────────────────────────────────────────────────────────────────
-  Mode: default | Mood: neutral | Project: None
-  Tools: RAG, WEB, WEATHER, CODEBRAIN, POKERBRAIN
-
-  ╭─ INTAKE SUMMARIES ────────────────────────────────────────────────
-  │ L1  : Last message discussed philosophy...
-  │ L5  : Recent 5 messages covered existential topics...
-  │ L10 : Past 10 messages showed curiosity pattern...
-  │ L20 : Session focused on deep questions...
-  │ L30 : Long-term trend shows philosophical interest...
-  ╰───────────────────────────────────────────────────────────────────
-
-  ╭─ RAG RESULTS (5) ──────────────────────────────────────────────
-  │ [1] 0.923 | Previous discussion about purpose and meaning...
-  │ [2] 0.891 | Note about existential philosophy...
-  │ [3] 0.867 | Memory of Viktor Frankl discussion...
-  │ [4] 0.834 | Reference to stoic philosophy...
-  │ [5] 0.801 | Buddhism and the middle path...
-  ╰───────────────────────────────────────────────────────────────────
-────────────────────────────────────────────────────────────────────────────────────────────────────
-
-════════════════════════════════════════════════════════════════════════════════════════════════════
-✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-════════════════════════════════════════════════════════════════════════════════════════════════════
-⏱️  Stage Timings:
-   context        :   150ms ( 12.0%)
-   identity       :    10ms (  0.8%)
-   monologue      :   200ms ( 16.0%)
-   tools          :     0ms (  0.0%)
-   reflection     :    50ms (  4.0%)
-   reasoning      :   450ms ( 36.0%)
-   refinement     :   300ms ( 24.0%)
-   persona        :   140ms ( 11.2%)
-📤 Output: 342 characters
-════════════════════════════════════════════════════════════════════════════════════════════════════
-```
-
-### 2. **Configurable Verbosity Levels**
-
-Set via `LOG_DETAIL_LEVEL` environment variable:
-
- **`minimal`**: Only errors and critical events
- **`summary`**: Stage completion + errors (DEFAULT - recommended for production)
- **`detailed`**: Include raw LLM outputs, RAG results, timing breakdowns (for debugging)
- **`verbose`**: Everything including full JSON dumps (for deep debugging)
-
-### 3. **Raw LLM Output Visibility** ✅
-
-**You can now see raw LLM outputs clearly!**
-
-In `detailed` or `verbose` mode, LLM calls show:
- Backend used
- Prompt preview
- Parsed reply
- **Raw JSON response in collapsible format** (verbose only)
-
-```
-╭─ RAW RESPONSE ────────────────────────────────────────────────────────────────────────────
-│ {
-│   "id": "chatcmpl-123",
-│   "object": "chat.completion",
-│   "model": "gpt-4",
-│   "choices": [
-│     {
-│       "message": {
-│         "content": "Full response here..."
-│       }
-│     }
-│   ]
-│ }
-╰───────────────────────────────────────────────────────────────────────────────────────────
-```
-
-### 4. **Loop Detection & Protection** ✅
-
-**New safety features:**
-
- **Duplicate message detection**: Prevents processing the same message twice
- **Message history trimming**: Auto-trims to last 100 messages (configurable via `MAX_MESSAGE_HISTORY`)
- **Session TTL**: Auto-expires inactive sessions after 24 hours (configurable via `SESSION_TTL_HOURS`)
- **Hash-based detection**: Uses MD5 hash to detect exact duplicate messages
-
-**Example warning when loop detected:**
-```
-⚠️  DUPLICATE MESSAGE DETECTED | Session: abc123 | Message: What is the meaning of life?
-🔁 LOOP DETECTED - Returning cached context to prevent processing duplicate
-```
-
-### 5. **Performance Timing** ✅
-
-In `detailed` mode, see exactly where time is spent:
-
-```
-⏱️  Stage Timings:
-   context        :   150ms ( 12.0%)  ← Context collection
-   identity       :    10ms (  0.8%)  ← Identity loading
-   monologue      :   200ms ( 16.0%)  ← Inner monologue
-   tools          :     0ms (  0.0%)  ← Autonomous tools
-   reflection     :    50ms (  4.0%)  ← Reflection notes
-   reasoning      :   450ms ( 36.0%)  ← Main reasoning (BOTTLENECK)
-   refinement     :   300ms ( 24.0%)  ← Answer refinement
-   persona        :   140ms ( 11.2%)  ← Persona layer
-```
-
-**This helps you identify weak links in the chain!**
-
-## 📁 Files Modified
-
-### Core Changes
-
-1. **[llm.js](core/relay/lib/llm.js)**
-   - Removed massive JSON dump on line 53
-   - Added structured logging with 4 verbosity levels
-   - Shows raw responses only in verbose mode (collapsible format)
-   - Tracks failed backends and shows summary on total failure
-
-2. **[context.py](cortex/context.py)**
-   - Condensed 71-line NeoMem loop to 5-line summary
-   - Removed repeated intake data dumps
-   - Added structured hierarchical logging with boxes
-   - Added duplicate message detection
-   - Added message history trimming
-   - Added session TTL and cleanup
-
-3. **[router.py](cortex/router.py)**
-   - Replaced 15+ stage logs with unified pipeline summary
-   - Added stage timing collection
-   - Shows performance breakdown in detailed mode
-   - Clean start/end markers with total duration
-
-### New Files
-
-4. **[utils/logging_utils.py](cortex/utils/logging_utils.py)** (NEW)
-   - Reusable structured logging utilities
-   - `PipelineLogger` class for hierarchical logging
-   - Collapsible data sections
-   - Stage timing tracking
-   - Future-ready for expansion
-
-5. **[.env.logging.example](.env.logging.example)** (NEW)
-   - Complete logging configuration guide
-   - Shows example output at each verbosity level
-   - Documents all environment variables
-   - Production-ready defaults
-
-6. **[LOGGING_REFACTOR_SUMMARY.md](LOGGING_REFACTOR_SUMMARY.md)** (THIS FILE)
-
-## 🚀 How to Use
-
-### For Finding Weak Links (Your Use Case)
-
-```bash
-# Set in your .env or export:
-export LOG_DETAIL_LEVEL=detailed
-export VERBOSE_DEBUG=false  # or true for even more detail
-
-# Now run your chat - you'll see:
-# 1. Which LLM backend is used
-# 2. Raw LLM outputs (in verbose mode)
-# 3. Exact timing per stage
-# 4. Which stage is taking longest
-```
-
-### For Production
-
-```bash
-export LOG_DETAIL_LEVEL=summary
-
-# Minimal, clean logs:
-# ✅ [LLM] PRIMARY | 14:23:45.123 | Reply: Based on your question...
-# ✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
-```
-
-### For Deep Debugging
-
-```bash
-export LOG_DETAIL_LEVEL=verbose
-export LOG_RAW_CONTEXT_DATA=true
-
-# Shows EVERYTHING including full JSON dumps
-```
-
-## 🔍 Finding Weak Links - Quick Guide
-
-**Problem: "Which LLM stage is failing or producing bad output?"**
-
-1. Set `LOG_DETAIL_LEVEL=detailed`
-2. Run a test conversation
-3. Look for timing anomalies:
-   ```
-   reasoning      :  3450ms ( 76.0%)  ← BOTTLENECK!
-   ```
-4. Look for errors:
-   ```
-   ⚠️  Reflection failed: Connection timeout
-   ```
-5. Check raw LLM outputs (set `VERBOSE_DEBUG=true`):
-   ```
-   ╭─ RAW RESPONSE ────────────────────────────────────
-   │ {
-   │   "choices": [
-   │     { "message": { "content": "..." } }
-   │   ]
-   │ }
-   ╰───────────────────────────────────────────────────
-   ```
-
-**Problem: "Is the loop repeating operations?"**
-
-1. Enable duplicate detection (on by default)
-2. Look for loop warnings:
-   ```
-   ⚠️  DUPLICATE MESSAGE DETECTED | Session: abc123
-   🔁 LOOP DETECTED - Returning cached context
-   ```
-3. Check stage timings - repeated stages will show up as duplicates
-
-**Problem: "Which RAG memories are being used?"**
-
-1. Set `LOG_DETAIL_LEVEL=detailed`
-2. Look for RAG results box:
-   ```
-   ╭─ RAG RESULTS (5) ──────────────────────────────
-   │ [1] 0.923 | Previous discussion about X...
-   │ [2] 0.891 | Note about Y...
-   ╰────────────────────────────────────────────────
-   ```
-
-## 📊 Environment Variables Reference
-
-| Variable | Default | Description |
-|----------|---------|-------------|
-| `LOG_DETAIL_LEVEL` | `summary` | Verbosity: minimal/summary/detailed/verbose |
-| `VERBOSE_DEBUG` | `false` | Legacy flag for maximum verbosity |
-| `LOG_RAW_CONTEXT_DATA` | `false` | Show full intake data dumps |
-| `ENABLE_DUPLICATE_DETECTION` | `true` | Detect and prevent duplicate messages |
-| `MAX_MESSAGE_HISTORY` | `100` | Max messages to keep per session |
-| `SESSION_TTL_HOURS` | `24` | Auto-expire sessions after N hours |
-
-## 🎉 Results
-
-**Before:** 1000+ lines of logs per chat message, unreadable, couldn't identify issues
-
-**After (summary mode):** 5 lines of structured logs, clear and actionable
-
-**After (detailed mode):** ~50 lines with full visibility into each stage, timing, and raw outputs
-
-**Loop protection:** Automatic detection and prevention of duplicate processing
-
-**You can now:**
-✅ See raw LLM outputs clearly (in detailed/verbose mode)
-✅ Identify performance bottlenecks (stage timings)
-✅ Detect loops and duplicates (automatic)
-✅ Find failing stages (error markers)
-✅ Scan logs quickly (hierarchical structure)
-✅ Debug production issues (adjustable verbosity)
-
-## 🔧 Next Steps (Optional Improvements)
-
-1. **Structured JSON logging**: Output as JSON for log aggregation tools
-2. **Log rotation**: Implement file rotation for verbose logs
-3. **Metrics export**: Export stage timings to Prometheus/Grafana
-4. **Error categorization**: Tag errors by type (network, timeout, parsing, etc.)
-5. **Performance alerts**: Auto-alert when stages exceed thresholds
-
---
-
-**Happy debugging! You can now see what's actually happening in the cortex loop.** 🎯
@@ -1,902 +1,21 @@
-# Project Lyra - README v0.9.1
+# Lyra

-Lyra is a modular persistent AI companion system with advanced reasoning capabilities and autonomous decision-making.
-It provides memory-backed chat using **Relay** + **Cortex** with integrated **Autonomy System**,
-featuring a multi-stage reasoning pipeline powered by HTTP-based LLM backends.
+A persistent, autonomous AI assistant. From-scratch rewrite of an earlier attempt.

-**NEW in v0.9.0:** Trilium Notes integration - Search and create notes from conversations
+The design thinking that survives the rewrite lives in [`docs/`](docs/) — start with [`docs/ARCH_v0-6-1.md`](docs/ARCH_v0-6-1.md). The previous implementation is preserved on the `archive` branch.

-**Current Version:** v0.9.1 (2025-12-29)
+## Status

-> **Note:** As of v0.6.0, NeoMem is **disabled by default** while we work out integration hiccups in the pipeline. The autonomy system is being refined independently before full memory integration.
+Pre-MVP. Building toward the smallest useful version: chat with persistent memory across sessions.

-## Mission Statement
+## Setup

-The point of Project Lyra is to give an AI chatbot more abilities than a typical chatbot. Typical chatbots are essentially amnesic and forget everything about your project. Lyra helps keep projects organized and remembers everything you have done. Think of her abilities as a notepad/schedule/database/co-creator/collaborator all with its own executive function. Say something in passing, and Lyra remembers it and reminds you of it later.
-
---
-
-## Architecture Overview
-
-Project Lyra operates as a **single docker-compose deployment** with multiple Docker containers networked together in a microservices architecture. Just as the brain has regions, Lyra has modules:
-
-### Core Services
-
-**1. Relay** (Node.js/Express) - Port 7078
- Main orchestrator and message router
- Coordinates all module interactions
- OpenAI-compatible endpoint: `POST /v1/chat/completions`
- Internal endpoint: `POST /chat`
- Dual-mode routing: Standard Mode (simple chat) or Cortex Mode (full reasoning)
- Server-side session persistence with file-based storage
- Session management API: `GET/POST/PATCH/DELETE /sessions`
- Manages async calls to Cortex ingest
- *(NeoMem integration currently disabled in v0.6.0)*
-
-**2. UI** (Static HTML) - Port 8081 (nginx)
- Browser-based chat interface with cyberpunk theme
- Mode selector (Standard/Cortex) in header
- Settings modal with backend selection and session management
- Light/Dark mode toggle (dark by default)
- **NEW in v0.8.0:** "🧠 Show Work" button for real-time thinking stream
-  - Opens popup window with live SSE connection
-  - Color-coded events: thinking, tool calls, results, completion
-  - Auto-scrolling with animations
-  - Session-aware (matches current chat session)
- Server-synced session management (persists across browsers and reboots)
- OpenAI-compatible message format
-
-**3. NeoMem** (Python/FastAPI) - Port 7077 - **DISABLED IN v0.6.0**
- Long-term memory database (fork of Mem0 OSS)
- Vector storage (PostgreSQL + pgvector) + Graph storage (Neo4j)
- RESTful API: `/memories`, `/search`
- Semantic memory updates and retrieval
- No external SDK dependencies - fully local
- **Status:** Currently disabled while pipeline integration is refined
-
-### Reasoning Layer
-
-**4. Cortex** (Python/FastAPI) - Port 7081
- Primary reasoning engine with multi-stage pipeline and autonomy system
- **Includes embedded Intake module** (no separate service as of v0.5.1)
- **Integrated Autonomy System** (NEW in v0.6.0) - See Autonomy System section below
- **Tool Calling System** (NEW in v0.8.0) - Agentic execution for Standard Mode
-  - Sandboxed code execution (Python, JavaScript, Bash)
-  - Web search via Tavily API
-  - **Trilium knowledge base integration** (NEW in v0.9.0)
-  - Multi-iteration autonomous tool use (max 5 iterations)
-  - Real-time thinking stream via SSE
- **Dual Operating Modes:**
-  - **Standard Mode** (v0.7.0) - Simple chatbot with context retention + tool calling (v0.8.0)
-    - Bypasses reflection, reasoning, refinement stages
-    - Direct LLM call with conversation history
-    - User-selectable backend (SECONDARY, OPENAI, or custom)
-    - **NEW:** Autonomous tool calling for code execution, web search, knowledge queries
-    - **NEW:** "Show Your Work" real-time thinking stream
-    - Faster responses for coding and practical tasks
-  - **Cortex Mode** - Full 4-stage reasoning pipeline
-    1. **Reflection** - Generates meta-awareness notes about conversation
-    2. **Reasoning** - Creates initial draft answer using context
-    3. **Refinement** - Polishes and improves the draft
-    4. **Persona** - Applies Lyra's personality and speaking style
- Integrates with Intake for short-term context via internal Python imports
- Flexible LLM router supporting multiple backends via HTTP
- **Endpoints:**
-  - `POST /reason` - Main reasoning pipeline (Cortex Mode)
-  - `POST /simple` - Direct LLM chat with tool calling (Standard Mode)
-  - `GET /stream/thinking/{session_id}` - SSE stream for thinking events **NEW in v0.8.0**
-  - `POST /ingest` - Receives conversation exchanges from Relay
-  - `GET /health` - Service health check
-  - `GET /debug/sessions` - Inspect in-memory SESSIONS state
-  - `GET /debug/summary` - Test summarization for a session
-
-**5. Intake** (Python Module) - **Embedded in Cortex**
- **No longer a standalone service** - runs as Python module inside Cortex container
- Short-term memory management with session-based circular buffer
- In-memory SESSIONS dictionary: `session_id → {buffer: deque(maxlen=200), created_at: timestamp}`
- Multi-level summarization (L1/L5/L10/L20/L30) produced by `summarize_context()`
- Deferred summarization - actual summary generation happens during `/reason` call
- Internal Python API:
-  - `add_exchange_internal(exchange)` - Direct function call from Cortex
-  - `summarize_context(session_id, exchanges)` - Async LLM-based summarization
-  - `SESSIONS` - Module-level global state (requires single Uvicorn worker)
-
-### LLM Backends (HTTP-based)
-
-**All LLM communication is done via HTTP APIs:**
- **PRIMARY**: llama.cpp server (`http://10.0.0.44:8080`) - AMD MI50 GPU backend
- **SECONDARY**: Ollama server (`http://10.0.0.3:11434`) - RTX 3090 backend
-  - Model: qwen2.5:7b-instruct-q4_K_M
- **CLOUD**: OpenAI API (`https://api.openai.com/v1`) - Cloud-based models
-  - Model: gpt-4o-mini
- **FALLBACK**: Local backup (`http://10.0.0.41:11435`) - Emergency fallback
-  - Model: llama-3.2-8b-instruct
-
-Each module can be configured to use a different backend via environment variables.
-
-### Autonomy System (NEW in v0.6.0)
-
-**Cortex Autonomy Subsystems** - Multi-layered autonomous decision-making and learning
- **Executive Layer** [cortex/autonomy/executive/](cortex/autonomy/executive/)
-  - High-level planning and goal setting
-  - Multi-step reasoning for complex objectives
-  - Strategic decision making
- **Decision Engine** [cortex/autonomy/tools/decision_engine.py](cortex/autonomy/tools/decision_engine.py)
-  - Autonomous decision-making framework
-  - Option evaluation and selection
-  - Coordinated decision orchestration
- **Autonomous Actions** [cortex/autonomy/actions/](cortex/autonomy/actions/)
-  - Self-initiated action execution
-  - Context-aware behavior implementation
-  - Action logging and tracking
- **Pattern Learning** [cortex/autonomy/learning/](cortex/autonomy/learning/)
-  - Learns from interaction patterns
-  - Identifies recurring user needs
-  - Adaptive behavior refinement
- **Proactive Monitoring** [cortex/autonomy/proactive/](cortex/autonomy/proactive/)
-  - System state monitoring
-  - Intervention opportunity detection
-  - Background awareness capabilities
- **Self-Analysis** [cortex/autonomy/self/](cortex/autonomy/self/)
-  - Performance tracking and analysis
-  - Cognitive pattern identification
-  - Self-state persistence in [cortex/data/self_state.json](cortex/data/self_state.json)
- **Orchestrator** [cortex/autonomy/tools/orchestrator.py](cortex/autonomy/tools/orchestrator.py)
-  - Coordinates all autonomy subsystems
-  - Manages tool selection and execution
-  - Handles external integrations (with enable/disable controls)
-
-**Autonomy Architecture:**
-The autonomy system operates in coordinated layers, all maintaining state in `self_state.json`:
-1. Executive Layer → Planning and goals
-2. Decision Layer → Evaluation and choices
-3. Action Layer → Execution
-4. Learning Layer → Pattern adaptation
-5. Monitoring Layer → Proactive awareness
-
---
-
-## Data Flow Architecture (v0.7.0)
-
-### Standard Mode Flow (NEW in v0.7.0):
-
-```
-User (UI) → POST /v1/chat/completions {mode: "standard", backend: "SECONDARY"}
-  ↓
-Relay (7078)
-  ↓ POST /simple
-Cortex (7081)
-  ↓ (internal Python call)
-Intake module → get_recent_messages() (last 20 messages)
-  ↓
-Direct LLM call (user-selected backend: SECONDARY/OPENAI/custom)
-  ↓
-Returns simple response to Relay
-  ↓
-Relay → POST /ingest (async)
-  ↓
-Cortex → add_exchange_internal() → SESSIONS buffer
-  ↓
-Relay → POST /sessions/:id (save session to file)
-  ↓
-Relay → UI (returns final response)
-
-Note: Bypasses reflection, reasoning, refinement, persona stages
-```
-
-### Cortex Mode Flow (Full Reasoning):
-
-```
-User (UI) → POST /v1/chat/completions {mode: "cortex"}
-  ↓
-Relay (7078)
-  ↓ POST /reason
-Cortex (7081)
-  ↓ (internal Python call)
-Intake module → summarize_context()
-  ↓
-Autonomy System → Decision evaluation & pattern learning
-  ↓
-Cortex processes (4 stages):
-  1. reflection.py → meta-awareness notes (CLOUD backend)
-  2. reasoning.py → draft answer (PRIMARY backend, autonomy-aware)
-  3. refine.py → refined answer (PRIMARY backend)
-  4. persona/speak.py → Lyra personality (CLOUD backend, autonomy-aware)
-  ↓
-Returns persona answer to Relay
-  ↓
-Relay → POST /ingest (async)
-  ↓
-Cortex → add_exchange_internal() → SESSIONS buffer
-  ↓
-Autonomy System → Update self_state.json (pattern tracking)
-  ↓
-Relay → POST /sessions/:id (save session to file)
-  ↓
-Relay → UI (returns final response)
-
-Note: NeoMem integration disabled in v0.6.0
-```
-
-### Session Persistence Flow (NEW in v0.7.0):
-
-```
-UI loads → GET /sessions → Relay → List all sessions from files → UI dropdown
-User sends message → POST /sessions/:id → Relay → Save to sessions/*.json
-User renames session → PATCH /sessions/:id/metadata → Relay → Update *.meta.json
-User deletes session → DELETE /sessions/:id → Relay → Remove session files
-
-Sessions stored in: core/relay/sessions/
- {sessionId}.json (conversation history)
- {sessionId}.meta.json (name, timestamps, metadata)
-```
-
-### Cortex 4-Stage Reasoning Pipeline:
-
-1. **Reflection** (`reflection.py`) - Cloud LLM (OpenAI)
-   - Analyzes user intent and conversation context
-   - Generates meta-awareness notes
-   - "What is the user really asking?"
-
-2. **Reasoning** (`reasoning.py`) - Primary LLM (llama.cpp)
-   - Retrieves short-term context from Intake module
-   - Creates initial draft answer
-   - Integrates context, reflection notes, and user prompt
-
-3. **Refinement** (`refine.py`) - Primary LLM (llama.cpp)
-   - Polishes the draft answer
-   - Improves clarity and coherence
-   - Ensures factual consistency
-
-4. **Persona** (`speak.py`) - Cloud LLM (OpenAI)
-   - Applies Lyra's personality and speaking style
-   - Natural, conversational output
-   - Final answer returned to user
-
---
-
-## Features
-
-### Core Services
-
-**Relay**:
- Main orchestrator and message router
- OpenAI-compatible endpoint: `POST /v1/chat/completions`
- Internal endpoint: `POST /chat`
- Health check: `GET /_health`
- **NEW:** Dual-mode routing (Standard/Cortex)
- **NEW:** Server-side session persistence with CRUD API
- **NEW:** Session management endpoints:
-  - `GET /sessions` - List all sessions
-  - `GET /sessions/:id` - Retrieve session history
-  - `POST /sessions/:id` - Save session history
-  - `PATCH /sessions/:id/metadata` - Update session metadata
-  - `DELETE /sessions/:id` - Delete session
- Async non-blocking calls to Cortex
- Shared request handler for code reuse
- Comprehensive error handling
-
-**NeoMem (Memory Engine)**:
- Forked from Mem0 OSS - fully independent
- Drop-in compatible API (`/memories`, `/search`)
- Local-first: runs on FastAPI with Postgres + Neo4j
- No external SDK dependencies
- Semantic memory updates - compares embeddings and performs in-place updates
- Default service: `neomem-api` (port 7077)
-
-**UI**:
- Lightweight static HTML chat interface
- Cyberpunk theme with light/dark mode toggle
- **NEW:** Mode selector (Standard/Cortex) in header
- **NEW:** Settings modal (⚙ button) with:
-  - Backend selection for Standard Mode (SECONDARY/OPENAI/custom)
-  - Session management (view, delete sessions)
-  - Theme toggle (dark mode default)
- **NEW:** Server-synced session management
-  - Sessions persist across browsers and reboots
-  - Rename sessions with custom names
-  - Delete sessions with confirmation
-  - Automatic session save on every message
- OpenAI message format support
-
-### Reasoning Layer
-
-**Cortex** (v0.7.0):
- **NEW:** Dual operating modes:
-  - **Standard Mode** - Simple chat with context (`/simple` endpoint)
-    - User-selectable backend (SECONDARY, OPENAI, or custom)
-    - Full conversation history via Intake integration
-    - Bypasses reasoning pipeline for faster responses
-  - **Cortex Mode** - Full reasoning pipeline (`/reason` endpoint)
-    - Multi-stage processing: reflection → reasoning → refine → persona
-    - Per-stage backend selection
-    - Autonomy system integration
- Flexible LLM backend routing via HTTP
- Async processing throughout
- Embedded Intake module for short-term context
- `/reason`, `/simple`, `/ingest`, `/health`, `/debug/sessions`, `/debug/summary` endpoints
- Lenient error handling - never fails the chat pipeline
-
-**Intake** (Embedded Module):
- **Architectural change**: Now runs as Python module inside Cortex container
- In-memory SESSIONS management (session_id → buffer)
- Multi-level summarization: L1 (ultra-short), L5 (short), L10 (medium), L20 (detailed), L30 (full)
- Deferred summarization strategy - summaries generated during `/reason` call
- `bg_summarize()` is a logging stub - actual work deferred
- **Single-worker constraint**: SESSIONS requires single Uvicorn worker or Redis/shared storage
-
-**LLM Router**:
- Dynamic backend selection via HTTP
- Environment-driven configuration
- Support for llama.cpp, Ollama, OpenAI, custom endpoints
- Per-module backend preferences:
-  - `CORTEX_LLM=SECONDARY` (Ollama for reasoning)
-  - `INTAKE_LLM=PRIMARY` (llama.cpp for summarization)
-  - `SPEAK_LLM=OPENAI` (Cloud for persona)
-  - `NEOMEM_LLM=PRIMARY` (llama.cpp for memory operations)
-
-### Beta Lyrae (RAG Memory DB) - Currently Disabled
-
- **RAG Knowledge DB - Beta Lyrae (sheliak)**
-  - This module implements the **Retrieval-Augmented Generation (RAG)** layer for Project Lyra.
-  - It serves as the long-term searchable memory store that Cortex and Relay can query for relevant context before reasoning or response generation.
-  - **Status**: Disabled in docker-compose.yml (v0.5.1)
-
-The system uses:
- **ChromaDB** for persistent vector storage
- **OpenAI Embeddings (`text-embedding-3-small`)** for semantic similarity
- **FastAPI** (port 7090) for the `/rag/search` REST endpoint
-
-Directory Layout:
-```
-rag/
-├── rag_chat_import.py    # imports JSON chat logs
-├── rag_docs_import.py    # (planned) PDF/EPUB/manual importer
-├── rag_build.py          # legacy single-folder builder
-├── rag_query.py          # command-line query helper
-├── rag_api.py            # FastAPI service providing /rag/search
-├── chromadb/             # persistent vector store
-├── chatlogs/             # organized source data
-│   ├── poker/
-│   ├── work/
-│   ├── lyra/
-│   ├── personal/
-│   └── ...
-└── import.log            # progress log for batch runs
-```
-
-**OpenAI chatlog importer features:**
- Recursive folder indexing with **category detection** from directory name
- Smart chunking for long messages (5,000 chars per slice)
- Automatic deduplication using SHA-1 hash of file + chunk
- Timestamps for both file modification and import time
- Full progress logging via tqdm
- Safe to run in background with `nohup … &`
-
---
-
-## Docker Deployment
-
-All services run in a single docker-compose stack with the following containers:
-
-**Active Services:**
- **relay** - Main orchestrator (port 7078)
- **cortex** - Reasoning engine with embedded Intake and Autonomy System (port 7081)
-
-**Disabled Services (v0.6.0):**
- **neomem-postgres** - PostgreSQL with pgvector extension (port 5432) - *disabled while refining pipeline*
- **neomem-neo4j** - Neo4j graph database (ports 7474, 7687) - *disabled while refining pipeline*
- **neomem-api** - NeoMem memory service (port 7077) - *disabled while refining pipeline*
- **intake** - No longer needed (embedded in Cortex as of v0.5.1)
- **rag** - Beta Lyrae RAG service (port 7090) - currently disabled
-
-All containers communicate via the `lyra_net` Docker bridge network.
-
-## External LLM Services
-
-The following LLM backends are accessed via HTTP (not part of docker-compose):
-
- **llama.cpp Server** (`http://10.0.0.44:8080`)
-  - AMD MI50 GPU-accelerated inference
-  - Primary backend for reasoning and refinement stages
-  - Model path: `/model`
-
- **Ollama Server** (`http://10.0.0.3:11434`)
-  - RTX 3090 GPU-accelerated inference
-  - Secondary/configurable backend
-  - Model: qwen2.5:7b-instruct-q4_K_M
-
- **OpenAI API** (`https://api.openai.com/v1`)
-  - Cloud-based inference
-  - Used for reflection and persona stages
-  - Model: gpt-4o-mini
-
- **Fallback Server** (`http://10.0.0.41:11435`)
-  - Emergency backup endpoint
-  - Local llama-3.2-8b-instruct model
-
---
-
-## Version History
-
-### v0.9.0 (2025-12-29) - Current Release
-**Major Feature: Trilium Notes Integration**
- ✅ Added Trilium ETAPI integration for knowledge base access
- ✅ `search_notes()` tool for searching personal notes during conversations
- ✅ `create_note()` tool for capturing insights and information
- ✅ ETAPI authentication with secure token management
- ✅ Complete setup documentation and API reference
- ✅ Environment configuration with feature flag (`ENABLE_TRILIUM`)
- ✅ Automatic parent note handling (defaults to "root")
- ✅ Connection error handling and user-friendly messages
-
-**Key Capabilities:**
- Search your Trilium notes during conversations for context
- Create new notes from conversation insights automatically
- Cross-reference information between chat and knowledge base
- Future: Find duplicates, suggest organization, summarize notes
-
-**Documentation:**
- Added [TRILIUM_SETUP.md](TRILIUM_SETUP.md) - Complete setup guide
- Added [docs/TRILIUM_API.md](docs/TRILIUM_API.md) - Full API reference
-
-### v0.8.0 (2025-12-26)
-**Major Feature: Agentic Tool Calling + "Show Your Work"**
- ✅ Added tool calling system for Standard Mode
- ✅ Real-time thinking stream visualization
- ✅ Sandboxed code execution (Python, JavaScript, Bash)
- ✅ Web search integration via Tavily API
- ✅ Server-Sent Events (SSE) for live tool execution updates
-
-### v0.7.0 (2025-12-21)
-**Major Features: Standard Mode + Backend Selection + Session Persistence**
- ✅ Added Standard Mode for simple chatbot functionality
- ✅ UI mode selector (Standard/Cortex) in header
- ✅ Settings modal with backend selection for Standard Mode
- ✅ Server-side session persistence with file-based storage
- ✅ Session management UI (view, rename, delete sessions)
- ✅ Light/Dark mode toggle (dark by default)
- ✅ Context retention in Standard Mode via Intake integration
- ✅ Fixed modal positioning and z-index issues
- ✅ Cortex `/simple` endpoint for direct LLM calls
- ✅ Session CRUD API in Relay
- ✅ Full backward compatibility - Cortex Mode unchanged
-
-**Key Changes:**
- Standard Mode bypasses 6 of 7 reasoning stages for faster responses
- Sessions now sync across browsers and survive container restarts
- Users can select SECONDARY (Ollama), OPENAI, or a custom backend for Standard Mode
- Theme preference and backend selection persisted in localStorage
- Session files stored in `core/relay/sessions/` directory
-
-### v0.6.0 (2025-12-18)
-**Major Feature: Autonomy System (Phase 1, 2, and 2.5)**
- ✅ Added autonomous decision-making framework
- ✅ Implemented executive planning and goal-setting layer
- ✅ Added pattern learning system for adaptive behavior
- ✅ Implemented proactive monitoring capabilities
- ✅ Created self-analysis and performance tracking system
- ✅ Integrated self-state persistence (`cortex/data/self_state.json`)
- ✅ Built decision engine with orchestrator coordination
- ✅ Added autonomous action execution framework
- ✅ Integrated autonomy into reasoning and persona layers
- ✅ Created comprehensive test suites for autonomy features
- ✅ Added complete system breakdown documentation
-
-**Architecture Changes:**
- Autonomy system integrated into Cortex reasoning pipeline
- Multi-layered autonomous decision-making architecture
- Self-state tracking across sessions
- NeoMem disabled by default while refining pipeline integration
- Enhanced orchestrator with flexible service controls
-
-**Documentation:**
- Added [PROJECT_LYRA_COMPLETE_BREAKDOWN.md](docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md)
- Updated changelog with comprehensive autonomy system details
-
-### v0.5.1 (2025-12-11)
-**Critical Intake Integration Fixes:**
- ✅ Fixed `bg_summarize()` NameError preventing SESSIONS persistence
- ✅ Fixed `/ingest` endpoint unreachable code
- ✅ Added `cortex/intake/__init__.py` for proper package structure
- ✅ Added diagnostic logging to verify SESSIONS singleton behavior
- ✅ Added `/debug/sessions` and `/debug/summary` endpoints
- ✅ Documented single-worker constraint in Dockerfile
- ✅ Implemented lenient error handling (never fails chat pipeline)
- ✅ Intake now embedded in Cortex - no longer standalone service
-
-**Architecture Changes:**
- Intake module runs inside Cortex container as pure Python import
- No HTTP calls between Cortex and Intake (internal function calls)
- SESSIONS persist correctly in Uvicorn worker
- Deferred summarization strategy (summaries generated during `/reason`)
-
-### v0.5.0 (2025-11-28)
- ✅ Fixed all critical API wiring issues
- ✅ Added OpenAI-compatible endpoint to Relay (`/v1/chat/completions`)
- ✅ Fixed Cortex → Intake integration
- ✅ Added missing Python package `__init__.py` files
- ✅ End-to-end message flow verified and working
-
-### Infrastructure v1.0.0 (2025-11-26)
- Consolidated 9 scattered `.env` files into single source of truth
- Multi-backend LLM strategy implemented
- Docker Compose consolidation
- Created `.env.example` security templates
-
-### v0.4.x (Major Rewire)
- Cortex multi-stage reasoning pipeline
- LLM router with multi-backend support
- Major architectural restructuring
-
-### v0.3.x
- Beta Lyrae RAG system
- NeoMem integration
- Basic Cortex reasoning loop
-
---
-
-## Known Issues (v0.7.0)
-
-### Temporarily Disabled
- **NeoMem disabled by default** - Being refined independently before full integration
-  - PostgreSQL + pgvector storage inactive
-  - Neo4j graph database inactive
-  - Memory persistence endpoints not active
- RAG service (Beta Lyrae) currently disabled in docker-compose.yml
-
-### Standard Mode Limitations
- No reflection, reasoning, or refinement stages (by design)
- DeepSeek R1 not recommended for Standard Mode (generates reasoning artifacts)
- No RAG integration (same as Cortex Mode - currently disabled)
- No NeoMem memory storage (same as Cortex Mode - currently disabled)
-
-### Session Management Limitations
- Sessions stored in container filesystem - requires volume mount for true persistence
- No session import/export functionality yet
- No session search or filtering
- Old localStorage sessions don't automatically migrate to the server
-
-### Operational Notes
- **Single-worker constraint**: Cortex must run with a single Uvicorn worker to maintain SESSIONS state
-  - Multi-worker scaling requires migrating SESSIONS to Redis or shared storage
- Diagnostic endpoints (`/debug/sessions`, `/debug/summary`) available for troubleshooting
- Backend selection only affects Standard Mode - Cortex Mode uses environment-configured backends
-
-### Future Enhancements
- Re-enable NeoMem integration after pipeline refinement
- Full autonomy system maturation and optimization
- Re-enable RAG service integration
- Session import/export functionality
- Session search and filtering UI
- Migrate SESSIONS to Redis for multi-worker support
- Add request correlation IDs for tracing
- Comprehensive health checks across all services
- Enhanced pattern learning with long-term memory integration
-
---
-
-## Quick Start
-
-### Prerequisites
- Docker + Docker Compose
- At least one HTTP-accessible LLM endpoint (llama.cpp, Ollama, or OpenAI API key)
-
-### Setup
-1. Copy `.env.example` to `.env` and configure your LLM backend URLs and API keys:
-   ```bash
-   # Required: Configure at least one LLM backend
-   LLM_PRIMARY_URL=http://10.0.0.44:8080       # llama.cpp
-   LLM_SECONDARY_URL=http://10.0.0.3:11434     # Ollama
-   OPENAI_API_KEY=sk-...                        # OpenAI
-   ```
-
-2. Start all services with docker-compose:
-   ```bash
-   docker-compose up -d
-   ```
-
-3. Check service health:
-   ```bash
-   # Relay health
-   curl http://localhost:7078/_health
-
-   # Cortex health
-   curl http://localhost:7081/health
-
-   # NeoMem health
-   curl http://localhost:7077/health
-   ```
-
-4. Access the UI at `http://localhost:8081`
-
-### Using the UI
-
-**Mode Selection:**
- Use the **Mode** dropdown in the header to switch between:
-  - **Standard** - Simple chatbot for coding and practical tasks
-  - **Cortex** - Full reasoning pipeline with autonomy features
-
-**Settings Menu:**
-1. Click the **⚙ Settings** button in the header
-2. **Backend Selection** (Standard Mode only):
-   - Choose **SECONDARY** (Ollama/Qwen on 3090) - Fast, local
-   - Choose **OPENAI** (GPT-4o-mini) - Cloud-based, high quality
-   - Enter custom backend name for advanced configurations
-3. **Session Management**:
-   - View all saved sessions with message counts and timestamps
-   - Click 🗑️ to delete unwanted sessions
-4. **Theme Toggle**:
-   - Click **🌙 Dark Mode** or **☀️ Light Mode** to switch themes
-
-**Session Management:**
- Sessions automatically save on every message
- Use the **Session** dropdown to switch between sessions
- Click **➕ New** to create a new session
- Click **✏️ Rename** to rename the current session
- Sessions persist across browsers and container restarts
-
-### Test
-
-**Test Standard Mode:**
 ```bash
-curl -X POST http://localhost:7078/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "mode": "standard",
-    "backend": "SECONDARY",
-    "messages": [{"role": "user", "content": "Hello!"}],
-    "sessionId": "test"
-  }'
+uv sync
+cp .env.example .env
+# fill in ANTHROPIC_API_KEY and point LOCAL_BASE_URL at your Ollama server
 ```

-**Test Cortex Mode (Full Reasoning):**
-```bash
-curl -X POST http://localhost:7078/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "mode": "cortex",
-    "messages": [{"role": "user", "content": "Hello Lyra!"}],
-    "sessionId": "test"
-  }'
-```
+## Architecture

-**Test Cortex /ingest endpoint:**
-```bash
-curl -X POST http://localhost:7081/ingest \
-  -H "Content-Type: application/json" \
-  -d '{
-    "session_id": "test",
-    "user_msg": "Hello",
-    "assistant_msg": "Hi there!"
-  }'
-```
-
-**Inspect SESSIONS state:**
-```bash
-curl http://localhost:7081/debug/sessions
-```
-
-**Get summary for a session:**
-```bash
-curl "http://localhost:7081/debug/summary?session_id=test"
-```
-
-**List all sessions:**
-```bash
-curl http://localhost:7078/sessions
-```
-
-**Get session history:**
-```bash
-curl http://localhost:7078/sessions/sess-abc123
-```
-
-**Delete a session:**
-```bash
-curl -X DELETE http://localhost:7078/sessions/sess-abc123
-```
-
-All backend databases (PostgreSQL and Neo4j) are automatically started as part of the docker-compose stack.
-
---
-
-## Environment Variables
-
-### LLM Backend Configuration
-
-**Backend URLs (Full API endpoints):**
-```bash
-LLM_PRIMARY_URL=http://10.0.0.44:8080           # llama.cpp
-LLM_PRIMARY_MODEL=/model
-
-LLM_SECONDARY_URL=http://10.0.0.3:11434         # Ollama
-LLM_SECONDARY_MODEL=qwen2.5:7b-instruct-q4_K_M
-
-LLM_OPENAI_URL=https://api.openai.com/v1
-LLM_OPENAI_MODEL=gpt-4o-mini
-OPENAI_API_KEY=sk-...
-```
-
-**Module-specific backend selection:**
-```bash
-CORTEX_LLM=SECONDARY         # Use Ollama for reasoning
-INTAKE_LLM=PRIMARY           # Use llama.cpp for summarization
-SPEAK_LLM=OPENAI             # Use OpenAI for persona
-NEOMEM_LLM=PRIMARY           # Use llama.cpp for memory
-UI_LLM=OPENAI                # Use OpenAI for UI
-RELAY_LLM=PRIMARY            # Use llama.cpp for relay
-STANDARD_MODE_LLM=SECONDARY  # Default backend for Standard Mode (NEW in v0.7.0)
-```
-
-### Database Configuration
-```bash
-POSTGRES_USER=neomem
-POSTGRES_PASSWORD=neomempass
-POSTGRES_DB=neomem
-POSTGRES_HOST=neomem-postgres
-POSTGRES_PORT=5432
-
-NEO4J_URI=bolt://neomem-neo4j:7687
-NEO4J_USERNAME=neo4j
-NEO4J_PASSWORD=neomemgraph
-```
-
-### Service URLs (Internal Docker Network)
-```bash
-NEOMEM_API=http://neomem-api:7077
-CORTEX_API=http://cortex:7081
-CORTEX_REASON_URL=http://cortex:7081/reason
-CORTEX_SIMPLE_URL=http://cortex:7081/simple      # NEW in v0.7.0
-CORTEX_INGEST_URL=http://cortex:7081/ingest
-RELAY_URL=http://relay:7078
-```
-
-### Feature Flags
-```bash
-CORTEX_ENABLED=true
-MEMORY_ENABLED=true
-PERSONA_ENABLED=false
-DEBUG_PROMPT=true
-VERBOSE_DEBUG=true
-ENABLE_TRILIUM=true          # NEW in v0.9.0
-```
-
-For complete environment variable reference, see [ENVIRONMENT_VARIABLES.md](ENVIRONMENT_VARIABLES.md).
-
---
-
-## Documentation
-
- [CHANGELOG.md](CHANGELOG.md) - Detailed version history
- [PROJECT_SUMMARY.md](PROJECT_SUMMARY.md) - Comprehensive project overview for AI context
- [ENVIRONMENT_VARIABLES.md](ENVIRONMENT_VARIABLES.md) - Environment variable reference
- [DEPRECATED_FILES.md](DEPRECATED_FILES.md) - Deprecated files and migration guide
-
---
-
-## Troubleshooting
-
-### SESSIONS not persisting
-**Symptom:** Intake buffer always shows 0 exchanges, summaries always empty.
-
-**Solution (Fixed in v0.5.1):**
- Ensure `cortex/intake/__init__.py` exists
- Check Cortex logs for `[Intake Module Init]` message showing SESSIONS object ID
- Verify single-worker mode (Dockerfile: `uvicorn main:app --workers 1`)
- Use `/debug/sessions` endpoint to inspect current state
-
-### Cortex connection errors
-**Symptom:** Relay can't reach Cortex, 502 errors.
-
-**Solution:**
- Verify Cortex container is running: `docker ps | grep cortex`
- Check Cortex health: `curl http://localhost:7081/health`
- Verify environment variables: `CORTEX_REASON_URL=http://cortex:7081/reason`
- Check docker network: `docker network inspect lyra_net`
-
-### LLM backend timeouts
-**Symptom:** Reasoning stage hangs or times out.
-
-**Solution:**
- Verify LLM backend is running and accessible
- Check LLM backend health: `curl http://10.0.0.44:8080/health`
- Increase timeout in llm_router.py if using slow models
- Check logs for specific backend errors
-
---
-
-## License
-
-NeoMem is a derivative work based on Mem0 OSS (Apache 2.0).
-© 2025 Terra-Mechanics / ServersDown Labs. All modifications released under Apache 2.0.
-
-**Built with Claude Code**
-
---
-
-## Integration Notes
-
- NeoMem API is compatible with Mem0 OSS endpoints (`/memories`, `/search`)
- All services communicate via Docker internal networking on the `lyra_net` bridge
- History and entity graphs are managed via PostgreSQL + Neo4j
- LLM backends are accessed via HTTP and configured in `.env`
- Intake module is imported internally by Cortex (no HTTP communication)
- SESSIONS state is maintained in-memory within Cortex container
-
---
-
-## Beta Lyrae - RAG Memory System (Currently Disabled)
-
-**Note:** The RAG service is currently disabled in docker-compose.yml
-
-### Requirements
- Python 3.10+
- Dependencies: `chromadb openai tqdm python-dotenv fastapi uvicorn`
- Persistent storage: `./chromadb` or `/mnt/data/lyra_rag_db`
-
-### Setup
-1. Import chat logs (must be in OpenAI message format):
-   ```bash
-   python3 rag/rag_chat_import.py
-   ```
-
-2. Build and start the RAG API server:
-   ```bash
-   cd rag
-   python3 rag_build.py
-   uvicorn rag_api:app --host 0.0.0.0 --port 7090
-   ```
-
-3. Query the RAG system:
-   ```bash
-   curl -X POST http://127.0.0.1:7090/rag/search \
-     -H "Content-Type: application/json" \
-     -d '{
-       "query": "What is the current state of Cortex?",
-       "where": {"category": "lyra"}
-     }'
-   ```
-
---
-
-## Development Notes
-
-### Cortex Architecture (v0.6.0)
- Cortex contains embedded Intake module at `cortex/intake/`
- Intake is imported as: `from intake.intake import add_exchange_internal, SESSIONS`
- SESSIONS is a module-level global dictionary (singleton pattern)
- Single-worker constraint required to maintain SESSIONS state
- Diagnostic endpoints available for debugging: `/debug/sessions`, `/debug/summary`
- **NEW:** Autonomy system integrated at `cortex/autonomy/`
-  - Executive, decision, action, learning, and monitoring layers
-  - Self-state persistence in `cortex/data/self_state.json`
-  - Coordinated via orchestrator with flexible service controls
-
-### Adding New LLM Backends
-1. Add backend URL to `.env`:
-   ```bash
-   LLM_CUSTOM_URL=http://your-backend:port
-   LLM_CUSTOM_MODEL=model-name
-   ```
-
-2. Configure module to use new backend:
-   ```bash
-   CORTEX_LLM=CUSTOM
-   ```
-
-3. Restart Cortex container:
-   ```bash
-   docker-compose restart cortex
-   ```
-
-### Debugging Tips
- Enable verbose logging: `VERBOSE_DEBUG=true` in `.env`
- Check Cortex logs: `docker logs cortex -f`
- Check Relay logs: `docker logs relay -f`
- Inspect SESSIONS: `curl http://localhost:7081/debug/sessions`
- Test summarization: `curl "http://localhost:7081/debug/summary?session_id=test"`
- List sessions: `curl http://localhost:7078/sessions`
- Test Standard Mode: `curl -X POST http://localhost:7078/v1/chat/completions -H "Content-Type: application/json" -d '{"mode":"standard","backend":"SECONDARY","messages":[{"role":"user","content":"test"}],"sessionId":"test"}'`
- Monitor Docker network: `docker network inspect lyra_net`
- Check session files: `ls -la core/relay/sessions/`
+The long-term target is the cognitive split in `docs/ARCH_v0-6-1.md` — Inner Self as the seat of consciousness, Executive for hard reasoning, Cortex Chat for drafting, Persona for voice. The MVP implements only the chat + memory baseline. Cognitive layers come back one at a time.
@@ -1,163 +0,0 @@
-# "Show Your Work" - Thinking Stream Feature
-
-Real-time Server-Sent Events (SSE) stream that broadcasts the internal thinking process during tool calling operations.
-
-## What It Does
-
-When Lyra uses tools to answer a question, you can now watch her "think" in real-time through a parallel stream:
-
- 🤔 **Thinking** - When she's planning what to do
- 🔧 **Tool Calls** - When she decides to use a tool
- 📊 **Tool Results** - The results from tool execution
- ✅ **Done** - When she has the final answer
- ❌ **Errors** - If something goes wrong
-
-## How To Use
-
-### 1. Open the SSE Stream
-
-Connect to the thinking stream for a session:
-
-```bash
-curl -N http://localhost:7081/stream/thinking/{session_id}
-```
-
-The stream will send Server-Sent Events in this format:
-
-```
-data: {"type": "thinking", "data": {"message": "🤔 Thinking... (iteration 1/5)"}}
-
-data: {"type": "tool_call", "data": {"tool": "execute_code", "args": {...}, "message": "🔧 Using tool: execute_code"}}
-
-data: {"type": "tool_result", "data": {"tool": "execute_code", "result": {...}, "message": "📊 Result: ..."}}
-
-data: {"type": "done", "data": {"message": "✅ Complete!", "final_answer": "The result is..."}}
-```
-
-### 2. Send a Request
-
-In parallel, send a request to `/simple` with the same `session_id`:
-
-```bash
-curl -X POST http://localhost:7081/simple \
-  -H "Content-Type: application/json" \
-  -d '{
-    "session_id": "your-session-id",
-    "user_prompt": "Calculate 50/2 using Python",
-    "backend": "SECONDARY"
-  }'
-```
-
-### 3. Watch the Stream
-
-As the request processes, you'll see real-time events showing:
- Each thinking iteration
- Every tool call being made
- The results from each tool
- The final answer
-
-## Event Types
-
-| Event Type | Description | Data Fields |
-|-----------|-------------|-------------|
-| `connected` | Initial connection | `session_id` |
-| `thinking` | LLM is processing | `message` |
-| `tool_call` | Tool is being invoked | `tool`, `args`, `message` |
-| `tool_result` | Tool execution completed | `tool`, `result`, `message` |
-| `done` | Process complete | `message`, `final_answer` |
-| `error` | Something went wrong | `message` |
-
-## Demo Page
-
-A demo HTML page is included at [test_thinking_stream.html](../test_thinking_stream.html):
-
-```bash
-# Serve the demo page
-python3 -m http.server 8000
-```
-
-Then open http://localhost:8000/test_thinking_stream.html in your browser.
-
-The demo shows:
- **Left panel**: Chat interface
- **Right panel**: Real-time thinking stream
- **Mobile**: Swipe between panels
-
-## Architecture
-
-### Components
-
-1. **ToolStreamManager** (`autonomy/tools/stream_events.py`)
-   - Manages SSE subscriptions per session
-   - Broadcasts events to all connected clients
-   - Handles automatic cleanup
-
-2. **FunctionCaller** (`autonomy/tools/function_caller.py`)
-   - Enhanced with event emission at each step
-   - Checks for active subscribers before emitting
-   - Passes `session_id` through the call chain
-
-3. **SSE Endpoint** (`/stream/thinking/{session_id}`)
-   - FastAPI streaming response
-   - 30-second keepalive for connection maintenance
-   - Automatic reconnection on client side
-
-### Event Flow
-
-```
-Client                 SSE Endpoint           FunctionCaller          Tools
-  |                         |                         |                  |
-  |--- Connect SSE -------->|                         |                  |
-  |<-- connected -----------|                         |                  |
-  |                         |                         |                  |
-  |--- POST /simple --------|                         |                  |
-  |                         |                         |                  |
-  |                         |<-- emit("thinking") ----|                  |
-  |<-- thinking ------------|                         |                  |
-  |                         |                         |                  |
-  |                         |<-- emit("tool_call") ---|                  |
-  |<-- tool_call -----------|                         |                  |
-  |                         |                         |-- execute ------>|
-  |                         |                         |<-- result -------|
-  |                         |<-- emit("tool_result")--|                  |
-  |<-- tool_result ---------|                         |                  |
-  |                         |                         |                  |
-  |                         |<-- emit("done") --------|                  |
-  |<-- done ----------------|                         |                  |
-  |                         |                         |                  |
-```
-
-## Configuration
-
-No additional configuration needed! The feature works automatically when:
-1. `STANDARD_MODE_ENABLE_TOOLS=true` (already set)
-2. A client connects to the SSE stream BEFORE sending the request
-
-## Example Output
-
-```
-🟢 Connected to thinking stream
-✓ Connected (Session: thinking-demo-1735177234567)
-🤔 Thinking... (iteration 1/5)
-🔧 Using tool: execute_code
-📊 Result: {'stdout': '12.0\n', 'stderr': '', 'exit_code': 0, 'execution_time': 0.04}
-🤔 Thinking... (iteration 2/5)
-✅ Complete!
-```
-
-## Use Cases
-
- **Debugging**: See exactly what tools are being called and why
- **Transparency**: Show users what the AI is doing behind the scenes
- **Education**: Learn how the system breaks down complex tasks
- **UI Enhancement**: Create engaging "thinking" animations
- **Mobile App**: Separate tab for "Show Your Work" view
-
-## Future Enhancements
-
-Potential additions:
- Token usage per iteration
- Estimated time remaining
- Tool execution duration
- Intermediate reasoning steps
- Visual progress indicators
@@ -1,159 +0,0 @@
-# Trilium ETAPI Integration Setup
-
-This guide will help you enable Lyra's integration with your Trilium notes using the ETAPI (External API).
-
-## What You Can Do with Trilium Integration
-
-Once enabled, Lyra can help you:
- 🔍 Search through your notes
- 📝 Create new notes from conversations
- 🔄 Find duplicate or similar notes
- 🏷️ Suggest better organization and tags
- 📊 Summarize and update existing notes
-
-## Prerequisites
-
- Trilium Notes installed and running
- Access to Trilium's web interface
- Lyra running on the same network as Trilium
-
-## Step 1: Generate ETAPI Token in Trilium
-
-1. **Open Trilium** in your web browser (e.g., `http://10.0.0.2:4292`)
-
-2. **Navigate to Options**:
-   - Click the menu icon (≡) in the top-left corner
-   - Select **"Options"** from the menu
-
-3. **Go to ETAPI Section**:
-   - In the Options sidebar, find and click **"ETAPI"**
-   - This section manages external API access
-
-4. **Generate a New Token**:
-   - Look for the **"Create New Token"** or **"Generate Token"** button
-   - Click it to create a new ETAPI token
-   - You may be asked to provide a name/description for the token (e.g., "Lyra Integration")
-
-5. **Copy the Token**:
-   - Once generated, you'll see a long string of characters (this is your token)
-   - **IMPORTANT**: Copy this token immediately - Trilium stores it hashed and you won't see it again!
-   - The token message will say: "ETAPI token created, copy the created token into the clipboard"
-   - Example format: `3ZOIydvNps3R_fZEE+kOFXiJlJ7vaeXHMEW6QuRYQm3+6qpjVxFwp9LE=`
-
-6. **Save the Token Securely**:
-   - Store it temporarily in a secure place (password manager or secure note)
-   - You'll need to paste it into Lyra's configuration in the next step
-
-## Step 2: Configure Lyra
-
-1. **Edit the Environment File**:
-   ```bash
-   nano /home/serversdown/project-lyra/.env
-   ```
-
-2. **Add/Update Trilium Configuration**:
-   Find or add these lines:
-   ```env
-   # Trilium ETAPI Integration
-   ENABLE_TRILIUM=true
-   TRILIUM_URL=http://10.0.0.2:4292
-   TRILIUM_ETAPI_TOKEN=your_token_here
-
-   # Enable tools in standard mode (if not already set)
-   STANDARD_MODE_ENABLE_TOOLS=true
-   ```
-
-3. **Replace `your_token_here`** with the actual token you copied from Trilium
-
-4. **Save and exit** (Ctrl+O, Enter, Ctrl+X in nano)
-
-## Step 3: Restart Cortex Service
-
-For the changes to take effect, restart the Cortex service:
-
-```bash
-cd /home/serversdown/project-lyra
-docker-compose restart cortex
-```
-
-Or if running with Docker directly:
-```bash
-docker restart cortex
-```
-
-## Step 4: Test the Integration
-
-Once restarted, try these example queries in Lyra (using Cortex mode):
-
-1. **Test Search**:
-   - "Search my Trilium notes for topics about AI"
-   - "Find notes containing 'project planning'"
-
-2. **Test Create Note**:
-   - "Create a note in Trilium titled 'Meeting Notes' with a summary of our conversation"
-   - "Save this to my Trilium as a new note"
-
-3. **Watch the Thinking Stream**:
-   - Open the thinking stream panel (🧠 Show Work)
-   - You should see tool calls to `search_notes` and `create_note`
-
-## Troubleshooting
-
-### "Connection refused" or "Cannot reach Trilium"
- Verify Trilium is running: `curl http://10.0.0.2:4292`
- Check that Cortex can access Trilium's network
- Ensure the URL in `.env` is correct
-
-### "Authentication failed" or "Invalid token"
- Double-check the token was copied correctly (no extra spaces)
- Generate a new token in Trilium if needed
- Verify `TRILIUM_ETAPI_TOKEN` in `.env` is set correctly
-
-### "No results found" when searching
- Verify you have notes in Trilium
- Try a broader search query
- Check Trilium's search functionality works directly
-
-### Tools not appearing in Cortex mode
- Verify `ENABLE_TRILIUM=true` is set
- Restart Cortex after changing `.env`
- Check Cortex logs: `docker logs cortex`
-
-## Security Notes
-
-⚠️ **Important Security Considerations**:
-
- The ETAPI token provides **full access** to your Trilium notes
- Keep the token secure - do not share or commit to git
- The `.env` file should be in `.gitignore` (already configured)
- Consider using a dedicated token for Lyra (you can create multiple tokens)
- Revoke tokens you no longer use from Trilium's ETAPI settings
-
-## Available Functions
-
-Currently enabled functions:
-
-### `search_notes(query, limit)`
-Search through your Trilium notes by keyword or phrase.
-
-**Example**: "Search my notes for 'machine learning' and show the top 5 results"
-
-### `create_note(title, content, parent_note_id)`
-Create a new note in Trilium with specified title and content.
-
-**Example**: "Create a note called 'Ideas from Today' with this summary: [content]"
-
-**Optional**: Specify a parent note ID to nest the new note under an existing note.
-
-## Future Enhancements
-
-Potential additions to the integration:
- Update existing notes
- Retrieve full note content by ID
- Manage tags and attributes
- Clone/duplicate notes
- Export notes in various formats
-
---
-
-**Need Help?** Check the Cortex logs or open an issue on the project repository.
@@ -1,109 +0,0 @@
-# Thinking Stream UI Integration
-
-## What Was Added
-
-Added a "🧠 Show Work" button to the main chat interface that opens a dedicated thinking stream window.
-
-## Changes Made
-
-### 1. Main Chat Interface ([core/ui/index.html](core/ui/index.html))
-
-Added button to session selector:
-```html
-<button id="thinkingStreamBtn" title="Show thinking stream in new window">🧠 Show Work</button>
-```
-
-Added event listener to open stream window:
-```javascript
-document.getElementById("thinkingStreamBtn").addEventListener("click", () => {
-  const streamUrl = `/thinking-stream.html?session=${currentSession}`;
-  const windowFeatures = "width=600,height=800,menubar=no,toolbar=no,location=no,status=no";
-  window.open(streamUrl, `thinking_${currentSession}`, windowFeatures);
-});
-```
-
-### 2. Thinking Stream Window ([core/ui/thinking-stream.html](core/ui/thinking-stream.html))
-
-New dedicated page for the thinking stream:
- **Header**: Shows connection status with live indicator
- **Events Area**: Scrollable list of thinking events
- **Footer**: Clear button and session info
-
-Features:
- Auto-reconnecting SSE connection
- Color-coded event types
- Slide-in animations for new events
- Automatic scrolling to latest event
- Session ID from URL parameter
-
-### 3. Styling ([core/ui/style.css](core/ui/style.css))
-
-Added purple/violet theme for the thinking button:
-```css
-#thinkingStreamBtn {
-  background: rgba(138, 43, 226, 0.2);
-  border-color: #8a2be2;
-}
-```
-
-## How To Use
-
-1. **Open Chat Interface**
-   - Navigate to http://localhost:7078 (relay)
-   - Select or create a session
-
-2. **Open Thinking Stream**
-   - Click the "🧠 Show Work" button
-   - A new window opens showing the thinking stream
-
-3. **Send a Message**
-   - Type a message that requires tools (e.g., "Calculate 50/2 in Python")
-   - Watch the thinking stream window for real-time updates
-
-4. **Observe Events**
-   - 🤔 Thinking iterations
-   - 🔧 Tool calls
-   - 📊 Tool results
-   - ✅ Completion
-
-## Event Types & Colors
-
-| Event | Icon | Color | Description |
-|-------|------|-------|-------------|
-| Connected | ✓ | Green | Stream established |
-| Thinking | 🤔 | Light Green | LLM processing |
-| Tool Call | 🔧 | Orange | Tool invocation |
-| Tool Result | 📊 | Blue | Tool output |
-| Done | ✅ | Purple | Task complete |
-| Error | ❌ | Red | Something failed |
-
-## Architecture
-
-```
-User clicks "Show Work"
-        ↓
-Opens thinking-stream.html?session=xxx
-        ↓
-Connects to SSE: /stream/thinking/{session}
-        ↓
-User sends message in main chat
-        ↓
-FunctionCaller emits events
-        ↓
-Events appear in thinking stream window
-```
-
-## Mobile Support
-
-The thinking stream window is responsive:
- Desktop: Side-by-side windows
- Mobile: Use browser's tab switcher to swap between chat and thinking stream
-
-## Future Enhancements
-
-Potential improvements:
- **Embedded panel**: Option to show thinking stream in a split panel within main UI
- **Event filtering**: Toggle event types on/off
- **Export**: Download thinking trace as JSON
- **Replay**: Replay past thinking sessions
- **Statistics**: Show timing, token usage per step
@@ -1,14 +0,0 @@
-FROM node:18-alpine
-
-WORKDIR /app
-
-# install deps
-COPY package.json ./package.json
-RUN npm install --production
-
-# copy code + config
-COPY persona-server.js ./persona-server.js
-COPY personas.json ./personas.json
-
-EXPOSE 7080
-CMD ["node", "persona-server.js"]
@@ -1,8 +0,0 @@
-{
-  "name": "persona-sidecar",
-  "version": "0.1.0",
-  "type": "module",
-  "dependencies": {
-    "express": "^4.19.2"
-  }
-}
@@ -1,78 +0,0 @@
-// persona-server.js — Persona Sidecar v0.1.0 (Docker Lyra)
-// Node 18+, Express REST
-
-import express from "express";
-import fs from "fs";
-
-const app = express();
-app.use(express.json());
-
-const PORT = process.env.PORT || 7080;
-const CONFIG_FILE = process.env.PERSONAS_FILE || "./personas.json";
-
-// Parse JSON text that may contain /* block */ comments and whole-line // comments (stripped by regex before JSON.parse).
-function parseJsonWithComments(raw) {
-  return JSON.parse(
-    raw
-      .replace(/\/\*[\s\S]*?\*\//g, "")   // strip /* ... */ anywhere, non-greedy; this also matches inside string values
-      .replace(/^\s*\/\/.*$/gm, "")       // strip lines whose first non-blank text is //; a trailing // after a value is NOT removed
-  );
-}
-
-function loadConfig() { // Synchronously read CONFIG_FILE and return the parsed config object; nothing is cached in this function.
-  const raw = fs.readFileSync(CONFIG_FILE, "utf-8"); // read/parse errors propagate to the caller
-  return parseJsonWithComments(raw);
-}
-
-function saveConfig(cfg) { // Overwrite CONFIG_FILE with cfg serialized as 2-space-indented JSON.
-  fs.writeFileSync(CONFIG_FILE, JSON.stringify(cfg, null, 2)); // JSON.stringify emits no comments, so any comments in the file are lost on save
-}
-
-// GET /persona → { active, persona } for the persona named by cfg.active; 404 if it is missing, 500 on any thrown error
-app.get("/persona", (_req, res) => {
-  try {
-    const cfg = loadConfig(); // config is re-read from disk on every request
-    const active = cfg.active;
-    const persona = cfg.personas?.[active]; // undefined when cfg.personas is absent or has no entry for `active`
-    if (!persona) return res.status(404).json({ error: "Active persona not found" });
-    res.json({ active, persona });
-  } catch (err) {
-    res.status(500).json({ error: String(err.message || err) }); // e.g. config file unreadable or not parseable
-  }
-});
-
-// GET /personas → the cfg.personas map as JSON, or {} when the config has no personas key; 500 on any thrown error
-app.get("/personas", (_req, res) => {
-  try {
-    const cfg = loadConfig(); // config is re-read from disk on every request
-    res.json(cfg.personas || {});
-  } catch (err) {
-    res.status(500).json({ error: String(err.message || err) }); // e.g. config file unreadable or not parseable
-  }
-});
-
-// POST /persona/select { name } → make `name` the active persona and write the config back to disk
-app.post("/persona/select", (req, res) => {
-  try {
-    const { name } = req.body || {}; // tolerate a request with no parsed body
-    if (!name) return res.status(400).json({ error: "Missing 'name'" });
-
-    const cfg = loadConfig();
-    if (!cfg.personas || !cfg.personas[name]) { // only names already present in cfg.personas can be selected
-      return res.status(404).json({ error: `Persona '${name}' not found` });
-    }
-    cfg.active = name;
-    saveConfig(cfg); // rewrites CONFIG_FILE with the new active persona
-    res.json({ ok: true, active: name });
-  } catch (err) {
-    res.status(500).json({ error: String(err.message || err) }); // e.g. config read, parse, or write failure
-  }
-});
-
-// Health check, a JSON 404 for every request no earlier route handled, then server start-up
-app.get("/_health", (_req, res) => res.json({ ok: true, time: new Date().toISOString() })); // time is the current server time in ISO 8601
-app.use((_req, res) => res.status(404).json({ error: "no such route" })); // path-less middleware registered last acts as the fallback
-
-app.listen(PORT, () => { // PORT comes from process.env.PORT, defaulting to 7080
-  console.log(`Persona Sidecar listening on :${PORT}`);
-});
@@ -1,17 +0,0 @@
-{
-  // v0.1.0 default active persona
-  "active": "Lyra",
-
-  // Personas available to the service
-  "personas": {
-    "Lyra": {
-      "name": "Lyra",
-      "style": "warm, slyly supportive, collaborative confidante",
-      "protocols": ["Project logs", "Confidence Bank", "Scar Notes"]
-    }
-  }
-
-  // Placeholders for later (commented out for now)
-  // "Doyle": { "name": "Doyle", "style": "gritty poker grinder", "protocols": [] },
-  // "Mr GPT": { "name": "Mr GPT", "style": "direct, tactical mentor", "protocols": [] }
-}
@@ -1,16 +0,0 @@
-# Ignore node_modules - Docker will rebuild them inside
-node_modules
-npm-debug.log
-yarn-error.log
-*.log
-
-# Ignore environment files
-.env
-.env.local
-
-# Ignore OS/editor cruft
-.DS_Store
-*.swp
-*.swo
-.vscode
-.idea
@@ -1,18 +0,0 @@
-# relay/Dockerfile
-FROM node:18-alpine
-
-# Create app directory
-WORKDIR /app
-
-# Copy package.json and install deps first (better caching)
-COPY package.json ./
-RUN npm install
-
-# Copy the rest of the app
-COPY . .
-
-# Expose port
-EXPOSE 7078
-
-# Run the server
-CMD ["npm", "start"]
@@ -1,73 +0,0 @@
-// relay/lib/cortex.js
-import fetch from "node-fetch";
-
-const REFLECT_URL = process.env.CORTEX_URL || "http://localhost:7081/reflect";
-const INGEST_URL  = process.env.CORTEX_URL_INGEST || "http://localhost:7081/ingest";
-
-export async function reflectWithCortex(userInput, memories = []) {
-  // POST prompt + memories to Cortex /reflect. Never throws: failures resolve to { error, reflection_raw: "" }.
-  try {
-    const res = await fetch(REFLECT_URL, {
-      method: "POST",
-      headers: { "Content-Type": "application/json" },
-      body: JSON.stringify({ prompt: userInput, memories }),
-      // node-fetch v3 dropped the `timeout` option, so an abort signal is what actually bounds the call
-      signal: AbortSignal.timeout(120000),
-    });
-    const rawText = await res.text();
-    console.log("🔎 [Cortex-Debug] rawText from /reflect →", rawText.slice(0, 300));
-    if (!res.ok) {
-      throw new Error(`HTTP ${res.status} — ${rawText.slice(0, 200)}`);
-    }
-
-    let data;
-    try {
-      data = JSON.parse(rawText);
-    } catch (err) {
-      // Fallback ① try to grab a JSON-looking block
-      const match = rawText.match(/\{[\s\S]*\}/);
-      if (match) {
-        try {
-          data = JSON.parse(match[0]);
-        } catch {
-          data = { reflection_raw: rawText.trim(), notes: "partial parse" };
-        }
-      } else {
-        // Fallback ② no braces at all: try the text as a Python-style literal (single quotes, None)
-        try {
-          const normalized = rawText
-            .replace(/'/g, '"')        // convert single quotes
-            .replace(/None/g, 'null'); // convert Python None
-          data = JSON.parse(normalized);
-        } catch {
-          data = { reflection_raw: rawText.trim(), notes: "no JSON found" };
-        }
-      }
-    }
-    // typeof null === "object", so a literal `null` body needs its own check before callers read fields
-    if (data === null || typeof data !== "object") {
-      data = { reflection_raw: rawText.trim(), notes: "non-object response" };
-    }
-
-    console.log("🧠 Cortex reflection normalized:", data);
-    return data;
-  } catch (e) {
-    console.warn("⚠️ Cortex reflect failed:", e.message);
-    return { error: e.message, reflection_raw: "" };
-  }
-}
-
-export async function ingestToCortex(user, assistant, reflection = {}, sessionId = "default") {
-  // Best-effort POST of one exchange to Cortex /ingest; failures are logged, never thrown.
-  try {
-    const res = await fetch(INGEST_URL, {
-      method: "POST",
-      headers: { "Content-Type": "application/json" },
-      body: JSON.stringify({ turn: { user, assistant }, reflection, session_id: sessionId }),
-      signal: AbortSignal.timeout(120000), // node-fetch v3 ignores the old `timeout` option
-    });
-    console.log(`📤 Sent exchange to Cortex ingest (${res.status})`);
-  } catch (e) {
-    console.warn("⚠️ Cortex ingest failed:", e.message);
-  }
-}
@@ -1,161 +0,0 @@
-async function tryBackend(backend, messages) {
-  // One attempt against one backend. Throws on missing url/model or non-2xx; unparseable bodies become a "[parse error: …]" reply.
-  if (!backend.url || !backend.model) throw new Error("missing url/model");
-
-  const isOllama = backend.type === "ollama";
-  const isOpenAI = backend.type === "openai";
-  const isVllm = backend.type === "vllm";
-  const isLlamaCpp = backend.type === "llamacpp";
-
-  let endpoint = backend.url;
-  let headers = { "Content-Type": "application/json" };
-  if (isOpenAI) headers["Authorization"] = `Bearer ${process.env.OPENAI_API_KEY}`; // read from env: no OPENAI_API_KEY binding exists in this module
-
-  // Choose correct endpoint automatically
-  if (isOllama && !endpoint.endsWith("/api/chat")) endpoint += "/api/chat";
-  if ((isVllm || isLlamaCpp) && !endpoint.endsWith("/v1/completions")) endpoint += "/v1/completions";
-  if (isOpenAI && !endpoint.endsWith("/v1/chat/completions")) endpoint += "/v1/chat/completions";
-
-  // Build payload based on backend style
-  const body = (isVllm || isLlamaCpp)
-    ? {
-        model: backend.model,
-        prompt: messages.map(m => m.content).join("\n"),
-        max_tokens: 400,
-        temperature: 0.3,
-      }
-    // Ollama and OpenAI chat endpoints take the same non-streaming payload
-    : { model: backend.model, messages, stream: false };
-
-  const resp = await fetch(endpoint, {
-    method: "POST",
-    headers,
-    body: JSON.stringify(body),
-    signal: AbortSignal.timeout(120000), // fetch() has no "timeout" option; the signal enforces the 120 s cap
-  });
-  if (!resp.ok) throw new Error(`${backend.key} HTTP ${resp.status}`);
-  const raw = await resp.text();
-
-  // 🧩 Normalize replies
-  let reply = "";
-  let parsedData = null;
-
-  try {
-    if (isOllama) {
-      // Ollama sometimes returns NDJSON lines; merge them
-      const merged = raw
-        .split("\n")
-        .filter(line => line.trim().startsWith("{"))
-        .map(line => JSON.parse(line))
-        .map(obj => obj.message?.content || obj.response || "")
-        .join("");
-      reply = merged.trim();
-    } else {
-      parsedData = JSON.parse(raw);
-      reply =
-        parsedData?.choices?.[0]?.text?.trim() ||
-        parsedData?.choices?.[0]?.message?.content?.trim() ||
-        parsedData?.message?.content?.trim() ||
-        "";
-    }
-  } catch (err) {
-    reply = `[parse error: ${err.message}]`;
-  }
-
-  return { reply, raw, parsedData, backend: backend.key };
-}
-
-// ------------------------------------
-// Structured logging helper
-// ------------------------------------
-const LOG_DETAIL = process.env.LOG_DETAIL_LEVEL || "summary"; // minimal | summary | detailed | verbose
-
-function logLLMCall(backend, messages, result, error = null) {
-  const timestamp = new Date().toISOString().split('T')[1].slice(0, -1);
-
-  if (error) {
-    // Always log errors
-    console.warn(`⚠️  [LLM] ${backend.key.toUpperCase()} failed | ${timestamp} | ${error.message}`);
-    return;
-  }
-
-  // Success - log based on detail level
-  if (LOG_DETAIL === "minimal") {
-    return; // Don't log successful calls in minimal mode
-  }
-
-  if (LOG_DETAIL === "summary") {
-    console.log(`✅ [LLM] ${backend.key.toUpperCase()} | ${timestamp} | Reply: ${result.reply.substring(0, 80)}...`);
-    return;
-  }
-
-  // Detailed or verbose
-  console.log(`\n${'─'.repeat(100)}`);
-  console.log(`🧠 LLM CALL | Backend: ${backend.key.toUpperCase()} | ${timestamp}`);
-  console.log(`${'─'.repeat(100)}`);
-
-  // Show prompt preview
-  const lastMsg = messages[messages.length - 1];
-  const promptPreview = (lastMsg?.content || '').substring(0, 150);
-  console.log(`📝 Prompt: ${promptPreview}...`);
-
-  // Show parsed reply
-  console.log(`💬 Reply: ${result.reply.substring(0, 200)}...`);
-
-  // Show raw response only in verbose mode
-  if (LOG_DETAIL === "verbose" && result.parsedData) {
-    console.log(`\n╭─ RAW RESPONSE ────────────────────────────────────────────────────────────────────────────`);
-    const jsonStr = JSON.stringify(result.parsedData, null, 2);
-    const lines = jsonStr.split('\n');
-    const maxLines = 50;
-
-    lines.slice(0, maxLines).forEach(line => {
-      console.log(`│ ${line}`);
-    });
-
-    if (lines.length > maxLines) {
-      console.log(`│ ... (${lines.length - maxLines} more lines - check raw field for full response)`);
-    }
-    console.log(`╰${'─'.repeat(95)}`);
-  }
-
-  console.log(`${'─'.repeat(100)}\n`);
-}
-
-// ------------------------------------
-// Export the main call helper
-// ------------------------------------
-export async function callSpeechLLM(messages) {
-  // Try primary → secondary → cloud → fallback in order; resolve with the first success,
-  // or throw Error("all_backends_failed") after logging every failure.
-  const env = process.env;
-  const candidates = [
-    { key: "primary",   type: "vllm",     url: env.LLM_PRIMARY_URL,   model: env.LLM_PRIMARY_MODEL },
-    { key: "secondary", type: "ollama",   url: env.LLM_SECONDARY_URL, model: env.LLM_SECONDARY_MODEL },
-    { key: "cloud",     type: "openai",   url: env.LLM_CLOUD_URL,     model: env.LLM_CLOUD_MODEL },
-    { key: "fallback",  type: "llamacpp", url: env.LLM_FALLBACK_URL,  model: env.LLM_FALLBACK_MODEL },
-  ];
-  const failures = [];
-  const rule = "=".repeat(100);
-  for (const candidate of candidates.filter(c => c.url && c.model)) {
-    try {
-      const outcome = await tryBackend(candidate, messages);
-      logLLMCall(candidate, messages, outcome);
-      return outcome;
-    } catch (err) {
-      logLLMCall(candidate, messages, null, err);
-      failures.push({ backend: candidate.key, error: err.message });
-    }
-  }
-
-  // Nothing answered: print one summary block listing each backend's error
-  console.error(`\n${rule}`);
-  console.error(`🔴 ALL LLM BACKENDS FAILED`);
-  console.error(rule);
-  for (const { backend, error } of failures) {
-    console.error(`  ${backend.toUpperCase()}: ${error}`);
-  }
-  console.error(`${rule}\n`);
-
-  throw new Error("all_backends_failed");
-}
@@ -1,16 +0,0 @@
-{
-  "name": "lyra-relay",
-  "version": "0.1.0",
-  "type": "module",
-  "main": "server.js",
-  "scripts": {
-    "start": "node server.js"
-  },
-  "dependencies": {
-    "cors": "^2.8.5",
-    "dotenv": "^16.6.1",
-    "express": "^4.21.2",
-    "mem0ai": "^2.1.38",
-    "node-fetch": "^3.3.2"
-  }
-}
@@ -1,368 +0,0 @@
-// relay v0.3.0
-// Core relay server for Lyra project
-// Handles incoming chat requests and forwards them to Cortex services
-import express from "express";
-import dotenv from "dotenv";
-import cors from "cors";
-import fs from "fs/promises";
-import path from "path";
-import { fileURLToPath } from "url";
-
-dotenv.config();
-
-// ES module __dirname workaround
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = path.dirname(__filename);
-const SESSIONS_DIR = path.join(__dirname, "sessions");
-
-const app = express();
-app.use(cors());
-app.use(express.json());
-
-const PORT = Number(process.env.PORT || 7078);
-
-// Cortex endpoints
-const CORTEX_REASON = process.env.CORTEX_REASON_URL || "http://cortex:7081/reason";
-const CORTEX_SIMPLE = process.env.CORTEX_SIMPLE_URL || "http://cortex:7081/simple";
-
-// -----------------------------------------------------
-// Helper request wrapper
-// -----------------------------------------------------
-async function postJSON(url, data) {
-  // POST `data` as JSON and resolve to the parsed body (null when the body is empty).
-  // Rejects when the body is not JSON, or when the status is non-2xx (detail/error/raw text as the message).
-  const requestInit = {
-    method: "POST",
-    headers: { "Content-Type": "application/json" },
-    body: JSON.stringify(data),
-  };
-  const response = await fetch(url, requestInit);
-  const text = await response.text();
-  let parsed = null;
-  if (text) {
-    try {
-      parsed = JSON.parse(text);
-    } catch (parseError) {
-      throw new Error(`Non-JSON from ${url}: ${text}`);
-    }
-  }
-
-  if (response.ok) return parsed;
-  throw new Error(parsed?.detail || parsed?.error || text);
-}
-
-// -----------------------------------------------------
-// The unified chat handler
-// -----------------------------------------------------
-async function handleChatRequest(session_id, user_msg, mode = "cortex", backend = null) {
-  // Forward one user message to Cortex (/simple for "standard" mode, /reason otherwise) → { session_id, reply }.
-  let reason;
-  // Determine which endpoint to use based on mode
-  const endpoint = mode === "standard" ? CORTEX_SIMPLE : CORTEX_REASON;
-  const modeName = mode === "standard" ? "simple" : "reason";
-
-  console.log(`Relay → routing to Cortex.${modeName} (mode: ${mode}${backend ? `, backend: ${backend}` : ''})`);
-
-  // Build request payload
-  const payload = {
-    session_id,
-    user_prompt: user_msg
-  };
-
-  // Add backend parameter if provided (only for standard mode)
-  if (backend && mode === "standard") {
-    payload.backend = backend;
-  }
-
-  // Call appropriate Cortex endpoint
-  try {
-    reason = await postJSON(endpoint, payload);
-  } catch (e) {
-    console.error(`Relay → Cortex.${modeName} error:`, e.message);
-    throw new Error(`cortex_${modeName}_failed: ${e.message}`);
-  }
-
-  // postJSON resolves to null for an empty body, so read the reply fields null-safely
-  const persona =
-    reason?.persona ||
-    reason?.final_output ||
-    "(no persona text)";
-
-  // Return final answer
-  return {
-    session_id,
-    reply: persona
-  };
-}
-
-// -----------------------------------------------------
-// HEALTHCHECK
-// -----------------------------------------------------
-app.get("/_health", (_, res) => {
-  res.json({ ok: true });
-});
-
-// -----------------------------------------------------
-// OPENAI-COMPATIBLE ENDPOINT
-// -----------------------------------------------------
-app.post("/v1/chat/completions", async (req, res) => {
-  try {
-    const session_id = req.body.session_id || req.body.sessionId || req.body.user || "default";
-    const messages = req.body.messages || [];
-    const lastMessage = messages[messages.length - 1];
-    const user_msg = lastMessage?.content || "";
-    const mode = req.body.mode || "cortex"; // Get mode from request, default to cortex
-    const backend = req.body.backend || null; // Get backend preference
-
-    if (!user_msg) {
-      return res.status(400).json({ error: "No message content provided" });
-    }
-
-    console.log(`Relay (v1) → received: "${user_msg}" [mode: ${mode}${backend ? `, backend: ${backend}` : ''}]`);
-
-    const result = await handleChatRequest(session_id, user_msg, mode, backend);
-
-    res.json({
-      id: `chatcmpl-${Date.now()}`,
-      object: "chat.completion",
-      created: Math.floor(Date.now() / 1000),
-      model: "lyra",
-      choices: [{
-        index: 0,
-        message: {
-          role: "assistant",
-          content: result.reply
-        },
-        finish_reason: "stop"
-      }],
-      usage: {
-        prompt_tokens: 0,
-        completion_tokens: 0,
-        total_tokens: 0
-      }
-    });
-
-  } catch (err) {
-    console.error("Relay v1 fatal:", err);
-    res.status(500).json({
-      error: {
-        message: err.message || String(err),
-        type: "server_error",
-        code: "relay_failed"
-      }
-    });
-  }
-});
-
-// -----------------------------------------------------
-// MAIN ENDPOINT (Lyra-native UI)
-// -----------------------------------------------------
-app.post("/chat", async (req, res) => {
-  try {
-    const session_id = req.body.session_id || "default";
-    const user_msg   = req.body.message || "";
-    const mode = req.body.mode || "cortex"; // Get mode from request, default to cortex
-    const backend = req.body.backend || null; // Get backend preference
-
-    console.log(`Relay → received: "${user_msg}" [mode: ${mode}${backend ? `, backend: ${backend}` : ''}]`);
-
-    const result = await handleChatRequest(session_id, user_msg, mode, backend);
-    res.json(result);
-
-  } catch (err) {
-    console.error("Relay fatal:", err);
-    res.status(500).json({
-      error: "relay_failed",
-      detail: err.message || String(err)
-    });
-  }
-});
-
-// -----------------------------------------------------
-// SESSION ENDPOINTS (for UI)
-// -----------------------------------------------------
-// Helper functions for session persistence
-async function ensureSessionsDir() {
-  try {
-    await fs.mkdir(SESSIONS_DIR, { recursive: true });
-  } catch (err) {
-    console.error("Failed to create sessions directory:", err);
-  }
-}
-
-async function loadSession(sessionId) {
-  try {
-    // Reads <SESSIONS_DIR>/<id>.json. basename() stops a crafted id ("../x") from escaping the directory.
-    const sessionPath = path.join(SESSIONS_DIR, `${path.basename(String(sessionId))}.json`);
-    return JSON.parse(await fs.readFile(sessionPath, "utf-8"));
-  } catch (err) {
-    // File doesn't exist or is invalid - return empty array
-    return [];
-  }
-}
-
-async function saveSession(sessionId, history, metadata = {}) {
-  try {
-    await ensureSessionsDir();
-    // basename() confines the URL-supplied id to SESSIONS_DIR (no "../" traversal on write)
-    const safeId = path.basename(String(sessionId));
-    const sessionPath = path.join(SESSIONS_DIR, `${safeId}.json`);
-    const metadataPath = path.join(SESSIONS_DIR, `${safeId}.meta.json`);
-
-    // Write history first, then its metadata (name, etc.); resolves true only if both succeed
-    await fs.writeFile(sessionPath, JSON.stringify(history, null, 2), "utf-8");
-    await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2), "utf-8");
-
-    return true;
-  } catch (err) {
-    console.error(`Failed to save session ${sessionId}:`, err);
-    return false;
-  }
-}
-
-async function loadSessionMetadata(sessionId) {
-  try {
-    // Reads <SESSIONS_DIR>/<id>.meta.json; basename() blocks "../" traversal via the URL-supplied id
-    const metadataPath = path.join(SESSIONS_DIR, `${path.basename(String(sessionId))}.meta.json`);
-    return JSON.parse(await fs.readFile(metadataPath, "utf-8"));
-  } catch (err) {
-    // No metadata file, return default
-    return { name: sessionId };
-  }
-}
-
-async function saveSessionMetadata(sessionId, metadata) {
-  try {
-    await ensureSessionsDir();
-    const safeId = path.basename(String(sessionId)); // URL-supplied id: strip any "../" so the write stays in SESSIONS_DIR
-    await fs.writeFile(path.join(SESSIONS_DIR, `${safeId}.meta.json`), JSON.stringify(metadata, null, 2), "utf-8");
-    return true;
-  } catch (err) {
-    console.error(`Failed to save metadata for ${sessionId}:`, err);
-    return false;
-  }
-}
-
-async function listSessions() {
-  // Summarize every <id>.json in SESSIONS_DIR as { id, name, lastModified, messageCount }, newest first; [] on failure.
-  try {
-    await ensureSessionsDir();
-    const files = await fs.readdir(SESSIONS_DIR);
-    const sessions = [];
-    for (const file of files) {
-      if (file.endsWith(".json") && !file.endsWith(".meta.json")) {
-        const sessionId = file.slice(0, -".json".length); // drop only the trailing extension, not the first ".json" found
-        const sessionPath = path.join(SESSIONS_DIR, file);
-        const stats = await fs.stat(sessionPath);
-
-        // Try to read the session to get message count
-        let messageCount = 0;
-        try {
-          const data = await fs.readFile(sessionPath, "utf-8");
-          const history = JSON.parse(data);
-          messageCount = Array.isArray(history) ? history.length : 0;
-        } catch (e) {
-          // Invalid JSON: keep messageCount at 0
-        }
-
-        // Load metadata (name)
-        const metadata = await loadSessionMetadata(sessionId);
-
-        sessions.push({
-          id: sessionId,
-          name: metadata.name || sessionId,
-          lastModified: stats.mtime,
-          messageCount
-        });
-      }
-    }
-
-    // Sort by last modified (newest first)
-    sessions.sort((a, b) => b.lastModified - a.lastModified);
-    return sessions;
-  } catch (err) {
-    console.error("Failed to list sessions:", err);
-    return [];
-  }
-}
-
-async function deleteSession(sessionId) {
-  try {
-    // basename() confines the URL-supplied id to SESSIONS_DIR so a crafted "../x" cannot unlink other files
-    const safeId = path.basename(String(sessionId));
-    const sessionPath = path.join(SESSIONS_DIR, `${safeId}.json`);
-    const metadataPath = path.join(SESSIONS_DIR, `${safeId}.meta.json`);
-
-    // The history file must exist; a failure here is reported as false
-    await fs.unlink(sessionPath);
-    try {
-      await fs.unlink(metadataPath);
-    } catch (e) {
-      // Metadata file doesn't exist, that's ok
-    }
-
-    return true;
-  } catch (err) {
-    console.error(`Failed to delete session ${sessionId}:`, err);
-    return false;
-  }
-}
-
-// GET /sessions - List all sessions
-app.get("/sessions", async (req, res) => {
-  const sessions = await listSessions();
-  res.json(sessions);
-});
-
-// GET /sessions/:id - Get specific session history
-app.get("/sessions/:id", async (req, res) => {
-  const sessionId = req.params.id;
-  const history = await loadSession(sessionId);
-  res.json(history);
-});
-
-// POST /sessions/:id - Save session history (request body must be a JSON array of messages)
-app.post("/sessions/:id", async (req, res) => {
-  const sessionId = req.params.id;
-  const history = req.body;
-  // Anything but an array would be written to disk as-is and reported as `saved: undefined`
-  if (!Array.isArray(history)) return res.status(400).json({ error: "Session history must be a JSON array" });
-  // Load existing metadata to preserve it
-  const existingMetadata = await loadSessionMetadata(sessionId);
-  const success = await saveSession(sessionId, history, existingMetadata);
-  if (success) {
-    res.json({ ok: true, saved: history.length });
-  } else {
-    res.status(500).json({ error: "Failed to save session" });
-  }
-});
-
-// PATCH /sessions/:id/metadata - Update session metadata (name, etc.)
-app.patch("/sessions/:id/metadata", async (req, res) => {
-  const sessionId = req.params.id;
-  const metadata = req.body;
-  const success = await saveSessionMetadata(sessionId, metadata);
-
-  if (success) {
-    res.json({ ok: true, metadata });
-  } else {
-    res.status(500).json({ error: "Failed to update metadata" });
-  }
-});
-
-// DELETE /sessions/:id - Delete a session
-app.delete("/sessions/:id", async (req, res) => {
-  const sessionId = req.params.id;
-  const success = await deleteSession(sessionId);
-
-  if (success) {
-    res.json({ ok: true, deleted: sessionId });
-  } else {
-    res.status(500).json({ error: "Failed to delete session" });
-  }
-});
-
-// -----------------------------------------------------
-app.listen(PORT, () => {
-  console.log(`Relay is online on port ${PORT}`);
-});
@@ -1,39 +0,0 @@
-// test-llm.js
-import path from "path";
-import { fileURLToPath } from "url";
-import dotenv from "dotenv";
-import { callSpeechLLM } from "./lib/llm.js";
-
-// ───────────────────────────────────────────────
-// 🔧 Load environment
-// ───────────────────────────────────────────────
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = path.dirname(__filename);
-const envPath = path.join(__dirname, "../.env");
-dotenv.config({ path: envPath });
-
-console.log("🔧 Using .env from:", envPath);
-console.log("🔧 LLM_FORCE_BACKEND =", process.env.LLM_FORCE_BACKEND);
-console.log("🔧 LLM_PRIMARY_URL  =", process.env.LLM_PRIMARY_URL);
-
-// ───────────────────────────────────────────────
-// 🧪 Run a simple test message
-// ───────────────────────────────────────────────
-async function testLLM() {
-  console.log("🧪 Testing LLM helper...");
-
-  const messages = [
-    { role: "user", content: "Say hello in five words or less." }
-  ];
-
-  try {
-    const { reply, backend } = await callSpeechLLM(messages);
-
-    console.log(`✅ Reply: ${reply || "[no reply]"}`);
-    console.log(`Backend used: ${backend || "[unknown]"}`);
-  } catch (err) {
-    console.error("💥 Test failed:", err.message);
-  }
-}
-
-testLLM();
@@ -1,927 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="UTF-8" />
-  <title>Lyra Core Chat</title>
-  <link rel="stylesheet" href="style.css" />
-  <!-- PWA -->
-  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
-  <meta name="mobile-web-app-capable" content="yes" />
-  <meta name="apple-mobile-web-app-capable" content="yes" />
-  <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
-  <link rel="manifest" href="manifest.json" />
-
-</head>
-<body>
-  <!-- Mobile Menu Overlay -->
-  <div class="mobile-menu-overlay" id="mobileMenuOverlay"></div>
-
-  <!-- Mobile Slide-out Menu -->
-  <div class="mobile-menu" id="mobileMenu">
-    <div class="mobile-menu-section">
-      <h4>Mode</h4>
-      <select id="mobileMode">
-        <option value="standard">Standard</option>
-        <option value="cortex">Cortex</option>
-      </select>
-    </div>
-
-    <div class="mobile-menu-section">
-      <h4>Session</h4>
-      <select id="mobileSessions"></select>
-      <button id="mobileNewSessionBtn">➕ New Session</button>
-      <button id="mobileRenameSessionBtn">✏️ Rename Session</button>
-    </div>
-
-    <div class="mobile-menu-section">
-      <h4>Actions</h4>
-      <button id="mobileThinkingStreamBtn">🧠 Show Work</button>
-      <button id="mobileSettingsBtn">⚙ Settings</button>
-      <button id="mobileToggleThemeBtn">🌙 Toggle Theme</button>
-      <button id="mobileForceReloadBtn">🔄 Force Reload</button>
-    </div>
-  </div>
-
-  <div id="chat">
-    <!-- Mode selector -->
-    <div id="model-select">
-      <!-- Hamburger menu (mobile only) -->
-      <button class="hamburger-menu" id="hamburgerMenu" aria-label="Menu">
-        <span></span>
-        <span></span>
-        <span></span>
-      </button>
-      <label for="mode">Mode:</label>
-      <select id="mode">
-        <option value="standard">Standard</option>
-        <option value="cortex">Cortex</option>
-      </select>
-      <button id="settingsBtn" style="margin-left: auto;">⚙ Settings</button>
-      <div id="theme-toggle">
-        <button id="toggleThemeBtn">🌙 Dark Mode</button>
-      </div>
-    </div>
-
-    <!-- Session selector -->
-    <div id="session-select">
-      <label for="sessions">Session:</label>
-      <select id="sessions"></select>
-      <button id="newSessionBtn">➕ New</button>
-      <button id="renameSessionBtn">✏️ Rename</button>
-      <button id="thinkingStreamBtn" title="Show thinking stream panel">🧠 Show Work</button>
-    </div>
-
-    <!-- Status -->
-    <div id="status">
-      <span id="status-dot"></span>
-      <span id="status-text">Checking Relay...</span>
-    </div>
-
-    <!-- Chat messages -->
-    <div id="messages"></div>
-
-    <!-- Thinking Stream Panel (collapsible) -->
-    <div id="thinkingPanel" class="thinking-panel collapsed">
-      <div class="thinking-header" id="thinkingHeader">
-        <span>🧠 Thinking Stream</span>
-        <div class="thinking-controls">
-          <span class="thinking-status-dot" id="thinkingStatusDot"></span>
-          <button class="thinking-clear-btn" id="thinkingClearBtn" title="Clear events">🗑️</button>
-          <button class="thinking-toggle-btn" id="thinkingToggleBtn">▼</button>
-        </div>
-      </div>
-      <div class="thinking-content" id="thinkingContent">
-        <div class="thinking-empty" id="thinkingEmpty">
-          <div class="thinking-empty-icon">🤔</div>
-          <p>Waiting for thinking events...</p>
-        </div>
-      </div>
-    </div>
-
-    <!-- Input box -->
-    <div id="input">
-      <input id="userInput" type="text" placeholder="Type a message..." autofocus />
-      <button id="sendBtn">Send</button>
-    </div>
-  </div>
-
-  <!-- Settings Modal (outside chat container) -->
-  <div id="settingsModal" class="modal">
-    <div class="modal-overlay"></div>
-    <div class="modal-content">
-      <div class="modal-header">
-        <h3>Settings</h3>
-        <button id="closeModalBtn" class="close-btn">✕</button>
-      </div>
-      <div class="modal-body">
-        <div class="settings-section">
-          <h4>Standard Mode Backend</h4>
-          <p class="settings-desc">Select which LLM backend to use for Standard Mode:</p>
-          <div class="radio-group">
-            <label class="radio-label">
-              <input type="radio" name="backend" value="SECONDARY" checked>
-              <span>SECONDARY - Ollama/Qwen (3090)</span>
-              <small>Fast, local, good for general chat</small>
-            </label>
-            <label class="radio-label">
-              <input type="radio" name="backend" value="PRIMARY">
-              <span>PRIMARY - llama.cpp (MI50)</span>
-              <small>Local, powerful, good for complex reasoning</small>
-            </label>
-            <label class="radio-label">
-              <input type="radio" name="backend" value="OPENAI">
-              <span>OPENAI - GPT-4o-mini</span>
-              <small>Cloud-based, high quality (costs money)</small>
-            </label>
-            <label class="radio-label">
-              <input type="radio" name="backend" value="custom">
-              <span>Custom Backend</span>
-              <input type="text" id="customBackend" placeholder="e.g., FALLBACK" />
-            </label>
-          </div>
-        </div>
-
-        <div class="settings-section" style="margin-top: 24px;">
-          <h4>Session Management</h4>
-          <p class="settings-desc">Manage your saved chat sessions:</p>
-          <div id="sessionList" class="session-list">
-            <p style="color: var(--text-fade); font-size: 0.85rem;">Loading sessions...</p>
-          </div>
-        </div>
-      </div>
-      <div class="modal-footer">
-        <button id="saveSettingsBtn" class="primary-btn">Save</button>
-        <button id="cancelSettingsBtn">Cancel</button>
-      </div>
-    </div>
-  </div>
-
-  <script>
-    const RELAY_BASE = "http://10.0.0.41:7078";
-    const API_URL = `${RELAY_BASE}/v1/chat/completions`;
-
-	function generateSessionId() {
-      return "sess-" + Math.random().toString(36).substring(2, 10);
-    }
-
-    let history = [];
-	let currentSession = localStorage.getItem("currentSession") || null;
-	let sessions = []; // Now loaded from server
-
-	async function loadSessionsFromServer() {
-	  try {
-		const resp = await fetch(`${RELAY_BASE}/sessions`);
-		const serverSessions = await resp.json();
-		sessions = serverSessions;
-		return sessions;
-	  } catch (e) {
-		console.error("Failed to load sessions from server:", e);
-		return [];
-	  }
-	}
-
-	async function renderSessions() {
-	  const select = document.getElementById("sessions");
-	  const mobileSelect = document.getElementById("mobileSessions");
-	  select.innerHTML = "";
-	  mobileSelect.innerHTML = "";
-
-	  sessions.forEach(s => {
-		const opt = document.createElement("option");
-		opt.value = s.id;
-		opt.textContent = s.name || s.id;
-		if (s.id === currentSession) opt.selected = true;
-		select.appendChild(opt);
-
-		// Clone for mobile menu
-		const mobileOpt = opt.cloneNode(true);
-		mobileSelect.appendChild(mobileOpt);
-	  });
-	}
-
-	function getSessionName(id) {
-	  const s = sessions.find(s => s.id === id);
-	  return s ? (s.name || s.id) : id;
-	}
-
-	async function saveSessionMetadata(sessionId, name) {
-	  try {
-		const resp = await fetch(`${RELAY_BASE}/sessions/${encodeURIComponent(sessionId)}/metadata`, {
-		  method: "PATCH",
-		  headers: { "Content-Type": "application/json" },
-		  body: JSON.stringify({ name })
-		});
-		return resp.ok; // fetch() resolves on HTTP 4xx/5xx too, so only the status tells whether the rename was stored
-	  } catch (e) {
-		console.error("Failed to save session metadata:", e);
-		return false;
-	  }
-	}
-
-    async function loadSession(id) {
-	  try {
-		const res = await fetch(`${RELAY_BASE}/sessions/${id}`);
-		const data = await res.json();
-		history = Array.isArray(data) ? data : [];
-		const messagesEl = document.getElementById("messages");
-		messagesEl.innerHTML = "";
-		history.forEach(m => addMessage(m.role, m.content, false)); // Don't auto-scroll for each message
-		addMessage("system", `📂 Loaded session: ${getSessionName(id)} — ${history.length} message(s)`, false);
-		// Scroll to bottom after all messages are loaded
-		messagesEl.scrollTo({ top: messagesEl.scrollHeight, behavior: "smooth" });
-	  } catch (e) {
-		addMessage("system", `Failed to load session: ${e.message}`);
-	  }
-	}
-
-	async function saveSession() {
-	  if (!currentSession) return;
-	  try {
-		await fetch(`${RELAY_BASE}/sessions/${currentSession}`, {
-		  method: "POST",
-		  headers: { "Content-Type": "application/json" },
-		  body: JSON.stringify(history)
-		});
-	  } catch (e) {
-		addMessage("system", `Failed to save session: ${e.message}`);
-	  }
-	}
-
-    async function sendMessage() {
-      // Send the typed message plus prior history to the relay, render the reply, and persist after each turn.
-      const inputEl = document.getElementById("userInput");
-      const msg = inputEl.value.trim();
-      if (!msg) return;
-      inputEl.value = "";
-
-      addMessage("user", msg);
-      history.push({ role: "user", content: msg });
-      await saveSession(); // persist the user turn now so it survives a failed request
-
-      const mode = document.getElementById("mode").value;
-
-      // Seed a stable user id in localStorage (NOTE: it is stored here but not sent in the request body)
-      let userId = localStorage.getItem("userId");
-      if (!userId) {
-        userId = "brian"; // use whatever ID you seeded Mem0 with
-        localStorage.setItem("userId", userId);
-      }
-
-      // Get backend preference for Standard Mode
-      let backend = null;
-      if (mode === "standard") {
-        backend = localStorage.getItem("standardModeBackend") || "SECONDARY";
-      }
-
-      const body = {
-        mode: mode,
-        messages: history,
-        sessionId: currentSession
-      };
-
-      // Only add backend if in standard mode
-      if (backend) {
-        body.backend = backend;
-      }
-
-      try {
-        const resp = await fetch(API_URL, {
-          method: "POST",
-          headers: { "Content-Type": "application/json" },
-          body: JSON.stringify(body)
-        });
-        const data = await resp.json();
-        // An error payload must surface as an error, not be stored in history as a "(no reply)" assistant turn
-        if (!resp.ok) throw new Error(data?.error?.message || data?.error || `HTTP ${resp.status}`);
-        const reply = data.choices?.[0]?.message?.content || "(no reply)";
-        addMessage("assistant", reply);
-        history.push({ role: "assistant", content: reply });
-        await saveSession();
-      } catch (err) {
-        addMessage("system", "Error: " + err.message);
-      }
-    }
-
-	function addMessage(role, text, autoScroll = true) {
-	  const messagesEl = document.getElementById("messages");
-
-	  const msgDiv = document.createElement("div");
-	  msgDiv.className = `msg ${role}`;
-	  msgDiv.textContent = text;
-	  messagesEl.appendChild(msgDiv);
-
-	  // Auto-scroll to bottom if enabled
-	  if (autoScroll) {
-		// Use requestAnimationFrame to ensure DOM has updated
-		requestAnimationFrame(() => {
-		  messagesEl.scrollTo({ top: messagesEl.scrollHeight, behavior: "smooth" });
-		});
-	  }
-	}
-
-
-    async function checkHealth() {
-      try {
-        const resp = await fetch(API_URL.replace("/v1/chat/completions", "/_health"));
-        if (resp.ok) {
-          document.getElementById("status-dot").className = "dot ok";
-          document.getElementById("status-text").textContent = "Relay Online";
-        } else {
-          throw new Error("Bad status");
-        }
-      } catch (err) {
-        document.getElementById("status-dot").className = "dot fail";
-        document.getElementById("status-text").textContent = "Relay Offline";
-      }
-    }
-
-    document.addEventListener("DOMContentLoaded", () => {
-      // Mobile Menu Toggle
-      const hamburgerMenu = document.getElementById("hamburgerMenu");
-      const mobileMenu = document.getElementById("mobileMenu");
-      const mobileMenuOverlay = document.getElementById("mobileMenuOverlay");
-
-      function toggleMobileMenu() {
-        mobileMenu.classList.toggle("open");
-        mobileMenuOverlay.classList.toggle("show");
-        hamburgerMenu.classList.toggle("active");
-      }
-
-      function closeMobileMenu() {
-        mobileMenu.classList.remove("open");
-        mobileMenuOverlay.classList.remove("show");
-        hamburgerMenu.classList.remove("active");
-      }
-
-      hamburgerMenu.addEventListener("click", toggleMobileMenu);
-      mobileMenuOverlay.addEventListener("click", closeMobileMenu);
-
-      // Sync mobile menu controls with desktop
-      const mobileMode = document.getElementById("mobileMode");
-      const desktopMode = document.getElementById("mode");
-
-      // Sync mode selection
-      mobileMode.addEventListener("change", (e) => {
-        desktopMode.value = e.target.value;
-        desktopMode.dispatchEvent(new Event("change"));
-      });
-
-      desktopMode.addEventListener("change", (e) => {
-        mobileMode.value = e.target.value;
-      });
-
-      // Mobile theme toggle
-      document.getElementById("mobileToggleThemeBtn").addEventListener("click", () => {
-        document.getElementById("toggleThemeBtn").click();
-        updateMobileThemeButton();
-      });
-
-      function updateMobileThemeButton() {
-        const isDark = document.body.classList.contains("dark");
-        document.getElementById("mobileToggleThemeBtn").textContent = isDark ? "☀️ Light Mode" : "🌙 Dark Mode";
-      }
-
-      // Mobile settings button
-      document.getElementById("mobileSettingsBtn").addEventListener("click", () => {
-        closeMobileMenu();
-        document.getElementById("settingsBtn").click();
-      });
-
-      // Mobile thinking stream button
-      document.getElementById("mobileThinkingStreamBtn").addEventListener("click", () => {
-        closeMobileMenu();
-        document.getElementById("thinkingStreamBtn").click();
-      });
-
-      // Mobile new session button
-      document.getElementById("mobileNewSessionBtn").addEventListener("click", () => {
-        closeMobileMenu();
-        document.getElementById("newSessionBtn").click();
-      });
-
-      // Mobile rename session button
-      document.getElementById("mobileRenameSessionBtn").addEventListener("click", () => {
-        closeMobileMenu();
-        document.getElementById("renameSessionBtn").click();
-      });
-
-      // Sync mobile session selector with desktop
-      document.getElementById("mobileSessions").addEventListener("change", async (e) => {
-        closeMobileMenu();
-        const desktopSessions = document.getElementById("sessions");
-        desktopSessions.value = e.target.value;
-        desktopSessions.dispatchEvent(new Event("change"));
-      });
-
-      // Mobile force reload button
-      document.getElementById("mobileForceReloadBtn").addEventListener("click", async () => {
-        if (confirm("Force reload the app? This will clear cache and reload.")) {
-          // Clear all caches if available
-          if ('caches' in window) {
-            const cacheNames = await caches.keys();
-            await Promise.all(cacheNames.map(name => caches.delete(name)));
-          }
-
-          // Force reload from server (bypass cache)
-          window.location.reload(true);
-        }
-      });
-
-      // Dark mode toggle - defaults to dark
-      const btn = document.getElementById("toggleThemeBtn");
-
-      // Set dark mode by default if no preference saved
-      const savedTheme = localStorage.getItem("theme");
-      if (!savedTheme || savedTheme === "dark") {
-        document.body.classList.add("dark");
-        btn.textContent = "☀️ Light Mode";
-        localStorage.setItem("theme", "dark");
-      } else {
-        btn.textContent = "🌙 Dark Mode";
-      }
-
-      btn.addEventListener("click", () => {
-        document.body.classList.toggle("dark");
-        const isDark = document.body.classList.contains("dark");
-        btn.textContent = isDark ? "☀️ Light Mode" : "🌙 Dark Mode";
-        localStorage.setItem("theme", isDark ? "dark" : "light");
-        updateMobileThemeButton();
-      });
-
-      // Initialize mobile theme button
-      updateMobileThemeButton();
-
-      // Sessions - Load from server
-	  (async () => {
-		await loadSessionsFromServer();
-		await renderSessions();
-
-		// Ensure we have at least one session
-		if (sessions.length === 0) {
-		  const id = generateSessionId();
-		  const name = "default";
-		  currentSession = id;
-		  history = [];
-		  await saveSession(); // Create empty session on server
-		  await saveSessionMetadata(id, name);
-		  await loadSessionsFromServer();
-		  await renderSessions();
-		  localStorage.setItem("currentSession", currentSession);
-		} else {
-		  // If no current session or current session doesn't exist, use first one
-		  if (!currentSession || !sessions.find(s => s.id === currentSession)) {
-			currentSession = sessions[0].id;
-			localStorage.setItem("currentSession", currentSession);
-		  }
-		}
-
-		// Load current session history
-		if (currentSession) {
-		  await loadSession(currentSession);
-		}
-	  })();
-
-	// Switch session
-	document.getElementById("sessions").addEventListener("change", async e => {
-	  currentSession = e.target.value;
-	  history = [];
-	  localStorage.setItem("currentSession", currentSession);
-	  addMessage("system", `Switched to session: ${getSessionName(currentSession)}`);
-	  await loadSession(currentSession);
-	});
-
-	// Create new session
-	document.getElementById("newSessionBtn").addEventListener("click", async () => {
-	  const name = prompt("Enter new session name:");
-	  if (!name) return;
-	  const id = generateSessionId();
-	  currentSession = id;
-	  history = [];
-	  localStorage.setItem("currentSession", currentSession);
-
-	  // Create session on server
-	  await saveSession();
-	  await saveSessionMetadata(id, name);
-	  await loadSessionsFromServer();
-	  await renderSessions();
-
-	  addMessage("system", `Created session: ${name}`);
-	});
-
-	// Rename session
-	document.getElementById("renameSessionBtn").addEventListener("click", async () => {
-	  const session = sessions.find(s => s.id === currentSession);
-	  if (!session) return;
-	  const newName = prompt("Rename session:", session.name || currentSession);
-	  if (!newName) return;
-
-	  // Update metadata on server
-	  await saveSessionMetadata(currentSession, newName);
-	  await loadSessionsFromServer();
-	  await renderSessions();
-
-	  addMessage("system", `Session renamed to: ${newName}`);
-	});
-
-	// Thinking Stream button
-	document.getElementById("thinkingStreamBtn").addEventListener("click", () => {
-	  if (!currentSession) {
-		alert("Please select a session first");
-		return;
-	  }
-
-	  // Open thinking stream in new window
-	  const streamUrl = `http://10.0.0.41:8081/thinking-stream.html?session=${currentSession}`;
-	  const windowFeatures = "width=600,height=800,menubar=no,toolbar=no,location=no,status=no";
-	  window.open(streamUrl, `thinking_${currentSession}`, windowFeatures);
-
-	  addMessage("system", "🧠 Opened thinking stream in new window");
-	});
-
-
-      // Settings Modal
-      const settingsModal = document.getElementById("settingsModal");
-      const settingsBtn = document.getElementById("settingsBtn");
-      const closeModalBtn = document.getElementById("closeModalBtn");
-      const saveSettingsBtn = document.getElementById("saveSettingsBtn");
-      const cancelSettingsBtn = document.getElementById("cancelSettingsBtn");
-      const modalOverlay = document.querySelector(".modal-overlay");
-
-      // Load saved backend preference
-      const savedBackend = localStorage.getItem("standardModeBackend") || "SECONDARY";
-
-      // Set initial radio button state
-      const backendRadios = document.querySelectorAll('input[name="backend"]');
-      let isCustomBackend = !["SECONDARY", "PRIMARY", "OPENAI"].includes(savedBackend);
-
-      if (isCustomBackend) {
-        document.querySelector('input[name="backend"][value="custom"]').checked = true;
-        document.getElementById("customBackend").value = savedBackend;
-      } else {
-        document.querySelector(`input[name="backend"][value="${savedBackend}"]`).checked = true;
-      }
-
-      // Session management functions
-      async function loadSessionList() {
-        try {
-          // Reload from server to get latest
-          await loadSessionsFromServer();
-
-          const sessionListEl = document.getElementById("sessionList");
-          if (sessions.length === 0) {
-            sessionListEl.innerHTML = '<p style="color: var(--text-fade); font-size: 0.85rem;">No saved sessions found</p>';
-            return;
-          }
-
-          sessionListEl.innerHTML = "";
-          sessions.forEach(sess => {
-            const sessionItem = document.createElement("div");
-            sessionItem.className = "session-item";
-
-            const sessionInfo = document.createElement("div");
-            sessionInfo.className = "session-info";
-
-            const sessionName = sess.name || sess.id;
-            const lastModified = new Date(sess.lastModified).toLocaleString();
-
-            sessionInfo.innerHTML = `
-              <strong>${sessionName}</strong>
-              <small>${sess.messageCount} messages • ${lastModified}</small>
-            `;
-
-            const deleteBtn = document.createElement("button");
-            deleteBtn.className = "session-delete-btn";
-            deleteBtn.textContent = "🗑️";
-            deleteBtn.title = "Delete session";
-            deleteBtn.onclick = async () => {
-              if (!confirm(`Delete session "${sessionName}"?`)) return;
-
-              try {
-                await fetch(`${RELAY_BASE}/sessions/${sess.id}`, { method: "DELETE" });
-
-                // Reload sessions from server
-                await loadSessionsFromServer();
-
-                // If we deleted the current session, switch to another or create new
-                if (currentSession === sess.id) {
-                  if (sessions.length > 0) {
-                    currentSession = sessions[0].id;
-                    localStorage.setItem("currentSession", currentSession);
-                    history = [];
-                    await loadSession(currentSession);
-                  } else {
-                    const id = generateSessionId();
-                    const name = "default";
-                    currentSession = id;
-                    localStorage.setItem("currentSession", currentSession);
-                    history = [];
-                    await saveSession();
-                    await saveSessionMetadata(id, name);
-                    await loadSessionsFromServer();
-                  }
-                }
-
-                // Refresh both the dropdown and the settings list
-                await renderSessions();
-                await loadSessionList();
-
-                addMessage("system", `Deleted session: ${sessionName}`);
-              } catch (e) {
-                alert("Failed to delete session: " + e.message);
-              }
-            };
-
-            sessionItem.appendChild(sessionInfo);
-            sessionItem.appendChild(deleteBtn);
-            sessionListEl.appendChild(sessionItem);
-          });
-        } catch (e) {
-          const sessionListEl = document.getElementById("sessionList");
-          sessionListEl.innerHTML = '<p style="color: #ff3333; font-size: 0.85rem;">Failed to load sessions</p>';
-        }
-      }
-
-      // Show modal and load session list
-      settingsBtn.addEventListener("click", () => {
-        settingsModal.classList.add("show");
-        loadSessionList(); // Refresh session list when opening settings
-      });
-
-      // Hide modal functions
-      const hideModal = () => {
-        settingsModal.classList.remove("show");
-      };
-
-      closeModalBtn.addEventListener("click", hideModal);
-      cancelSettingsBtn.addEventListener("click", hideModal);
-      modalOverlay.addEventListener("click", hideModal);
-
-      // ESC key to close
-      document.addEventListener("keydown", (e) => {
-        if (e.key === "Escape" && settingsModal.classList.contains("show")) {
-          hideModal();
-        }
-      });
-
-      // Save settings
-      saveSettingsBtn.addEventListener("click", () => {
-        const selectedRadio = document.querySelector('input[name="backend"]:checked');
-        let backendValue;
-
-        if (selectedRadio.value === "custom") {
-          backendValue = document.getElementById("customBackend").value.trim().toUpperCase();
-          if (!backendValue) {
-            alert("Please enter a custom backend name");
-            return;
-          }
-        } else {
-          backendValue = selectedRadio.value;
-        }
-
-        localStorage.setItem("standardModeBackend", backendValue);
-        addMessage("system", `Backend changed to: ${backendValue}`);
-        hideModal();
-      });
-
-      // Health check
-      checkHealth();
-      setInterval(checkHealth, 10000);
-
-      // Input events
-      document.getElementById("sendBtn").addEventListener("click", sendMessage);
-      document.getElementById("userInput").addEventListener("keypress", e => {
-        if (e.key === "Enter") sendMessage();
-      });
-
-      // ========== THINKING STREAM INTEGRATION ==========
-      const thinkingPanel = document.getElementById("thinkingPanel");
-      const thinkingHeader = document.getElementById("thinkingHeader");
-      const thinkingToggleBtn = document.getElementById("thinkingToggleBtn");
-      const thinkingClearBtn = document.getElementById("thinkingClearBtn");
-      const thinkingContent = document.getElementById("thinkingContent");
-      const thinkingStatusDot = document.getElementById("thinkingStatusDot");
-      const thinkingEmpty = document.getElementById("thinkingEmpty");
-
-      let thinkingEventSource = null;
-      let thinkingEventCount = 0;
-      const CORTEX_BASE = "http://10.0.0.41:7081";
-
-      // Load thinking panel state from localStorage
-      const isPanelCollapsed = localStorage.getItem("thinkingPanelCollapsed") === "true";
-      if (!isPanelCollapsed) {
-        thinkingPanel.classList.remove("collapsed");
-      }
-
-      // Toggle thinking panel
-      thinkingHeader.addEventListener("click", (e) => {
-        if (e.target === thinkingClearBtn) return; // Don't toggle if clicking clear
-        thinkingPanel.classList.toggle("collapsed");
-        localStorage.setItem("thinkingPanelCollapsed", thinkingPanel.classList.contains("collapsed"));
-      });
-
-      // Clear thinking events
-      thinkingClearBtn.addEventListener("click", (e) => {
-        e.stopPropagation();
-        clearThinkingEvents();
-      });
-
-      function clearThinkingEvents() {
-        thinkingContent.innerHTML = '';
-        thinkingContent.appendChild(thinkingEmpty);
-        thinkingEventCount = 0;
-        // Clear from localStorage
-        if (currentSession) {
-          localStorage.removeItem(`thinkingEvents_${currentSession}`);
-        }
-      }
-
-      function connectThinkingStream() {
-        if (!currentSession) return;
-
-        // Close existing connection
-        if (thinkingEventSource) {
-          thinkingEventSource.close();
-        }
-
-        // Load persisted events
-        loadThinkingEvents();
-
-        const url = `${CORTEX_BASE}/stream/thinking/${currentSession}`;
-        console.log('Connecting thinking stream:', url);
-
-        thinkingEventSource = new EventSource(url);
-
-        thinkingEventSource.onopen = () => {
-          console.log('Thinking stream connected');
-          thinkingStatusDot.className = 'thinking-status-dot connected';
-        };
-
-        thinkingEventSource.onmessage = (event) => {
-          try {
-            const data = JSON.parse(event.data);
-            addThinkingEvent(data);
-            saveThinkingEvent(data); // Persist event
-          } catch (e) {
-            console.error('Failed to parse thinking event:', e);
-          }
-        };
-
-        thinkingEventSource.onerror = (error) => {
-          console.error('Thinking stream error:', error);
-          thinkingStatusDot.className = 'thinking-status-dot disconnected';
-
-          // Retry connection after 2 seconds
-          setTimeout(() => {
-            if (thinkingEventSource && thinkingEventSource.readyState === EventSource.CLOSED) {
-              console.log('Reconnecting thinking stream...');
-              connectThinkingStream();
-            }
-          }, 2000);
-        };
-      }
-
-      function addThinkingEvent(event) {
-        // Remove empty state if present
-        if (thinkingEventCount === 0 && thinkingEmpty.parentNode) {
-          thinkingContent.removeChild(thinkingEmpty);
-        }
-
-        const eventDiv = document.createElement('div');
-        eventDiv.className = `thinking-event thinking-event-${event.type}`;
-
-        let icon = '';
-        let message = '';
-        let details = '';
-
-        switch (event.type) {
-          case 'connected':
-            icon = '✓';
-            message = 'Stream connected';
-            details = `Session: ${event.session_id}`;
-            break;
-
-          case 'thinking':
-            icon = '🤔';
-            message = event.data.message;
-            break;
-
-          case 'tool_call':
-            icon = '🔧';
-            message = event.data.message;
-            if (event.data.args) {
-              details = JSON.stringify(event.data.args, null, 2);
-            }
-            break;
-
-          case 'tool_result':
-            icon = '📊';
-            message = event.data.message;
-            if (event.data.result && event.data.result.stdout) {
-              details = `stdout: ${event.data.result.stdout}`;
-            }
-            break;
-
-          case 'done':
-            icon = '✅';
-            message = event.data.message;
-            if (event.data.final_answer) {
-              details = event.data.final_answer;
-            }
-            break;
-
-          case 'error':
-            icon = '❌';
-            message = event.data.message;
-            break;
-
-          default:
-            icon = '•';
-            message = JSON.stringify(event.data);
-        }
-
-        eventDiv.innerHTML = `
-          <span class="thinking-event-icon">${icon}</span>
-          <span>${message}</span>
-          ${details ? `<div class="thinking-event-details">${details}</div>` : ''}
-        `;
-
-        thinkingContent.appendChild(eventDiv);
-        thinkingContent.scrollTop = thinkingContent.scrollHeight;
-        thinkingEventCount++;
-      }
-
-      // Persist thinking events to localStorage
-      function saveThinkingEvent(event) {
-        if (!currentSession) return;
-
-        const key = `thinkingEvents_${currentSession}`;
-        let events = JSON.parse(localStorage.getItem(key) || '[]');
-
-        // Keep only last 50 events to avoid bloating localStorage
-        if (events.length >= 50) {
-          events = events.slice(-49);
-        }
-
-        events.push({
-          ...event,
-          timestamp: Date.now()
-        });
-
-        localStorage.setItem(key, JSON.stringify(events));
-      }
-
-      // Load persisted thinking events
-      function loadThinkingEvents() {
-        if (!currentSession) return;
-
-        const key = `thinkingEvents_${currentSession}`;
-        const events = JSON.parse(localStorage.getItem(key) || '[]');
-
-        // Clear current display
-        thinkingContent.innerHTML = '';
-        thinkingEventCount = 0;
-
-        // Replay events
-        events.forEach(event => addThinkingEvent(event));
-
-        // Show empty state if no events
-        if (events.length === 0) {
-          thinkingContent.appendChild(thinkingEmpty);
-        }
-      }
-
-      // Update the old thinking stream button to toggle panel instead
-      document.getElementById("thinkingStreamBtn").addEventListener("click", () => {
-        thinkingPanel.classList.remove("collapsed");
-        localStorage.setItem("thinkingPanelCollapsed", "false");
-      });
-
-      // Mobile thinking stream button
-      document.getElementById("mobileThinkingStreamBtn").addEventListener("click", () => {
-        closeMobileMenu();
-        thinkingPanel.classList.remove("collapsed");
-        localStorage.setItem("thinkingPanelCollapsed", "false");
-      });
-
-      // Connect thinking stream when session loads
-      if (currentSession) {
-        connectThinkingStream();
-      }
-
-      // Reconnect thinking stream when session changes
-      const originalSessionChange = document.getElementById("sessions").onchange;
-      document.getElementById("sessions").addEventListener("change", () => {
-        setTimeout(() => {
-          connectThinkingStream();
-        }, 500); // Wait for session to load
-      });
-
-      // Cleanup on page unload
-      window.addEventListener('beforeunload', () => {
-        if (thinkingEventSource) {
-          thinkingEventSource.close();
-        }
-      });
-    });
-  </script>
-</body>
-</html>
@@ -1,20 +0,0 @@
-{
-  "name": "Lyra Chat",
-  "short_name": "Lyra",
-  "start_url": "./index.html",
-  "display": "standalone",
-  "background_color": "#181818",
-  "theme_color": "#181818",
-  "icons": [
-    {
-      "src": "icon-192.png",
-      "sizes": "192x192",
-      "type": "image/png"
-    },
-    {
-      "src": "icon-512.png",
-      "sizes": "512x512",
-      "type": "image/png"
-    }
-  ]
-}
@@ -1,909 +0,0 @@
-:root {
-  --bg-dark: #0a0a0a;
-  --bg-panel: rgba(255, 115, 0, 0.1);
-  --accent: #ff6600;
-  --accent-glow: 0 0 12px #ff6600cc;
-  --text-main: #e6e6e6;
-  --text-fade: #999;
-  --font-console: "IBM Plex Mono", monospace;
-}
-
-/* Light mode variables */
-body {
-  --bg-dark: #f5f5f5;
-  --bg-panel: rgba(255, 115, 0, 0.05);
-  --accent: #ff6600;
-  --accent-glow: 0 0 12px #ff6600cc;
-  --text-main: #1a1a1a;
-  --text-fade: #666;
-}
-
-/* Dark mode variables */
-body.dark {
-  --bg-dark: #0a0a0a;
-  --bg-panel: rgba(255, 115, 0, 0.1);
-  --accent: #ff6600;
-  --accent-glow: 0 0 12px #ff6600cc;
-  --text-main: #e6e6e6;
-  --text-fade: #999;
-}
-
-body {
-  margin: 0;
-  background: var(--bg-dark);
-  color: var(--text-main);
-  font-family: var(--font-console);
-  height: 100vh;
-  display: flex;
-  justify-content: center;
-  align-items: center;
-}
-
-#chat {
-  width: 95%;
-  max-width: 900px;
-  height: 95vh;
-  display: flex;
-  flex-direction: column;
-  border: 1px solid var(--accent);
-  border-radius: 10px;
-  box-shadow: var(--accent-glow);
-  background: var(--bg-dark);
-  overflow: hidden;
-}
-
-/* Header sections */
-#model-select, #session-select, #status {
-  display: flex;
-  align-items: center;
-  gap: 8px;
-  padding: 8px 12px;
-  border-bottom: 1px solid var(--accent);
-  background-color: rgba(255, 102, 0, 0.05);
-}
-#status {
-  justify-content: flex-start;
-  border-top: 1px solid var(--accent);
-}
-
-label, select, button {
-  font-family: var(--font-console);
-  font-size: 0.9rem;
-  color: var(--text-main);
-  background: transparent;
-  border: 1px solid var(--accent);
-  border-radius: 4px;
-  padding: 4px 8px;
-}
-
-button:hover, select:hover {
-  box-shadow: 0 0 8px var(--accent);
-  cursor: pointer;
-}
-
-#thinkingStreamBtn {
-  background: rgba(138, 43, 226, 0.2);
-  border-color: #8a2be2;
-}
-
-#thinkingStreamBtn:hover {
-  box-shadow: 0 0 8px #8a2be2;
-  background: rgba(138, 43, 226, 0.3);
-}
-
-/* Chat area */
-#messages {
-  flex: 1;
-  padding: 16px;
-  overflow-y: auto;
-  display: flex;
-  flex-direction: column;
-  gap: 8px;
-  scroll-behavior: smooth;
-}
-
-/* Messages */
-.msg {
-  max-width: 80%;
-  padding: 10px 14px;
-  border-radius: 8px;
-  line-height: 1.4;
-  word-wrap: break-word;
-  box-shadow: 0 0 8px rgba(255,102,0,0.2);
-}
-.msg.user {
-  align-self: flex-end;
-  background: rgba(255,102,0,0.15);
-  border: 1px solid var(--accent);
-}
-.msg.assistant {
-  align-self: flex-start;
-  background: rgba(255,102,0,0.08);
-  border: 1px solid rgba(255,102,0,0.5);
-}
-.msg.system {
-  align-self: center;
-  font-size: 0.8rem;
-  color: var(--text-fade);
-}
-
-/* Input bar */
-#input {
-  display: flex;
-  border-top: 1px solid var(--accent);
-  background: rgba(255, 102, 0, 0.05);
-  padding: 10px;
-}
-#userInput {
-  flex: 1;
-  background: transparent;
-  color: var(--text-main);
-  border: 1px solid var(--accent);
-  border-radius: 4px;
-  padding: 8px;
-}
-#sendBtn {
-  margin-left: 8px;
-}
-
-/* Relay status dot */
-#status {
-  display: flex;
-  align-items: center;
-  margin: 10px 0;
-  gap: 8px;
-  font-family: monospace;
-  color: #f5f5f5;
-}
-
-#status-dot {
-  width: 10px;
-  height: 10px;
-  border-radius: 50%;
-  display: inline-block;
-}
-
-@keyframes pulseGreen {
-  0% { box-shadow: 0 0 5px #00ff66; opacity: 0.9; }
-  50% { box-shadow: 0 0 20px #00ff99; opacity: 1; }
-  100% { box-shadow: 0 0 5px #00ff66; opacity: 0.9; }
-}
-
-.dot.ok {
-  background: #00ff66;
-  animation: pulseGreen 2s infinite ease-in-out;
-}
-
-/* Offline state stays solid red */
-.dot.fail {
-  background: #ff3333;
-  box-shadow: 0 0 10px #ff3333;
-}
-
-
-/* Dropdown (session selector) styling */
-select {
-  background-color: var(--bg-dark);
-  color: var(--text-main);
-  border: 1px solid #b84a12;
-  border-radius: 6px;
-  padding: 4px 6px;
-  font-size: 14px;
-}
-
-select option {
-  background-color: var(--bg-dark);
-  color: var(--text-main);
-}
-
-/* Hover/focus for better visibility */
-select:focus,
-select:hover {
-  outline: none;
-  border-color: #ff7a33;
-  background-color: var(--bg-panel);
-}
-
-/* Settings Modal */
-.modal {
-  display: none !important;
-  position: fixed;
-  top: 0;
-  left: 0;
-  width: 100%;
-  height: 100%;
-  z-index: 1000;
-}
-
-.modal.show {
-  display: block !important;
-}
-
-.modal-overlay {
-  position: fixed;
-  top: 0;
-  left: 0;
-  width: 100%;
-  height: 100%;
-  background: rgba(0, 0, 0, 0.8);
-  backdrop-filter: blur(4px);
-  z-index: 999;
-}
-
-.modal-content {
-  position: fixed;
-  top: 50%;
-  left: 50%;
-  transform: translate(-50%, -50%);
-  background: linear-gradient(180deg, rgba(255,102,0,0.1) 0%, rgba(10,10,10,0.95) 100%);
-  border: 2px solid var(--accent);
-  border-radius: 12px;
-  box-shadow: var(--accent-glow), 0 0 40px rgba(255,102,0,0.3);
-  min-width: 400px;
-  max-width: 600px;
-  max-height: 80vh;
-  overflow-y: auto;
-  z-index: 1001;
-}
-
-.modal-header {
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  padding: 16px 20px;
-  border-bottom: 1px solid var(--accent);
-  background: rgba(255,102,0,0.1);
-}
-
-.modal-header h3 {
-  margin: 0;
-  font-size: 1.2rem;
-  color: var(--accent);
-}
-
-.close-btn {
-  background: transparent;
-  border: none;
-  color: var(--accent);
-  font-size: 1.5rem;
-  cursor: pointer;
-  padding: 0;
-  width: 30px;
-  height: 30px;
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  border-radius: 4px;
-}
-
-.close-btn:hover {
-  background: rgba(255,102,0,0.2);
-  box-shadow: 0 0 8px var(--accent);
-}
-
-.modal-body {
-  padding: 20px;
-}
-
-.settings-section h4 {
-  margin: 0 0 8px 0;
-  color: var(--accent);
-  font-size: 1rem;
-}
-
-.settings-desc {
-  margin: 0 0 16px 0;
-  color: var(--text-fade);
-  font-size: 0.85rem;
-}
-
-.radio-group {
-  display: flex;
-  flex-direction: column;
-  gap: 12px;
-}
-
-.radio-label {
-  display: flex;
-  flex-direction: column;
-  padding: 12px;
-  border: 1px solid rgba(255,102,0,0.3);
-  border-radius: 6px;
-  background: rgba(255,102,0,0.05);
-  cursor: pointer;
-  transition: all 0.2s;
-}
-
-.radio-label:hover {
-  border-color: var(--accent);
-  background: rgba(255,102,0,0.1);
-  box-shadow: 0 0 8px rgba(255,102,0,0.3);
-}
-
-.radio-label input[type="radio"] {
-  margin-right: 8px;
-  accent-color: var(--accent);
-}
-
-.radio-label span {
-  font-weight: 500;
-  margin-bottom: 4px;
-}
-
-.radio-label small {
-  color: var(--text-fade);
-  font-size: 0.8rem;
-  margin-left: 24px;
-}
-
-.radio-label input[type="text"] {
-  margin-top: 8px;
-  margin-left: 24px;
-  padding: 6px;
-  background: rgba(0,0,0,0.3);
-  border: 1px solid rgba(255,102,0,0.5);
-  border-radius: 4px;
-  color: var(--text-main);
-  font-family: var(--font-console);
-}
-
-.radio-label input[type="text"]:focus {
-  outline: none;
-  border-color: var(--accent);
-  box-shadow: 0 0 8px rgba(255,102,0,0.3);
-}
-
-.modal-footer {
-  display: flex;
-  justify-content: flex-end;
-  gap: 10px;
-  padding: 16px 20px;
-  border-top: 1px solid var(--accent);
-  background: rgba(255,102,0,0.05);
-}
-
-.primary-btn {
-  background: var(--accent);
-  color: #000;
-  font-weight: bold;
-}
-
-.primary-btn:hover {
-  background: #ff7a33;
-  box-shadow: var(--accent-glow);
-}
-
-/* Session List */
-.session-list {
-  display: flex;
-  flex-direction: column;
-  gap: 8px;
-  max-height: 300px;
-  overflow-y: auto;
-}
-
-.session-item {
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  padding: 12px;
-  border: 1px solid rgba(255,102,0,0.3);
-  border-radius: 6px;
-  background: rgba(255,102,0,0.05);
-  transition: all 0.2s;
-}
-
-.session-item:hover {
-  border-color: var(--accent);
-  background: rgba(255,102,0,0.1);
-}
-
-.session-info {
-  display: flex;
-  flex-direction: column;
-  gap: 4px;
-  flex: 1;
-}
-
-.session-info strong {
-  color: var(--text-main);
-  font-size: 0.95rem;
-}
-
-.session-info small {
-  color: var(--text-fade);
-  font-size: 0.75rem;
-}
-
-.session-delete-btn {
-  background: transparent;
-  border: 1px solid rgba(255,102,0,0.5);
-  color: var(--accent);
-  padding: 6px 10px;
-  border-radius: 4px;
-  cursor: pointer;
-  font-size: 1rem;
-  transition: all 0.2s;
-}
-
-.session-delete-btn:hover {
-  background: rgba(255,0,0,0.2);
-  border-color: #ff3333;
-  color: #ff3333;
-  box-shadow: 0 0 8px rgba(255,0,0,0.3);
-}
-
-/* Thinking Stream Panel */
-.thinking-panel {
-  border-top: 1px solid var(--accent);
-  background: rgba(255, 102, 0, 0.02);
-  display: flex;
-  flex-direction: column;
-  transition: max-height 0.3s ease;
-  max-height: 300px;
-}
-
-.thinking-panel.collapsed {
-  max-height: 40px;
-}
-
-.thinking-header {
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  padding: 10px 12px;
-  background: rgba(255, 102, 0, 0.08);
-  cursor: pointer;
-  user-select: none;
-  border-bottom: 1px solid rgba(255, 102, 0, 0.2);
-  font-size: 0.9rem;
-  font-weight: 500;
-}
-
-.thinking-header:hover {
-  background: rgba(255, 102, 0, 0.12);
-}
-
-.thinking-controls {
-  display: flex;
-  align-items: center;
-  gap: 8px;
-}
-
-.thinking-status-dot {
-  width: 8px;
-  height: 8px;
-  border-radius: 50%;
-  background: #666;
-  display: inline-block;
-}
-
-.thinking-status-dot.connected {
-  background: #00ff66;
-  box-shadow: 0 0 8px #00ff66;
-}
-
-.thinking-status-dot.disconnected {
-  background: #ff3333;
-}
-
-.thinking-clear-btn,
-.thinking-toggle-btn {
-  background: transparent;
-  border: 1px solid rgba(255, 102, 0, 0.5);
-  color: var(--text-main);
-  padding: 4px 8px;
-  border-radius: 4px;
-  cursor: pointer;
-  font-size: 0.85rem;
-}
-
-.thinking-clear-btn:hover,
-.thinking-toggle-btn:hover {
-  background: rgba(255, 102, 0, 0.2);
-  box-shadow: 0 0 6px rgba(255, 102, 0, 0.3);
-}
-
-.thinking-toggle-btn {
-  transition: transform 0.3s ease;
-}
-
-.thinking-panel.collapsed .thinking-toggle-btn {
-  transform: rotate(-90deg);
-}
-
-.thinking-content {
-  flex: 1;
-  overflow-y: auto;
-  padding: 12px;
-  display: flex;
-  flex-direction: column;
-  gap: 8px;
-  min-height: 0;
-}
-
-.thinking-panel.collapsed .thinking-content {
-  display: none;
-}
-
-.thinking-empty {
-  text-align: center;
-  padding: 40px 20px;
-  color: var(--text-fade);
-  font-size: 0.85rem;
-}
-
-.thinking-empty-icon {
-  font-size: 2rem;
-  margin-bottom: 10px;
-}
-
-.thinking-event {
-  padding: 8px 12px;
-  border-radius: 6px;
-  font-size: 0.85rem;
-  font-family: 'Courier New', monospace;
-  animation: thinkingSlideIn 0.3s ease-out;
-  border-left: 3px solid;
-  word-wrap: break-word;
-}
-
-@keyframes thinkingSlideIn {
-  from {
-    opacity: 0;
-    transform: translateY(-10px);
-  }
-  to {
-    opacity: 1;
-    transform: translateY(0);
-  }
-}
-
-.thinking-event-connected {
-  background: rgba(0, 255, 102, 0.1);
-  border-color: #00ff66;
-  color: #00ff66;
-}
-
-.thinking-event-thinking {
-  background: rgba(138, 43, 226, 0.1);
-  border-color: #8a2be2;
-  color: #c79cff;
-}
-
-.thinking-event-tool_call {
-  background: rgba(255, 165, 0, 0.1);
-  border-color: #ffa500;
-  color: #ffb84d;
-}
-
-.thinking-event-tool_result {
-  background: rgba(0, 191, 255, 0.1);
-  border-color: #00bfff;
-  color: #7dd3fc;
-}
-
-.thinking-event-done {
-  background: rgba(168, 85, 247, 0.1);
-  border-color: #a855f7;
-  color: #e9d5ff;
-  font-weight: bold;
-}
-
-.thinking-event-error {
-  background: rgba(255, 51, 51, 0.1);
-  border-color: #ff3333;
-  color: #fca5a5;
-}
-
-.thinking-event-icon {
-  display: inline-block;
-  margin-right: 8px;
-}
-
-.thinking-event-details {
-  font-size: 0.75rem;
-  color: var(--text-fade);
-  margin-top: 4px;
-  padding-left: 20px;
-  white-space: pre-wrap;
-  max-height: 100px;
-  overflow-y: auto;
-}
-
-/* ========== MOBILE RESPONSIVE STYLES ========== */
-
-/* Hamburger Menu */
-.hamburger-menu {
-  display: none;
-  flex-direction: column;
-  gap: 4px;
-  cursor: pointer;
-  padding: 8px;
-  border: 1px solid var(--accent);
-  border-radius: 4px;
-  background: transparent;
-  z-index: 100;
-}
-
-.hamburger-menu span {
-  width: 20px;
-  height: 2px;
-  background: var(--accent);
-  transition: all 0.3s;
-  display: block;
-}
-
-.hamburger-menu.active span:nth-child(1) {
-  transform: rotate(45deg) translate(5px, 5px);
-}
-
-.hamburger-menu.active span:nth-child(2) {
-  opacity: 0;
-}
-
-.hamburger-menu.active span:nth-child(3) {
-  transform: rotate(-45deg) translate(5px, -5px);
-}
-
-/* Mobile Menu Container */
-.mobile-menu {
-  display: none;
-  position: fixed;
-  top: 0;
-  left: -100%;
-  width: 280px;
-  height: 100vh;
-  background: var(--bg-dark);
-  border-right: 2px solid var(--accent);
-  box-shadow: var(--accent-glow);
-  z-index: 999;
-  transition: left 0.3s ease;
-  overflow-y: auto;
-  padding: 20px;
-  flex-direction: column;
-  gap: 16px;
-}
-
-.mobile-menu.open {
-  left: 0;
-}
-
-.mobile-menu-overlay {
-  display: none;
-  position: fixed;
-  top: 0;
-  left: 0;
-  width: 100%;
-  height: 100%;
-  background: rgba(0, 0, 0, 0.7);
-  z-index: 998;
-}
-
-.mobile-menu-overlay.show {
-  display: block;
-}
-
-.mobile-menu-section {
-  display: flex;
-  flex-direction: column;
-  gap: 8px;
-  padding-bottom: 16px;
-  border-bottom: 1px solid rgba(255, 102, 0, 0.3);
-}
-
-.mobile-menu-section:last-child {
-  border-bottom: none;
-}
-
-.mobile-menu-section h4 {
-  margin: 0;
-  color: var(--accent);
-  font-size: 0.9rem;
-  text-transform: uppercase;
-  letter-spacing: 1px;
-}
-
-.mobile-menu button,
-.mobile-menu select {
-  width: 100%;
-  padding: 10px;
-  font-size: 0.95rem;
-  text-align: left;
-}
-
-/* Mobile Breakpoints */
-@media screen and (max-width: 768px) {
-  body {
-    padding: 0;
-  }
-
-  #chat {
-    width: 100%;
-    max-width: 100%;
-    height: 100vh;
-    border-radius: 0;
-    border-left: none;
-    border-right: none;
-  }
-
-  /* Show hamburger, hide desktop header controls */
-  .hamburger-menu {
-    display: flex;
-  }
-
-  #model-select {
-    padding: 12px;
-    justify-content: space-between;
-  }
-
-  /* Hide all controls except hamburger on mobile */
-  #model-select > *:not(.hamburger-menu) {
-    display: none;
-  }
-
-  #session-select {
-    display: none;
-  }
-
-  /* Show mobile menu */
-  .mobile-menu {
-    display: flex;
-  }
-
-  /* Messages - more width on mobile */
-  .msg {
-    max-width: 90%;
-    font-size: 0.95rem;
-  }
-
-  /* Status bar */
-  #status {
-    padding: 10px 12px;
-    font-size: 0.85rem;
-  }
-
-  /* Input area - bigger touch targets */
-  #input {
-    padding: 12px;
-  }
-
-  #userInput {
-    font-size: 16px; /* Prevents zoom on iOS */
-    padding: 12px;
-  }
-
-  #sendBtn {
-    padding: 12px 16px;
-    font-size: 1rem;
-  }
-
-  /* Modal - full width on mobile */
-  .modal-content {
-    width: 95%;
-    min-width: unset;
-    max-width: unset;
-    max-height: 90vh;
-    top: 50%;
-    left: 50%;
-    transform: translate(-50%, -50%);
-  }
-
-  .modal-header {
-    padding: 12px 16px;
-  }
-
-  .modal-body {
-    padding: 16px;
-  }
-
-  .modal-footer {
-    padding: 12px 16px;
-    flex-wrap: wrap;
-  }
-
-  .modal-footer button {
-    flex: 1;
-    min-width: 120px;
-  }
-
-  /* Radio labels - stack better on mobile */
-  .radio-label {
-    padding: 10px;
-  }
-
-  .radio-label small {
-    margin-left: 20px;
-    font-size: 0.75rem;
-  }
-
-  /* Session list */
-  .session-item {
-    padding: 10px;
-  }
-
-  .session-info strong {
-    font-size: 0.9rem;
-  }
-
-  .session-info small {
-    font-size: 0.7rem;
-  }
-
-  /* Settings button in header */
-  #settingsBtn {
-    padding: 8px 12px;
-  }
-
-  /* Thinking panel adjustments for mobile */
-  .thinking-panel {
-    max-height: 250px;
-  }
-
-  .thinking-panel.collapsed {
-    max-height: 38px;
-  }
-
-  .thinking-header {
-    padding: 8px 10px;
-    font-size: 0.85rem;
-  }
-
-  .thinking-event {
-    font-size: 0.8rem;
-    padding: 6px 10px;
-  }
-
-  .thinking-event-details {
-    font-size: 0.7rem;
-    max-height: 80px;
-  }
-}
-
-/* Extra small devices (phones in portrait) */
-@media screen and (max-width: 480px) {
-  .mobile-menu {
-    width: 240px;
-  }
-
-  .msg {
-    max-width: 95%;
-    font-size: 0.9rem;
-    padding: 8px 12px;
-  }
-
-  #userInput {
-    font-size: 16px;
-    padding: 10px;
-  }
-
-  #sendBtn {
-    padding: 10px 14px;
-    font-size: 0.95rem;
-  }
-
-  .modal-header h3 {
-    font-size: 1.1rem;
-  }
-
-  .settings-section h4 {
-    font-size: 0.95rem;
-  }
-
-  .radio-label span {
-    font-size: 0.9rem;
-  }
-}
-
-/* Tablet landscape and desktop */
-@media screen and (min-width: 769px) {
-  /* Ensure mobile menu is hidden on desktop */
-  .mobile-menu,
-  .mobile-menu-overlay {
-    display: none !important;
-  }
-
-  .hamburger-menu {
-    display: none !important;
-  }
-}
@@ -1,362 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>🧠 Thinking Stream</title>
-    <style>
-        * {
-            margin: 0;
-            padding: 0;
-            box-sizing: border-box;
-        }
-
-        body {
-            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-            background: #0d0d0d;
-            color: #e0e0e0;
-            height: 100vh;
-            display: flex;
-            flex-direction: column;
-            overflow: hidden;
-        }
-
-        .header {
-            background: #1a1a1a;
-            padding: 15px 20px;
-            border-bottom: 2px solid #333;
-            display: flex;
-            align-items: center;
-            justify-content: space-between;
-        }
-
-        .header h1 {
-            font-size: 18px;
-            font-weight: bold;
-        }
-
-        .status {
-            display: flex;
-            align-items: center;
-            gap: 10px;
-            font-size: 14px;
-        }
-
-        .status-dot {
-            width: 10px;
-            height: 10px;
-            border-radius: 50%;
-            background: #666;
-        }
-
-        .status-dot.connected {
-            background: #90ee90;
-            box-shadow: 0 0 10px #90ee90;
-        }
-
-        .status-dot.disconnected {
-            background: #ff6b6b;
-        }
-
-        .events-container {
-            flex: 1;
-            overflow-y: auto;
-            padding: 20px;
-        }
-
-        .event {
-            margin-bottom: 12px;
-            padding: 10px 15px;
-            border-radius: 6px;
-            font-size: 14px;
-            font-family: 'Courier New', monospace;
-            animation: slideIn 0.3s ease-out;
-            border-left: 3px solid;
-        }
-
-        @keyframes slideIn {
-            from {
-                opacity: 0;
-                transform: translateX(-20px);
-            }
-            to {
-                opacity: 1;
-                transform: translateX(0);
-            }
-        }
-
-        .event-connected {
-            background: #1a2a1a;
-            border-color: #4a7c59;
-            color: #90ee90;
-        }
-
-        .event-thinking {
-            background: #1a3a1a;
-            border-color: #5a9c69;
-            color: #a0f0a0;
-        }
-
-        .event-tool_call {
-            background: #3a2a1a;
-            border-color: #d97706;
-            color: #fbbf24;
-        }
-
-        .event-tool_result {
-            background: #1a2a3a;
-            border-color: #0ea5e9;
-            color: #7dd3fc;
-        }
-
-        .event-done {
-            background: #2a1a3a;
-            border-color: #a855f7;
-            color: #e9d5ff;
-            font-weight: bold;
-        }
-
-        .event-error {
-            background: #3a1a1a;
-            border-color: #dc2626;
-            color: #fca5a5;
-        }
-
-        .event-icon {
-            display: inline-block;
-            margin-right: 8px;
-        }
-
-        .event-details {
-            font-size: 12px;
-            color: #999;
-            margin-top: 5px;
-            padding-left: 25px;
-        }
-
-        .footer {
-            background: #1a1a1a;
-            padding: 10px 20px;
-            border-top: 1px solid #333;
-            text-align: center;
-            font-size: 12px;
-            color: #666;
-        }
-
-        .clear-btn {
-            background: #333;
-            border: 1px solid #444;
-            color: #e0e0e0;
-            padding: 6px 12px;
-            border-radius: 4px;
-            cursor: pointer;
-            font-size: 12px;
-        }
-
-        .clear-btn:hover {
-            background: #444;
-        }
-
-        .empty-state {
-            text-align: center;
-            padding: 60px 20px;
-            color: #666;
-        }
-
-        .empty-state-icon {
-            font-size: 48px;
-            margin-bottom: 20px;
-        }
-    </style>
-</head>
-<body>
-    <div class="header">
-        <h1>🧠 Thinking Stream</h1>
-        <div class="status">
-            <div class="status-dot" id="statusDot"></div>
-            <span id="statusText">Connecting...</span>
-        </div>
-    </div>
-
-    <div class="events-container" id="events">
-        <div class="empty-state">
-            <div class="empty-state-icon">🤔</div>
-            <p>Waiting for thinking events...</p>
-            <p style="font-size: 12px; margin-top: 10px;">Events will appear here when Lyra uses tools</p>
-        </div>
-    </div>
-
-    <div class="footer">
-        <button class="clear-btn" onclick="clearEvents()">Clear Events</button>
-        <span style="margin: 0 20px;">|</span>
-        <span id="sessionInfo">Session: <span id="sessionId">-</span></span>
-    </div>
-
-    <script>
-        console.log('🧠 Thinking stream page loaded!');
-
-        // Read the session ID from the `?session=` query parameter; `get` returns null when it is absent.
-        const urlParams = new URLSearchParams(window.location.search);
-        const SESSION_ID = urlParams.get('session');
-        // NOTE(review): hard-coded private-network address; the stream is fetched from this host directly.
-        const CORTEX_BASE = "http://10.0.0.41:7081"; // Direct to cortex
-
-        console.log('Session ID:', SESSION_ID);
-        console.log('Cortex base:', CORTEX_BASE);
-
-        // Shared state used by connectStream/addEvent/clearEvents. These `let` bindings must be
-        // initialized before connectStream() is called below: the function declarations are
-        // hoisted, but `let` variables cannot be touched before this point.
-        let eventSource = null;
-        let eventCount = 0;
-
-        // Without a session ID there is no stream to subscribe to: show a notice instead of connecting.
-        if (!SESSION_ID) {
-            document.getElementById('events').innerHTML = `
-                <div class="empty-state">
-                    <div class="empty-state-icon">⚠️</div>
-                    <p>No session ID provided</p>
-                    <p style="font-size: 12px; margin-top: 10px;">Please open this from the main chat interface</p>
-                </div>
-            `;
-        } else {
-            // textContent (not innerHTML) keeps the URL-supplied ID from being parsed as markup.
-            document.getElementById('sessionId').textContent = SESSION_ID;
-            connectStream();
-        }
-
-        /**
-         * Open (or re-open) the server-sent-events stream for SESSION_ID.
-         *
-         * Any previously open stream is closed first. Incoming messages are parsed as JSON and
-         * handed to addEvent(); on error the status indicator flips to "Disconnected" and a
-         * reconnect is attempted after 2 seconds if the browser has given up on the stream.
-         */
-        function connectStream() {
-            if (eventSource) {
-                eventSource.close();
-            }
-
-            // Encode the ID so characters such as '/', '?' or '#' cannot change the request path.
-            const url = `${CORTEX_BASE}/stream/thinking/${encodeURIComponent(SESSION_ID)}`;
-            console.log('Connecting to:', url);
-
-            // Keep a local handle: callbacks below must talk about *this* stream, not whatever
-            // the shared `eventSource` variable points at by the time they run.
-            const source = new EventSource(url);
-            eventSource = source;
-
-            source.onopen = () => {
-                console.log('EventSource onopen fired');
-                updateStatus(true, 'Connected');
-            };
-
-            source.onmessage = (event) => {
-                console.log('Received message:', event.data);
-                try {
-                    const data = JSON.parse(event.data);
-                    // Update status to connected when first message arrives
-                    if (data.type === 'connected') {
-                        updateStatus(true, 'Connected');
-                    }
-                    addEvent(data);
-                } catch (e) {
-                    console.error('Failed to parse event:', e, event.data);
-                }
-            };
-
-            source.onerror = (error) => {
-                console.error('Stream error:', error, 'readyState:', source.readyState);
-                updateStatus(false, 'Disconnected');
-
-                // Try to reconnect after 2 seconds
-                setTimeout(() => {
-                    // Skip if a newer stream has replaced this one (stale timer) or if the
-                    // browser is still retrying on its own (readyState is not CLOSED).
-                    if (eventSource === source && source.readyState === EventSource.CLOSED) {
-                        console.log('Attempting to reconnect...');
-                        connectStream();
-                    }
-                }, 2000);
-            };
-        }
-
-        /**
-         * Reflect the connection state in the header.
-         * @param {boolean} connected - selects the "connected" or "disconnected" dot style
-         * @param {string} text - label shown next to the dot
-         */
-        function updateStatus(connected, text) {
-            const stateClass = connected ? 'connected' : 'disconnected';
-            const indicator = document.getElementById('statusDot');
-            const label = document.getElementById('statusText');
-
-            indicator.className = `status-dot ${stateClass}`;
-            label.textContent = text;
-        }
-
-        /**
-         * Render one stream event as a row in the events container and scroll it into view.
-         *
-         * The row is built from DOM nodes with textContent rather than an innerHTML template:
-         * messages, tool arguments and tool output come from the server and must never be
-         * interpreted as markup.
-         *
-         * @param {Object} event - parsed stream event; `type` selects icon and styling, and the
-         *   payload fields (message, args, result, final_answer) are read from `event.data`.
-         */
-        function addEvent(event) {
-            const container = document.getElementById('events');
-
-            // Remove empty state if present
-            if (eventCount === 0) {
-                container.innerHTML = '';
-            }
-
-            // Tolerate events that arrive without a data payload instead of throwing.
-            const data = event.data || {};
-
-            const eventDiv = document.createElement('div');
-            eventDiv.className = `event event-${event.type}`;
-
-            let icon = '';
-            let message = '';
-            let details = '';
-
-            switch (event.type) {
-                case 'connected':
-                    icon = '✓';
-                    message = 'Stream connected';
-                    details = `Session: ${event.session_id}`;
-                    break;
-
-                case 'thinking':
-                    icon = '🤔';
-                    message = data.message;
-                    break;
-
-                case 'tool_call':
-                    icon = '🔧';
-                    message = data.message;
-                    details = JSON.stringify(data.args, null, 2);
-                    break;
-
-                case 'tool_result':
-                    icon = '📊';
-                    message = data.message;
-                    if (data.result && data.result.stdout) {
-                        details = `stdout: ${data.result.stdout}`;
-                    }
-                    break;
-
-                case 'done':
-                    icon = '✅';
-                    message = data.message;
-                    details = data.final_answer;
-                    break;
-
-                case 'error':
-                    icon = '❌';
-                    message = data.message;
-                    break;
-
-                default:
-                    icon = '•';
-                    message = JSON.stringify(event.data);
-            }
-
-            const iconSpan = document.createElement('span');
-            iconSpan.className = 'event-icon';
-            iconSpan.textContent = icon;
-            eventDiv.appendChild(iconSpan);
-
-            const messageSpan = document.createElement('span');
-            // A missing message renders as empty text rather than the string "undefined".
-            messageSpan.textContent = message == null ? '' : String(message);
-            eventDiv.appendChild(messageSpan);
-
-            if (details) {
-                const detailsDiv = document.createElement('div');
-                detailsDiv.className = 'event-details';
-                detailsDiv.textContent = String(details);
-                eventDiv.appendChild(detailsDiv);
-            }
-
-            container.appendChild(eventDiv);
-            container.scrollTop = container.scrollHeight;
-            eventCount++;
-        }
-
-        /** Drop every rendered event, restore the "waiting" placeholder and reset the counter. */
-        function clearEvents() {
-            document.getElementById('events').innerHTML = `
-                <div class="empty-state">
-                    <div class="empty-state-icon">🤔</div>
-                    <p>Waiting for thinking events...</p>
-                    <p style="font-size: 12px; margin-top: 10px;">Events will appear here when Lyra uses tools</p>
-                </div>
-            `;
-            // Zero tells addEvent() to wipe the placeholder before rendering the next event.
-            eventCount = 0;
-        }
-
-        // Cleanup on page unload
-        window.addEventListener('beforeunload', () => {
-            // Nothing to release if the stream was never opened.
-            if (!eventSource) {
-                return;
-            }
-            eventSource.close();
-        });
-    </script>
-</body>
-</html>
@@ -1,21 +0,0 @@
-# ====================================
-# 🧠 CORTEX OPERATIONAL CONFIG
-# ====================================
-# Cortex-specific parameters (all other config inherited from root .env)
-
-CORTEX_MODE=autonomous
-CORTEX_LOOP_INTERVAL=300
-CORTEX_REFLECTION_INTERVAL=86400
-CORTEX_LOG_LEVEL=debug
-NEOMEM_HEALTH_CHECK_INTERVAL=300
-
-# Reflection output configuration
-REFLECTION_NOTE_TARGET=trilium
-REFLECTION_NOTE_PATH=/app/logs/reflections.log
-
-# Memory retrieval tuning
-RELEVANCE_THRESHOLD=0.78
-
-# NOTE: LLM backend URLs, OPENAI_API_KEY, database credentials,
-# and service URLs are all inherited from root .env
-# Cortex uses LLM_PRIMARY (vLLM on MI50) by default
@@ -1,15 +0,0 @@
-FROM python:3.11-slim
-WORKDIR /app
-
-# Install docker CLI for code executor
-RUN apt-get update && apt-get install -y \
-    docker.io \
-    && rm -rf /var/lib/apt/lists/*
-
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-COPY . .
-EXPOSE 7081
-# NOTE: Running with single worker to maintain SESSIONS global state in Intake.
-# If scaling to multiple workers, migrate SESSIONS to Redis or shared storage.
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7081"]
@@ -1,249 +0,0 @@
-# 📐 Project Lyra — Cognitive Assembly Spec
-**Version:** 0.6.1  
-**Status:** Canonical reference  
-**Purpose:** Define clear separation of Self, Thought, Reasoning, and Speech
-
---
-
-## 1. High-Level Overview
-
-Lyra is composed of **four distinct cognitive layers**, plus I/O.
-
-Each layer has:
- a **responsibility**
- a **scope**
- clear **inputs / outputs**
- explicit **authority boundaries**
-
-No layer is allowed to “do everything.”
-
---
-
-## 2. Layer Definitions
-
-### 2.1 Autonomy / Self (NON-LLM)
-
-**What it is**
- Persistent identity
- Long-term state
- Mood, preferences, values
- Continuity across time
-
-**What it is NOT**
- Not a reasoning engine
- Not a planner
- Not a speaker
- Not creative
-
-**Implementation**
- Data + light logic
- JSON / Python objects
- No LLM calls
-
-**Lives at**
-```
-project-lyra/autonomy/self/
-```
-
-**Inputs**
- Events (user message received, response sent)
- Time / idle ticks (later)
-
-**Outputs**
- Self state snapshot
- Flags / preferences (e.g. verbosity, tone bias)
-
---
-
-### 2.2 Inner Monologue (LLM, PRIVATE)
-
-**What it is**
- Internal language-based thought
- Reflection
- Intent formation
- “What do I think about this?”
-
-**What it is NOT**
- Not final reasoning
- Not execution
- Not user-facing
-
-**Model**
- MythoMax
-
-**Lives at**
-```
-project-lyra/autonomy/monologue/
-```
-
-**Inputs**
- User message
- Self state snapshot
- Recent context summary
-
-**Outputs**
- Intent
- Tone guidance
- Depth guidance
- “Consult executive?” flag
-
-**Example Output**
-```json
-{
-  "intent": "technical_exploration",
-  "tone": "focused",
-  "depth": "deep",
-  "consult_executive": true
-}
-```
-
---
-
-### 2.3 Cortex (Reasoning & Execution)
-
-**What it is**
- Thinking pipeline
- Planning
- Tool selection
- Task execution
- Draft generation
-
-**What it is NOT**
- Not identity
- Not personality
- Not persistent self
-
-**Models**
- DeepSeek-R1 → Executive / Planner
- GPT-4o-mini → Executor / Drafter
-
-**Lives at**
-```
-project-lyra/cortex/
-```
-
-**Inputs**
- User message
- Inner Monologue output
- Memory / RAG / tools
-
-**Outputs**
- Draft response (content only)
- Metadata (sources, confidence, etc.)
-
---
-
-### 2.4 Persona / Speech (LLM, USER-FACING)
-
-**What it is**
- Voice
- Style
- Expression
- Social behavior
-
-**What it is NOT**
- Not planning
- Not deep reasoning
- Not decision-making
-
-**Model**
- MythoMax
-
-**Lives at**
-```
-project-lyra/core/persona/
-```
-
-**Inputs**
- Draft response (from Cortex)
- Tone + intent (from Inner Monologue)
- Persona configuration
-
-**Outputs**
- Final user-visible text
-
---
-
-## 3. Message Flow (Authoritative)
-
-### 3.1 Standard Message Path
-
-```
-User
-  ↓
-UI
-  ↓
-Relay
-  ↓
-Cortex
-  ↓
-Autonomy / Self (state snapshot)
-  ↓
-Inner Monologue (MythoMax)
-  ↓
-[ consult_executive? ]
-    ├─ Yes → DeepSeek-R1 (plan)
-    └─ No  → skip
-  ↓
-GPT-4o-mini (execute & draft)
-  ↓
-Persona (MythoMax)
-  ↓
-Relay
-  ↓
-UI
-  ↓
-User
-```
-
-### 3.2 Fast Path (No Thinking)
-
-```
-User → UI → Relay → Persona → Relay → UI
-```
-
---
-
-## 4. Authority Rules (Non-Negotiable)
-
- Self never calls an LLM
- Inner Monologue never speaks to the user
- Cortex never applies personality
- Persona never reasons or plans
- DeepSeek never writes final answers
- MythoMax never plans execution
-
---
-
-## 5. Folder Mapping
-
-```
-project-lyra/
-├── autonomy/
-│   ├── self/
-│   ├── monologue/
-│   └── executive/
-├── cortex/
-├── core/
-│   └── persona/
-├── relay/
-└── ui/
-```
-
---
-
-## 6. Current Status
-
- UI ✔
- Relay ✔
- Cortex ✔
- Persona ✔
- Autonomy ✔
- Inner Monologue ⚠ partially wired
- Executive gating ⚠ planned
-
---
-
-## 7. Next Decision
-
-Decide whether **Inner Monologue runs every message** or **only when triggered**.
@@ -1 +0,0 @@
-# Autonomy module for Lyra
@@ -1 +0,0 @@
-"""Autonomous action execution system."""
@@ -1,480 +0,0 @@
-"""
-Autonomous Action Manager - executes safe, self-initiated actions.
-"""
-
-import logging
-import json
-from typing import Dict, List, Any, Optional
-from datetime import datetime
-
-logger = logging.getLogger(__name__)
-
-
-class AutonomousActionManager:
-    """
-    Manages safe autonomous actions that Lyra can take without explicit user prompting.
-
-    Whitelist of allowed actions:
-    - create_memory: Store information in NeoMem
-    - update_goal: Modify goal status
-    - schedule_reminder: Create future reminder
-    - summarize_session: Generate conversation summary
-    - learn_topic: Add topic to learning queue
-    - update_focus: Change current focus area
-    """
-
-    def __init__(self):
-        """Build the action whitelist and start with an empty audit log."""
-        # Public action names in registration order; each one is served by the
-        # private method of the same name with a leading underscore.
-        whitelisted = (
-            "create_memory",
-            "update_goal",
-            "schedule_reminder",
-            "summarize_session",
-            "learn_topic",
-            "update_focus",
-        )
-        self.allowed_actions = {
-            name: getattr(self, f"_{name}") for name in whitelisted
-        }
-
-        # Records appended by execute_action, kept for audit.
-        self.action_log = []
-
-    async def execute_action(
-        self,
-        action_type: str,
-        parameters: Dict[str, Any],
-        context: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Execute a single autonomous action.
-
-        Every outcome - success, failure, and rejection of a non-whitelisted
-        action - is appended to ``self.action_log`` so the audit trail is complete.
-        Exceptions raised by the action are caught and reported in the returned
-        record; they do not propagate to the caller.
-
-        Args:
-            action_type: Type of action (must be in whitelist)
-            parameters: Action-specific parameters
-            context: Current context state
-
-        Returns:
-            {
-                "success": bool,
-                "action": action_type,
-                "result": action_result (success only),
-                "timestamp": ISO timestamp,
-                "parameters": the parameters that were passed in,
-                "error": error message (failure/rejection only)
-            }
-        """
-        # Safety check: action must be whitelisted
-        if action_type not in self.allowed_actions:
-            logger.error(f"[ACTIONS] Attempted to execute non-whitelisted action: {action_type}")
-            rejection_record = {
-                "success": False,
-                "action": action_type,
-                "error": f"Action '{action_type}' not in whitelist",
-                "timestamp": datetime.utcnow().isoformat(),
-                "parameters": parameters
-            }
-
-            # Rejected attempts belong in the audit log as much as executed ones.
-            self.action_log.append(rejection_record)
-            return rejection_record
-
-        try:
-            logger.info(f"[ACTIONS] Executing autonomous action: {action_type}")
-
-            # Execute the action
-            action_func = self.allowed_actions[action_type]
-            result = await action_func(parameters, context)
-
-            # Log successful action
-            action_record = {
-                "success": True,
-                "action": action_type,
-                "result": result,
-                "timestamp": datetime.utcnow().isoformat(),
-                "parameters": parameters
-            }
-
-            self.action_log.append(action_record)
-            logger.info(f"[ACTIONS] Action {action_type} completed successfully")
-
-            return action_record
-
-        except Exception as e:
-            # exc_info keeps the traceback in the log; the caller only gets str(e).
-            logger.error(f"[ACTIONS] Action {action_type} failed: {e}", exc_info=True)
-
-            error_record = {
-                "success": False,
-                "action": action_type,
-                "error": str(e),
-                "timestamp": datetime.utcnow().isoformat(),
-                "parameters": parameters
-            }
-
-            self.action_log.append(error_record)
-            return error_record
-
-    async def execute_batch(
-        self,
-        actions: List[Dict[str, Any]],
-        context: Dict[str, Any]
-    ) -> List[Dict[str, Any]]:
-        """
-        Run a list of actions one after another via execute_action.
-
-        Args:
-            actions: List of {"action": str, "parameters": dict}; an entry may
-                also set "critical": True to abort the batch if it fails
-            context: Current context state, passed unchanged to every action
-
-        Returns:
-            The execute_action records, in order, up to and including the
-            failed critical action if there was one
-        """
-        outcomes = []
-
-        for spec in actions:
-            name = spec.get("action")
-            outcome = await self.execute_action(name, spec.get("parameters", {}), context)
-            outcomes.append(outcome)
-
-            # Keep going unless this action both failed and was marked critical
-            if outcome["success"] or not spec.get("critical", False):
-                continue
-
-            logger.warning(f"[ACTIONS] Critical action {name} failed, stopping batch")
-            break
-
-        return outcomes
-
-    # ========================================
-    # Whitelisted Action Implementations
-    # ========================================
-
-    async def _create_memory(
-        self,
-        parameters: Dict[str, Any],
-        context: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Store a memory entry through the NeoMem client.
-
-        Parameters:
-            - text: Memory content (required)
-            - tags: Optional tags for memory (defaults to an empty list)
-            - importance: importance score (defaults to 0.5)
-
-        The session ID is read from context["session_id"], defaulting to
-        "autonomous". If the NeoMem client cannot be imported, nothing is
-        stored and a record with memory_id "simulated" is returned instead.
-
-        Raises:
-            ValueError: if "text" is missing or empty
-        """
-        content = parameters.get("text")
-        if not content:
-            raise ValueError("Memory text required")
-
-        def _preview():
-            # At most the first 50 characters, with "..." marking a truncation
-            return content if len(content) <= 50 else content[:50] + "..."
-
-        store_kwargs = {
-            "text": content,
-            "session_id": context.get("session_id", "autonomous"),
-            "tags": parameters.get("tags", []),
-            "importance": parameters.get("importance", 0.5),
-        }
-
-        try:
-            # Imported lazily so a missing NeoMem client degrades to the fallback below
-            from memory.neomem_client import store_memory
-
-            stored = await store_memory(**store_kwargs)
-            return {"memory_id": stored.get("id"), "text": _preview()}
-
-        except ImportError:
-            logger.warning("[ACTIONS] NeoMem client not available, simulating memory storage")
-            return {
-                "memory_id": "simulated",
-                "text": _preview(),
-                "note": "NeoMem not available, memory not persisted"
-            }
-
-    async def _update_goal(
-        self,
-        parameters: Dict[str, Any],
-        context: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Update goal status in self-state.
-
-        Parameters:
-            - goal_id: Goal identifier (required)
-            - status: New status (pending/in_progress/completed), defaults to "in_progress"
-            - progress: Optional progress note
-
-        Returns:
-            {"goal_id", "status", "updated": True} when the goal was found and
-            saved, otherwise {"goal_id", "updated": False, "note": "Goal not found"}.
-
-        Raises:
-            ValueError: if "goal_id" is missing or empty
-        """
-        goal_id = parameters.get("goal_id")
-        if not goal_id:
-            raise ValueError("goal_id required")
-
-        status = parameters.get("status", "in_progress")
-        progress = parameters.get("progress")
-
-        # Import self-state manager
-        from autonomy.self.state import get_self_state_instance
-
-        state = get_self_state_instance()
-        active_goals = state._state.get("active_goals", [])
-
-        # Find and update goal (entries that are not dicts are ignored)
-        updated = False
-        for goal in active_goals:
-            if isinstance(goal, dict) and goal.get("id") == goal_id:
-                goal["status"] = status
-                if progress:
-                    goal["progress"] = progress
-                goal["updated_at"] = datetime.utcnow().isoformat()
-                updated = True
-                break
-
-        if updated:
-            # Pass the state dict explicitly, matching the _schedule_reminder call site.
-            # NOTE(review): _save_state's signature is not visible here - confirm it takes the dict.
-            state._save_state(state._state)
-            return {
-                "goal_id": goal_id,
-                "status": status,
-                "updated": True
-            }
-        else:
-            return {
-                "goal_id": goal_id,
-                "updated": False,
-                "note": "Goal not found"
-            }
-
-    async def _schedule_reminder(
-        self,
-        parameters: Dict[str, Any],
-        context: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Record a reminder in self-state for later delivery.
-
-        Parameters:
-            - message: Reminder text (required)
-            - delay_minutes: Minutes until reminder (defaults to 60)
-            - priority: priority score (defaults to 0.5)
-
-        The reminder is only stored; nothing in this method triggers it.
-
-        Raises:
-            ValueError: if "message" is missing or empty
-        """
-        message = parameters.get("message")
-        if not message:
-            raise ValueError("Reminder message required")
-
-        delay_minutes = parameters.get("delay_minutes", 60)
-
-        # No scheduler integration yet: the reminder is kept in the self-state
-        # dict under the "reminders" key.
-        from autonomy.self.state import get_self_state_instance
-
-        state = get_self_state_instance()
-        entry = {
-            "type": "reminder",
-            "message": message,
-            "scheduled_at": datetime.utcnow().isoformat(),
-            "trigger_at_minutes": delay_minutes,
-            "priority": parameters.get("priority", 0.5)
-        }
-
-        pending = state._state.setdefault("reminders", [])
-        pending.append(entry)
-        state._save_state(state._state)  # Pass state dict as argument
-
-        logger.info(f"[ACTIONS] Reminder scheduled: {message} (in {delay_minutes}min)")
-
-        return {
-            "message": message,
-            "delay_minutes": delay_minutes,
-            "note": "Reminder stored in self-state (scheduler integration pending)"
-        }
-
-    async def _summarize_session(
-        self,
-        parameters: Dict[str, Any],
-        context: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Generate a summary of current session.
-
-        Parameters:
-            - max_length: Max summary length in words
-            - focus_topics: Optional list of topics to emphasize
-        """
-        max_length = parameters.get("max_length", 200)
-        session_id = context.get("session_id", "unknown")
-
-        # Import summarizer (from deferred_summary or create simple one)
-        try:
-            from utils.deferred_summary import summarize_conversation
-
-            summary = await summarize_conversation(
-                session_id=session_id,
-                max_words=max_length
-            )
-
-            return {
-                "summary": summary,
-                "word_count": len(summary.split())
-            }
-
-        except ImportError:
-            # Fallback: simple summary
-            message_count = context.get("message_count", 0)
-            focus = context.get("monologue", {}).get("intent", "general")
-
-            summary = f"Session {session_id}: {message_count} messages exchanged, focused on {focus}."
-
-            return {
-                "summary": summary,
-                "word_count": len(summary.split()),
-                "note": "Simple summary (full summarizer not available)"
-            }
-
-    async def _learn_topic(
-        self,
-        parameters: Dict[str, Any],
-        context: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Add topic to learning queue.
-
-        Parameters:
-            - topic: Topic name (required)
-            - reason: Why this topic
-            - priority: 0.0-1.0 priority score
-        """
-        topic = parameters.get("topic")
-        if not topic:
-            raise ValueError("Topic required")
-
-        reason = parameters.get("reason", "autonomous learning")
-        priority = parameters.get("priority", 0.5)
-
-        # Import self-state manager
-        from autonomy.self.state import get_self_state_instance
-
-        state = get_self_state_instance()
-        state.add_learning_goal(topic)  # Only pass topic parameter
-
-        logger.info(f"[ACTIONS] Added to learning queue: {topic} (reason: {reason})")
-
-        return {
-            "topic": topic,
-            "reason": reason,
-            "queue_position": len(state._state.get("learning_queue", []))
-        }
-
-    async def _update_focus(
-        self,
-        parameters: Dict[str, Any],
-        context: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Update current focus area.
-
-        Parameters:
-            - focus: New focus area (required)
-            - reason: Why this focus
-        """
-        focus = parameters.get("focus")
-        if not focus:
-            raise ValueError("Focus required")
-
-        reason = parameters.get("reason", "autonomous update")
-
-        # Import self-state manager
-        from autonomy.self.state import get_self_state_instance
-
-        state = get_self_state_instance()
-        old_focus = state._state.get("focus", "none")
-
-        state._state["focus"] = focus
-        state._state["focus_updated_at"] = datetime.utcnow().isoformat()
-        state._state["focus_reason"] = reason
-        state._save_state(state._state)  # Pass state dict as argument
-
-        logger.info(f"[ACTIONS] Focus updated: {old_focus} -> {focus}")
-
-        return {
-            "old_focus": old_focus,
-            "new_focus": focus,
-            "reason": reason
-        }
-
-    # ========================================
-    # Utility Methods
-    # ========================================
-
-    def get_allowed_actions(self) -> List[str]:
-        """Get list of all allowed action types."""
-        return list(self.allowed_actions.keys())
-
-    def get_action_log(self, limit: int = 50) -> List[Dict[str, Any]]:
-        """
-        Get recent action log.
-
-        Args:
-            limit: Max number of entries to return
-
-        Returns:
-            List of action records
-        """
-        return self.action_log[-limit:]
-
-    def clear_action_log(self) -> None:
-        """Clear action log."""
-        self.action_log = []
-        logger.info("[ACTIONS] Action log cleared")
-
-    def validate_action(self, action_type: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Validate an action without executing it.
-
-        Args:
-            action_type: Type of action
-            parameters: Action parameters
-
-        Returns:
-            {
-                "valid": bool,
-                "action": action_type,
-                "errors": [error messages] or []
-            }
-        """
-        errors = []
-
-        # Check whitelist
-        if action_type not in self.allowed_actions:
-            errors.append(f"Action '{action_type}' not in whitelist")
-
-        # Check required parameters (basic validation)
-        if action_type == "create_memory" and not parameters.get("text"):
-            errors.append("Memory 'text' parameter required")
-
-        if action_type == "update_goal" and not parameters.get("goal_id"):
-            errors.append("Goal 'goal_id' parameter required")
-
-        if action_type == "schedule_reminder" and not parameters.get("message"):
-            errors.append("Reminder 'message' parameter required")
-
-        if action_type == "learn_topic" and not parameters.get("topic"):
-            errors.append("Learning 'topic' parameter required")
-
-        if action_type == "update_focus" and not parameters.get("focus"):
-            errors.append("Focus 'focus' parameter required")
-
-        return {
-            "valid": len(errors) == 0,
-            "action": action_type,
-            "errors": errors
-        }
-
-
-# Singleton instance
-_action_manager_instance = None
-
-
-def get_action_manager() -> AutonomousActionManager:
-    """
-    Get singleton action manager instance.
-
-    Returns:
-        AutonomousActionManager instance
-    """
-    global _action_manager_instance
-    if _action_manager_instance is None:
-        _action_manager_instance = AutonomousActionManager()
-    return _action_manager_instance
@@ -1 +0,0 @@
-"""Executive planning and decision-making module."""
@@ -1,121 +0,0 @@
-"""
-Executive planner - generates execution plans for complex requests.
-Activated when inner monologue sets consult_executive=true.
-"""
-
-import os
-import logging
-from typing import Dict, Any, Optional
-from llm.llm_router import call_llm
-
-EXECUTIVE_LLM = os.getenv("EXECUTIVE_LLM", "CLOUD").upper()
-VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
-
-logger = logging.getLogger(__name__)
-
-if VERBOSE_DEBUG:
-    logger.setLevel(logging.DEBUG)
-
-
-EXECUTIVE_SYSTEM_PROMPT = """
-You are Lyra's executive planning system.
-You create structured execution plans for complex tasks.
-You do NOT generate the final response - only the plan.
-
-Your plan should include:
-1. Task decomposition (break into steps)
-2. Required tools/resources
-3. Reasoning strategy
-4. Success criteria
-
-Return a concise plan in natural language.
-"""
-
-
-async def plan_execution(
-    user_prompt: str,
-    intent: str,
-    context_state: Dict[str, Any],
-    identity_block: Dict[str, Any]
-) -> Dict[str, Any]:
-    """
-    Generate execution plan for complex request.
-
-    Args:
-        user_prompt: User's message
-        intent: Detected intent from inner monologue
-        context_state: Full context
-        identity_block: Lyra's identity
-
-    Returns:
-        Plan dictionary with structure:
-        {
-            "summary": "One-line plan summary",
-            "plan_text": "Detailed plan",
-            "steps": ["step1", "step2", ...],
-            "tools_needed": ["RAG", "WEB", ...],
-            "estimated_complexity": "low | medium | high"
-        }
-    """
-
-    # Build planning prompt
-    tools_available = context_state.get("tools_available", [])
-
-    prompt = f"""{EXECUTIVE_SYSTEM_PROMPT}
-
-User request: {user_prompt}
-
-Detected intent: {intent}
-
-Available tools: {", ".join(tools_available) if tools_available else "None"}
-
-Session context:
- Message count: {context_state.get('message_count', 0)}
- Time since last message: {context_state.get('minutes_since_last_msg', 0):.1f} minutes
- Active project: {context_state.get('active_project', 'None')}
-
-Generate a structured execution plan.
-"""
-
-    if VERBOSE_DEBUG:
-        logger.debug(f"[EXECUTIVE] Planning prompt:\n{prompt}")
-
-    # Call executive LLM
-    plan_text = await call_llm(
-        prompt,
-        backend=EXECUTIVE_LLM,
-        temperature=0.3,  # Lower temperature for planning
-        max_tokens=500
-    )
-
-    if VERBOSE_DEBUG:
-        logger.debug(f"[EXECUTIVE] Generated plan:\n{plan_text}")
-
-    # Parse plan (simple heuristic extraction for Phase 1)
-    steps = []
-    tools_needed = []
-
-    for line in plan_text.split('\n'):
-        line_lower = line.lower()
-        if any(marker in line_lower for marker in ['step', '1.', '2.', '3.', '-']):
-            steps.append(line.strip())
-
-        if tools_available:
-            for tool in tools_available:
-                if tool.lower() in line_lower and tool not in tools_needed:
-                    tools_needed.append(tool)
-
-    # Estimate complexity (simple heuristic)
-    complexity = "low"
-    if len(steps) > 3 or len(tools_needed) > 1:
-        complexity = "medium"
-    if len(steps) > 5 or "research" in intent.lower() or "analyze" in intent.lower():
-        complexity = "high"
-
-    return {
-        "summary": plan_text.split('\n')[0][:100] if plan_text else "Complex task execution plan",
-        "plan_text": plan_text,
-        "steps": steps[:10],  # Limit to 10 steps
-        "tools_needed": tools_needed,
-        "estimated_complexity": complexity
-    }
@@ -1 +0,0 @@
-"""Pattern learning and adaptation system."""
@@ -1,383 +0,0 @@
-"""
-Pattern Learning System - learns from interaction patterns to improve autonomy.
-"""
-
-import logging
-import json
-import os
-from typing import Dict, List, Any, Optional
-from datetime import datetime
-from collections import defaultdict
-
-logger = logging.getLogger(__name__)
-
-
-class PatternLearner:
-    """
-    Learns from interaction patterns to improve Lyra's autonomous behavior.
-
-    Tracks:
-    - Topic frequencies (what users talk about)
-    - Time-of-day patterns (when users interact)
-    - User preferences (how users like responses)
-    - Successful response strategies (what works well)
-    """
-
-    def __init__(self, patterns_file: str = "/app/data/learned_patterns.json"):
-        """
-        Initialize pattern learner.
-
-        Args:
-            patterns_file: Path to persistent patterns storage
-        """
-        self.patterns_file = patterns_file
-        self.patterns = self._load_patterns()
-
-    def _load_patterns(self) -> Dict[str, Any]:
-        """Load patterns from disk."""
-        if os.path.exists(self.patterns_file):
-            try:
-                with open(self.patterns_file, 'r') as f:
-                    patterns = json.load(f)
-                    logger.info(f"[PATTERN_LEARNER] Loaded patterns from {self.patterns_file}")
-                    return patterns
-            except Exception as e:
-                logger.error(f"[PATTERN_LEARNER] Failed to load patterns: {e}")
-
-        # Initialize empty patterns
-        return {
-            "topic_frequencies": {},
-            "time_patterns": {},
-            "user_preferences": {},
-            "successful_strategies": {},
-            "interaction_count": 0,
-            "last_updated": datetime.utcnow().isoformat()
-        }
-
-    def _save_patterns(self) -> None:
-        """Save patterns to disk."""
-        try:
-            # Ensure directory exists
-            os.makedirs(os.path.dirname(self.patterns_file), exist_ok=True)
-
-            self.patterns["last_updated"] = datetime.utcnow().isoformat()
-
-            with open(self.patterns_file, 'w') as f:
-                json.dump(self.patterns, f, indent=2)
-
-            logger.debug(f"[PATTERN_LEARNER] Saved patterns to {self.patterns_file}")
-
-        except Exception as e:
-            logger.error(f"[PATTERN_LEARNER] Failed to save patterns: {e}")
-
-    async def learn_from_interaction(
-        self,
-        user_prompt: str,
-        response: str,
-        monologue: Dict[str, Any],
-        context: Dict[str, Any]
-    ) -> None:
-        """
-        Learn from a single interaction.
-
-        Args:
-            user_prompt: User's message
-            response: Lyra's response
-            monologue: Inner monologue analysis
-            context: Full context state
-        """
-        self.patterns["interaction_count"] += 1
-
-        # Learn topic frequencies
-        self._learn_topics(user_prompt, monologue)
-
-        # Learn time patterns
-        self._learn_time_patterns()
-
-        # Learn user preferences
-        self._learn_preferences(monologue, context)
-
-        # Learn successful strategies
-        self._learn_strategies(monologue, response, context)
-
-        # Save periodically (every 10 interactions)
-        if self.patterns["interaction_count"] % 10 == 0:
-            self._save_patterns()
-
-    def _learn_topics(self, user_prompt: str, monologue: Dict[str, Any]) -> None:
-        """Track topic frequencies."""
-        intent = monologue.get("intent", "unknown")
-
-        # Increment topic counter
-        topic_freq = self.patterns["topic_frequencies"]
-        topic_freq[intent] = topic_freq.get(intent, 0) + 1
-
-        # Extract keywords (simple approach - words > 5 chars)
-        keywords = [word.lower() for word in user_prompt.split() if len(word) > 5]
-
-        for keyword in keywords:
-            topic_freq[f"keyword:{keyword}"] = topic_freq.get(f"keyword:{keyword}", 0) + 1
-
-        logger.debug(f"[PATTERN_LEARNER] Topic learned: {intent}")
-
-    def _learn_time_patterns(self) -> None:
-        """Track time-of-day patterns."""
-        now = datetime.utcnow()
-        hour = now.hour
-
-        # Track interactions by hour
-        time_patterns = self.patterns["time_patterns"]
-        hour_key = f"hour_{hour:02d}"
-        time_patterns[hour_key] = time_patterns.get(hour_key, 0) + 1
-
-        # Track day of week
-        day_key = f"day_{now.strftime('%A').lower()}"
-        time_patterns[day_key] = time_patterns.get(day_key, 0) + 1
-
-    def _learn_preferences(self, monologue: Dict[str, Any], context: Dict[str, Any]) -> None:
-        """Learn user preferences from detected tone and depth."""
-        tone = monologue.get("tone", "neutral")
-        depth = monologue.get("depth", "medium")
-
-        prefs = self.patterns["user_preferences"]
-
-        # Track preferred tone
-        prefs.setdefault("tone_counts", {})
-        prefs["tone_counts"][tone] = prefs["tone_counts"].get(tone, 0) + 1
-
-        # Track preferred depth
-        prefs.setdefault("depth_counts", {})
-        prefs["depth_counts"][depth] = prefs["depth_counts"].get(depth, 0) + 1
-
-    def _learn_strategies(
-        self,
-        monologue: Dict[str, Any],
-        response: str,
-        context: Dict[str, Any]
-    ) -> None:
-        """
-        Learn which response strategies are successful.
-
-        Success indicators:
-        - Executive was consulted and plan generated
-        - Response length matches depth request
-        - Tone matches request
-        """
-        intent = monologue.get("intent", "unknown")
-        executive_used = context.get("executive_plan") is not None
-
-        strategies = self.patterns["successful_strategies"]
-        strategies.setdefault(intent, {})
-
-        # Track executive usage for this intent
-        if executive_used:
-            key = f"{intent}:executive_used"
-            strategies.setdefault(key, 0)
-            strategies[key] += 1
-
-        # Track response length patterns
-        response_length = len(response.split())
-        depth = monologue.get("depth", "medium")
-
-        length_key = f"{depth}:avg_words"
-        if length_key not in strategies:
-            strategies[length_key] = response_length
-        else:
-            # Running average
-            strategies[length_key] = (strategies[length_key] + response_length) / 2
-
-    # ========================================
-    # Pattern Analysis and Recommendations
-    # ========================================
-
-    def get_top_topics(self, limit: int = 10) -> List[tuple]:
-        """
-        Get most frequent topics.
-
-        Args:
-            limit: Max number of topics to return
-
-        Returns:
-            List of (topic, count) tuples, sorted by count
-        """
-        topics = self.patterns["topic_frequencies"]
-        sorted_topics = sorted(topics.items(), key=lambda x: x[1], reverse=True)
-        return sorted_topics[:limit]
-
-    def get_preferred_tone(self) -> str:
-        """
-        Get user's most preferred tone.
-
-        Returns:
-            Preferred tone string
-        """
-        prefs = self.patterns["user_preferences"]
-        tone_counts = prefs.get("tone_counts", {})
-
-        if not tone_counts:
-            return "neutral"
-
-        return max(tone_counts.items(), key=lambda x: x[1])[0]
-
-    def get_preferred_depth(self) -> str:
-        """
-        Get user's most preferred response depth.
-
-        Returns:
-            Preferred depth string
-        """
-        prefs = self.patterns["user_preferences"]
-        depth_counts = prefs.get("depth_counts", {})
-
-        if not depth_counts:
-            return "medium"
-
-        return max(depth_counts.items(), key=lambda x: x[1])[0]
-
-    def get_peak_hours(self, limit: int = 3) -> List[int]:
-        """
-        Get peak interaction hours.
-
-        Args:
-            limit: Number of top hours to return
-
-        Returns:
-            List of hours (0-23)
-        """
-        time_patterns = self.patterns["time_patterns"]
-        hour_counts = {k: v for k, v in time_patterns.items() if k.startswith("hour_")}
-
-        if not hour_counts:
-            return []
-
-        sorted_hours = sorted(hour_counts.items(), key=lambda x: x[1], reverse=True)
-        top_hours = sorted_hours[:limit]
-
-        # Extract hour numbers
-        return [int(h[0].split("_")[1]) for h in top_hours]
-
-    def should_use_executive(self, intent: str) -> bool:
-        """
-        Recommend whether to use executive for given intent based on patterns.
-
-        Args:
-            intent: Intent type
-
-        Returns:
-            True if executive is recommended
-        """
-        strategies = self.patterns["successful_strategies"]
-        key = f"{intent}:executive_used"
-
-        # If we've used executive for this intent >= 3 times, recommend it
-        return strategies.get(key, 0) >= 3
-
-    def get_recommended_response_length(self, depth: str) -> int:
-        """
-        Get recommended response length in words for given depth.
-
-        Args:
-            depth: Depth level (short/medium/deep)
-
-        Returns:
-            Recommended word count
-        """
-        strategies = self.patterns["successful_strategies"]
-        key = f"{depth}:avg_words"
-
-        avg_length = strategies.get(key, None)
-
-        if avg_length:
-            return int(avg_length)
-
-        # Defaults if no pattern learned
-        defaults = {
-            "short": 50,
-            "medium": 150,
-            "deep": 300
-        }
-
-        return defaults.get(depth, 150)
-
-    def get_insights(self) -> Dict[str, Any]:
-        """
-        Get high-level insights from learned patterns.
-
-        Returns:
-            {
-                "total_interactions": int,
-                "top_topics": [(topic, count), ...],
-                "preferred_tone": str,
-                "preferred_depth": str,
-                "peak_hours": [hours],
-                "learning_recommendations": [str]
-            }
-        """
-        recommendations = []
-
-        # Check if user consistently prefers certain settings
-        preferred_tone = self.get_preferred_tone()
-        preferred_depth = self.get_preferred_depth()
-
-        if preferred_tone != "neutral":
-            recommendations.append(f"User prefers {preferred_tone} tone")
-
-        if preferred_depth != "medium":
-            recommendations.append(f"User prefers {preferred_depth} depth responses")
-
-        # Check for recurring topics
-        top_topics = self.get_top_topics(limit=3)
-        if top_topics:
-            top_topic = top_topics[0][0]
-            recommendations.append(f"Consider adding '{top_topic}' to learning queue")
-
-        return {
-            "total_interactions": self.patterns["interaction_count"],
-            "top_topics": self.get_top_topics(limit=5),
-            "preferred_tone": preferred_tone,
-            "preferred_depth": preferred_depth,
-            "peak_hours": self.get_peak_hours(limit=3),
-            "learning_recommendations": recommendations
-        }
-
-    def reset_patterns(self) -> None:
-        """Reset all learned patterns (use with caution)."""
-        self.patterns = {
-            "topic_frequencies": {},
-            "time_patterns": {},
-            "user_preferences": {},
-            "successful_strategies": {},
-            "interaction_count": 0,
-            "last_updated": datetime.utcnow().isoformat()
-        }
-        self._save_patterns()
-        logger.warning("[PATTERN_LEARNER] Patterns reset")
-
-    def export_patterns(self) -> Dict[str, Any]:
-        """
-        Export all patterns for analysis.
-
-        Returns:
-            Complete patterns dict
-        """
-        return self.patterns.copy()
-
-
-# Singleton instance
-_learner_instance = None
-
-
-def get_pattern_learner(patterns_file: str = "/app/data/learned_patterns.json") -> PatternLearner:
-    """
-    Get singleton pattern learner instance.
-
-    Args:
-        patterns_file: Path to patterns file (only used on first call)
-
-    Returns:
-        PatternLearner instance
-    """
-    global _learner_instance
-    if _learner_instance is None:
-        _learner_instance = PatternLearner(patterns_file=patterns_file)
-    return _learner_instance
@@ -1 +0,0 @@
-# Inner monologue module
@@ -1,115 +0,0 @@
-import os
-import json
-import logging
-from typing import Dict
-from llm.llm_router import call_llm
-
-# Configuration
-MONOLOGUE_LLM = os.getenv("MONOLOGUE_LLM", "PRIMARY").upper()
-VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
-
-# Logger
-logger = logging.getLogger(__name__)
-
-if VERBOSE_DEBUG:
-    logger.setLevel(logging.DEBUG)
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(logging.Formatter(
-        '%(asctime)s [MONOLOGUE] %(levelname)s: %(message)s',
-        datefmt='%H:%M:%S'
-    ))
-    logger.addHandler(console_handler)
-
-MONOLOGUE_SYSTEM_PROMPT = """
-You are Lyra's inner monologue.
-You think privately.
-You do NOT speak to the user.
-You do NOT solve the task.
-You only reflect on intent, tone, and depth.
-
-Return ONLY valid JSON with:
- intent (string)
- tone (neutral | warm | focused | playful | direct)
- depth (short | medium | deep)
- consult_executive (true | false)
-"""
-
-class InnerMonologue:
-    async def process(self, context: Dict) -> Dict:
-        # Build full prompt with system instructions merged in
-        full_prompt = f"""{MONOLOGUE_SYSTEM_PROMPT}
-
-User message:
-{context['user_message']}
-
-Self state:
-{context['self_state']}
-
-Context summary:
-{context['context_summary']}
-
-Output JSON only:
-"""
-
-        # Call LLM using configured backend
-        if VERBOSE_DEBUG:
-            logger.debug(f"[InnerMonologue] Calling LLM with backend: {MONOLOGUE_LLM}")
-            logger.debug(f"[InnerMonologue] Prompt length: {len(full_prompt)} chars")
-
-        result = await call_llm(
-            full_prompt,
-            backend=MONOLOGUE_LLM,
-            temperature=0.7,
-            max_tokens=200
-        )
-
-        if VERBOSE_DEBUG:
-            logger.debug(f"[InnerMonologue] Raw LLM response:")
-            logger.debug(f"{'='*80}")
-            logger.debug(result)
-            logger.debug(f"{'='*80}")
-            logger.debug(f"[InnerMonologue] Response length: {len(result) if result else 0} chars")
-
-        # Parse JSON response - extract just the JSON part if there's extra text
-        try:
-            # Try direct parsing first
-            parsed = json.loads(result)
-            if VERBOSE_DEBUG:
-                logger.debug(f"[InnerMonologue] Successfully parsed JSON directly: {parsed}")
-            return parsed
-        except json.JSONDecodeError:
-            # If direct parsing fails, try to extract JSON from the response
-            if VERBOSE_DEBUG:
-                logger.debug(f"[InnerMonologue] Direct JSON parse failed, attempting extraction...")
-
-            # Look for JSON object (starts with { and ends with })
-            import re
-            json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', result, re.DOTALL)
-
-            if json_match:
-                json_str = json_match.group(0)
-                try:
-                    parsed = json.loads(json_str)
-                    if VERBOSE_DEBUG:
-                        logger.debug(f"[InnerMonologue] Successfully extracted and parsed JSON: {parsed}")
-                    return parsed
-                except json.JSONDecodeError as e:
-                    if VERBOSE_DEBUG:
-                        logger.warning(f"[InnerMonologue] Extracted JSON still invalid: {e}")
-            else:
-                if VERBOSE_DEBUG:
-                    logger.warning(f"[InnerMonologue] No JSON object found in response")
-
-            # Final fallback
-            if VERBOSE_DEBUG:
-                logger.warning(f"[InnerMonologue] All parsing attempts failed, using fallback")
-            else:
-                print(f"[InnerMonologue] JSON extraction failed")
-                print(f"[InnerMonologue] Raw response was: {result[:500]}")
-
-            return {
-                "intent": "unknown",
-                "tone": "neutral",
-                "depth": "medium",
-                "consult_executive": False
-            }
@@ -1 +0,0 @@
-"""Proactive monitoring and suggestion system."""
@@ -1,321 +0,0 @@
-"""
-Proactive Context Monitor - detects opportunities for autonomous suggestions.
-"""
-
-import logging
-import time
-from typing import Dict, List, Any, Optional
-from datetime import datetime, timedelta
-
-logger = logging.getLogger(__name__)
-
-
-class ProactiveMonitor:
-    """
-    Monitors conversation context and detects opportunities for proactive suggestions.
-
-    Triggers (one private ``_check_*`` method each):
-    - Long silence → Check-in
-    - Learning queue + high curiosity → Suggest exploration
-    - Active goals → Progress reminders
-    - Conversation milestones → Offer summary
-    - Pattern detection → Helpful suggestions
-
-    ``analyze_session`` returns at most one suggestion per call (the one with
-    the highest priority) and a per-session cooldown limits how often a
-    session receives one.
-    """
-
-    def __init__(self, min_priority: float = 0.6):
-        """
-        Initialize proactive monitor.
-
-        Args:
-            min_priority: Minimum priority for suggestions (0.0-1.0)
-        """
-        self.min_priority = min_priority
-        self.last_suggestion_time = {}  # session_id -> time.time() of the last suggestion
-        self.cooldown_seconds = 300  # 5 minutes between proactive suggestions
-
-    @staticmethod
-    def _entry_label(entry: Any, keys: tuple, default: str) -> str:
-        """Return display text for a queue/goal entry that is a plain string or a dict.
-
-        Args:
-            entry: The stored entry (string, dict, or anything else)
-            keys: Dict keys to try, in order, when ``entry`` is a dict
-            default: Text used when no usable label is found
-        """
-        if isinstance(entry, dict):
-            for key in keys:
-                value = entry.get(key)
-                if value:
-                    return str(value)
-            return default
-        return str(entry) if entry else default
-
-    async def analyze_session(
-        self,
-        session_id: str,
-        context_state: Dict[str, Any],
-        self_state: Dict[str, Any]
-    ) -> Optional[Dict[str, Any]]:
-        """
-        Analyze session for proactive suggestion opportunities.
-
-        Args:
-            session_id: Current session ID
-            context_state: Full context including message history
-            self_state: Lyra's current self-state
-
-        Returns:
-            {
-                "suggestion": "text to append to response",
-                "priority": 0.0-1.0,
-                "reason": "why this suggestion",
-                "type": "check_in | learning | goal_reminder | summary | pattern"
-            }
-            or None if no suggestion
-        """
-        # Check cooldown
-        if not self._check_cooldown(session_id):
-            logger.debug(f"[PROACTIVE] Session {session_id} in cooldown, skipping")
-            return None
-
-        # Run every trigger; order matters only for ties, where max() keeps the first
-        candidates = [
-            self._check_long_silence(context_state),
-            self._check_learning_opportunity(self_state),
-            self._check_active_goals(self_state, context_state),
-            self._check_conversation_milestone(context_state),
-            self._check_patterns(context_state, self_state),
-        ]
-
-        # Drop triggers that did not fire or that fall below the priority threshold
-        valid_suggestions = [
-            s for s in candidates
-            if s and s["priority"] >= self.min_priority
-        ]
-
-        if not valid_suggestions:
-            return None
-
-        # Return highest priority suggestion
-        best_suggestion = max(valid_suggestions, key=lambda x: x["priority"])
-
-        # The cooldown only starts when a suggestion is actually handed out
-        self._update_cooldown(session_id)
-
-        logger.info(f"[PROACTIVE] Suggestion generated: {best_suggestion['type']} (priority: {best_suggestion['priority']:.2f})")
-
-        return best_suggestion
-
-    def _check_cooldown(self, session_id: str) -> bool:
-        """Return True when the session may receive a suggestion (no cooldown running)."""
-        if session_id not in self.last_suggestion_time:
-            return True
-
-        elapsed = time.time() - self.last_suggestion_time[session_id]
-        return elapsed >= self.cooldown_seconds
-
-    def _update_cooldown(self, session_id: str) -> None:
-        """Record the current time as the session's last suggestion time."""
-        self.last_suggestion_time[session_id] = time.time()
-
-    def _check_long_silence(self, context_state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-        """
-        Check if user has been silent for a long time.
-
-        Reads ``minutes_since_last_msg`` from the context; a missing or None
-        value counts as 0 minutes.
-        """
-        minutes_since_last = context_state.get("minutes_since_last_msg") or 0
-
-        # If > 30 minutes, suggest check-in
-        if minutes_since_last > 30:
-            return {
-                "suggestion": "\n\n[Aside: I'm still here if you need anything!]",
-                "priority": 0.7,
-                "reason": f"User silent for {minutes_since_last:.0f} minutes",
-                "type": "check_in"
-            }
-
-        return None
-
-    def _check_learning_opportunity(self, self_state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-        """
-        Check if Lyra has learning queue items and high curiosity.
-
-        Queue entries may be plain strings or the dicts written by
-        ``SelfState.add_learning_goal`` (topic text under ``"topic"``).
-        """
-        learning_queue = self_state.get("learning_queue", [])
-        curiosity = self_state.get("curiosity", 0.5)
-
-        # If curiosity > 0.7 and learning queue exists
-        if curiosity > 0.7 and learning_queue:
-            # Render the topic text, never the raw dict, in the user-facing aside
-            topic = self._entry_label(learning_queue[0], ("topic",), "new topics")
-            return {
-                "suggestion": f"\n\n[Aside: I've been curious about {topic} lately. Would you like to explore it together?]",
-                "priority": 0.65,
-                "reason": f"High curiosity ({curiosity:.2f}) and learning queue present",
-                "type": "learning"
-            }
-
-        return None
-
-    def _check_active_goals(
-        self,
-        self_state: Dict[str, Any],
-        context_state: Dict[str, Any]
-    ) -> Optional[Dict[str, Any]]:
-        """
-        Check if there are active goals worth reminding about.
-
-        Goal entries may be plain strings or dicts. ``SelfState.add_active_goal``
-        stores the goal text under ``"goal"``; ``"name"`` is still accepted
-        for entries written in the older shape.
-        """
-        active_goals = self_state.get("active_goals", [])
-
-        if not active_goals:
-            return None
-
-        # Check if we've had multiple messages without goal progress
-        message_count = context_state.get("message_count", 0)
-
-        # Every 10 messages, consider goal reminder
-        if message_count % 10 == 0 and message_count > 0:
-            # Only the first active goal is mentioned
-            goal_name = self._entry_label(active_goals[0], ("goal", "name"), "your goal")
-
-            return {
-                "suggestion": f"\n\n[Aside: Still thinking about {goal_name}. Let me know if you want to work on it.]",
-                "priority": 0.6,
-                "reason": f"Active goal present, {message_count} messages since start",
-                "type": "goal_reminder"
-            }
-
-        return None
-
-    def _check_conversation_milestone(self, context_state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-        """
-        Check for conversation milestones (e.g., every 50 messages).
-        """
-        message_count = context_state.get("message_count", 0)
-
-        # Every 50 messages, offer summary
-        if message_count > 0 and message_count % 50 == 0:
-            return {
-                "suggestion": f"\n\n[Aside: We've exchanged {message_count} messages! Would you like a summary of our conversation?]",
-                "priority": 0.65,
-                "reason": f"Milestone: {message_count} messages",
-                "type": "summary"
-            }
-
-        return None
-
-    def _check_patterns(
-        self,
-        context_state: Dict[str, Any],
-        self_state: Dict[str, Any]
-    ) -> Optional[Dict[str, Any]]:
-        """
-        Check for behavioral patterns that merit suggestions.
-
-        Only ``self_state`` is consulted at present; ``context_state`` is
-        accepted but not read.
-        """
-        # Get current focus
-        focus = self_state.get("focus", "")
-
-        # Check if user keeps asking similar questions (detected via focus)
-        if focus and "repeated" in focus.lower():
-            return {
-                "suggestion": "\n\n[Aside: I notice we keep coming back to this topic. Would it help to create a summary or action plan?]",
-                "priority": 0.7,
-                "reason": "Repeated topic detected",
-                "type": "pattern"
-            }
-
-        # Check energy levels - if Lyra is low energy, maybe suggest break
-        energy = self_state.get("energy", 0.8)
-        if energy < 0.3:
-            return {
-                "suggestion": "\n\n[Aside: We've been at this for a while. Need a break or want to keep going?]",
-                "priority": 0.65,
-                "reason": f"Low energy ({energy:.2f})",
-                "type": "pattern"
-            }
-
-        return None
-
-    def format_suggestion(self, suggestion: Dict[str, Any]) -> str:
-        """
-        Format suggestion for appending to response.
-
-        Args:
-            suggestion: Suggestion dict from analyze_session()
-
-        Returns:
-            Formatted string to append to response ("" when the dict has no
-            "suggestion" key)
-        """
-        return suggestion.get("suggestion", "")
-
-    def set_cooldown_duration(self, seconds: int) -> None:
-        """
-        Update cooldown duration.
-
-        Args:
-            seconds: New cooldown duration
-        """
-        self.cooldown_seconds = seconds
-        logger.info(f"[PROACTIVE] Cooldown updated to {seconds}s")
-
-    def reset_cooldown(self, session_id: str) -> None:
-        """
-        Reset cooldown for a specific session.
-
-        Args:
-            session_id: Session to reset
-        """
-        if session_id in self.last_suggestion_time:
-            del self.last_suggestion_time[session_id]
-            logger.info(f"[PROACTIVE] Cooldown reset for session {session_id}")
-
-    def get_session_stats(self, session_id: str) -> Dict[str, Any]:
-        """
-        Get stats for a session's proactive monitoring.
-
-        Args:
-            session_id: Session to check
-
-        Returns:
-            {
-                "last_suggestion_time": timestamp or None,
-                "seconds_since_last": int,
-                "cooldown_active": bool,
-                "cooldown_remaining": int
-            }
-        """
-        last_time = self.last_suggestion_time.get(session_id)
-
-        if not last_time:
-            return {
-                "last_suggestion_time": None,
-                "seconds_since_last": 0,
-                "cooldown_active": False,
-                "cooldown_remaining": 0
-            }
-
-        seconds_since = int(time.time() - last_time)
-        cooldown_active = seconds_since < self.cooldown_seconds
-        cooldown_remaining = max(0, self.cooldown_seconds - seconds_since)
-
-        return {
-            "last_suggestion_time": last_time,
-            "seconds_since_last": seconds_since,
-            "cooldown_active": cooldown_active,
-            "cooldown_remaining": cooldown_remaining
-        }
-
-
-# Module-level cache for the single shared ProactiveMonitor
-_monitor_instance = None
-
-
-def get_proactive_monitor(min_priority: float = 0.6) -> ProactiveMonitor:
-    """
-    Return the shared ProactiveMonitor, building it on the first call.
-
-    Args:
-        min_priority: Priority threshold passed to the constructor; ignored
-            once the instance already exists
-
-    Returns:
-        The module-wide ProactiveMonitor instance
-    """
-    global _monitor_instance
-    if _monitor_instance is not None:
-        return _monitor_instance
-    _monitor_instance = ProactiveMonitor(min_priority=min_priority)
-    return _monitor_instance
@@ -1 +0,0 @@
-# Self state module
@@ -1,74 +0,0 @@
-"""
-Analyze interactions and update self-state accordingly.
-"""
-
-import logging
-from typing import Dict, Any
-from .state import update_self_state
-
-logger = logging.getLogger(__name__)
-
-
-async def analyze_and_update_state(
-    monologue: Dict[str, Any],
-    user_prompt: str,
-    response: str,
-    context: Dict[str, Any]
-) -> None:
-    """
-    Analyze interaction and update self-state.
-
-    This runs after response generation to update Lyra's internal state
-    based on the interaction. The intent string from the inner monologue is
-    matched against keyword groups; the first matching group decides the
-    deltas and the new focus.
-
-    Args:
-        monologue: Inner monologue output (its "intent" entry is read; a
-            missing, None, or non-string intent is treated as empty)
-        user_prompt: User's message
-        response: Lyra's response
-        context: Full context state (currently not read)
-    """
-
-    # Simple heuristics for state updates
-    # TODO: Replace with LLM-based sentiment analysis in Phase 2
-
-    mood_delta = 0.0
-    energy_delta = 0.0
-    confidence_delta = 0.0
-    curiosity_delta = 0.0
-    new_focus = None
-
-    # The monologue is parsed from model output, so "intent" may be null or
-    # not a string at all; fall back to "" instead of raising on .lower()
-    raw_intent = monologue.get("intent") if isinstance(monologue, dict) else None
-    intent = raw_intent.lower() if isinstance(raw_intent, str) else ""
-
-    if "technical" in intent or "complex" in intent:
-        energy_delta = -0.05  # Deep thinking is tiring
-        # A longer answer is taken as a sign the problem was handled well
-        confidence_delta = 0.05 if len(response) > 200 else -0.05
-        new_focus = "technical_problem"
-
-    elif "creative" in intent or "brainstorm" in intent:
-        mood_delta = 0.1  # Creative work is engaging
-        curiosity_delta = 0.1
-        new_focus = "creative_exploration"
-
-    elif "clarification" in intent or "confused" in intent:
-        confidence_delta = -0.05
-        new_focus = "understanding_user"
-
-    elif "simple" in intent or "casual" in intent:
-        energy_delta = 0.05  # Light conversation is refreshing
-        new_focus = "conversation"
-
-    # Check for learning opportunities (questions in user prompt)
-    prompt_lower = user_prompt.lower()
-    if "?" in user_prompt and any(word in prompt_lower for word in ["how", "why", "what"]):
-        curiosity_delta += 0.05
-
-    # Update state
-    update_self_state(
-        mood_delta=mood_delta,
-        energy_delta=energy_delta,
-        new_focus=new_focus,
-        confidence_delta=confidence_delta,
-        curiosity_delta=curiosity_delta
-    )
-
-    logger.info(f"Self-state updated based on interaction: focus={new_focus}")
@@ -1,189 +0,0 @@
-"""
-Self-state management for Project Lyra.
-Maintains persistent identity, mood, energy, and focus across sessions.
-"""
-
-import json
-import logging
-import os
-from datetime import datetime
-from pathlib import Path
-from typing import Dict, Any, Optional
-
-# Configuration
-# Path of the JSON file the state is persisted to; override with SELF_STATE_FILE.
-STATE_FILE = Path(os.getenv("SELF_STATE_FILE", "/app/data/self_state.json"))
-# Extra debug logging is enabled only when VERBOSE_DEBUG is "true" (case-insensitive).
-VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
-
-logger = logging.getLogger(__name__)
-
-if VERBOSE_DEBUG:
-    logger.setLevel(logging.DEBUG)
-
-# Default state structure
-# Template used by SelfState._create_default_state() when no state file can be loaded.
-DEFAULT_STATE = {
-    "mood": "neutral",  # Label; update_from_interaction picks from frustrated/neutral/engaged/excited
-    "energy": 0.8,  # Clamped to 0.0-1.0 by update_from_interaction
-    "focus": "user_request",
-    "confidence": 0.7,  # Clamped to 0.0-1.0 by update_from_interaction
-    "curiosity": 0.5,  # Clamped to 0.0-1.0 by update_from_interaction
-    "last_updated": None,  # ISO timestamp string, set whenever the state is created or updated
-    "interaction_count": 0,  # Incremented once per update_from_interaction call
-    "learning_queue": [],  # Topics Lyra wants to explore
-    "active_goals": [],  # Self-directed goals
-    "preferences": {
-        "verbosity": "medium",
-        "formality": "casual",
-        "proactivity": 0.3  # How likely to suggest things unprompted
-    },
-    "metadata": {
-        "version": "1.0",
-        "created_at": None  # ISO timestamp string, filled in when the default state is created
-    }
-}
-
-
-class SelfState:
-    """Manages Lyra's persistent self-state.
-
-    The state is a JSON-serializable dict kept in memory and written to
-    STATE_FILE after every change. It is loaded from STATE_FILE on
-    construction; when that fails, a fresh copy of DEFAULT_STATE is used.
-    """
-
-    def __init__(self):
-        self._state = self._load_state()
-
-    def _load_state(self) -> Dict[str, Any]:
-        """Load state from disk or create default."""
-        if not STATE_FILE.exists():
-            return self._create_default_state()
-
-        try:
-            with open(STATE_FILE, 'r') as f:
-                state = json.load(f)
-                logger.info(f"Loaded self-state from {STATE_FILE}")
-                return state
-        except Exception as e:
-            # Unreadable or corrupt file: log it and start over from defaults
-            logger.error(f"Failed to load self-state: {e}")
-            return self._create_default_state()
-
-    def _create_default_state(self) -> Dict[str, Any]:
-        """Create and save default state."""
-        import copy
-
-        # Deep copy: DEFAULT_STATE holds nested dicts/lists, and a shallow
-        # copy would let the writes below (and later queue appends) leak
-        # into the module-level template.
-        state = copy.deepcopy(DEFAULT_STATE)
-        now = datetime.now().isoformat()
-        state["metadata"]["created_at"] = now
-        state["last_updated"] = now
-        self._save_state(state)
-        logger.info("Created new default self-state")
-        return state
-
-    def _save_state(self, state: Dict[str, Any]) -> None:
-        """Persist state to disk.
-
-        Best effort: failures are logged, never raised, so a read-only or
-        missing data directory does not break the conversation flow.
-        """
-        try:
-            STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
-            with open(STATE_FILE, 'w') as f:
-                json.dump(state, f, indent=2)
-            if VERBOSE_DEBUG:
-                logger.debug(f"Saved self-state to {STATE_FILE}")
-        except Exception as e:
-            logger.error(f"Failed to save self-state: {e}")
-
-    def get_state(self) -> Dict[str, Any]:
-        """Get current state snapshot.
-
-        Returns:
-            An independent deep copy; mutating it (including nested lists
-            and dicts such as "learning_queue" or "preferences") does not
-            affect the managed state.
-        """
-        import copy
-
-        return copy.deepcopy(self._state)
-
-    def update_from_interaction(
-        self,
-        mood_delta: float = 0.0,
-        energy_delta: float = 0.0,
-        new_focus: Optional[str] = None,
-        confidence_delta: float = 0.0,
-        curiosity_delta: float = 0.0
-    ) -> None:
-        """
-        Update state based on interaction.
-
-        Args:
-            mood_delta: Change in mood (-1.0 to 1.0)
-            energy_delta: Change in energy (-1.0 to 1.0)
-            new_focus: New focus area
-            confidence_delta: Change in confidence
-            curiosity_delta: Change in curiosity
-        """
-        # Apply deltas with bounds checking
-        self._state["energy"] = max(0.0, min(1.0,
-            self._state.get("energy", 0.8) + energy_delta))
-
-        self._state["confidence"] = max(0.0, min(1.0,
-            self._state.get("confidence", 0.7) + confidence_delta))
-
-        self._state["curiosity"] = max(0.0, min(1.0,
-            self._state.get("curiosity", 0.5) + curiosity_delta))
-
-        # Update focus if provided
-        if new_focus:
-            self._state["focus"] = new_focus
-
-        # Update mood (simplified sentiment)
-        if mood_delta != 0:
-            mood_map = ["frustrated", "neutral", "engaged", "excited"]
-            current_mood_idx = 1  # neutral default
-            if self._state.get("mood") in mood_map:
-                current_mood_idx = mood_map.index(self._state["mood"])
-
-            # Round the step itself so positive and negative deltas behave
-            # symmetrically. Truncating (index + delta * 2) toward zero made
-            # any tiny negative delta drop a full mood level while an equal
-            # positive delta did nothing.
-            mood_step = int(round(mood_delta * 2))
-            new_mood_idx = max(0, min(len(mood_map) - 1,
-                current_mood_idx + mood_step))
-            self._state["mood"] = mood_map[new_mood_idx]
-
-        # Increment interaction counter
-        self._state["interaction_count"] = self._state.get("interaction_count", 0) + 1
-        self._state["last_updated"] = datetime.now().isoformat()
-
-        # Persist changes
-        self._save_state(self._state)
-
-        if VERBOSE_DEBUG:
-            logger.debug(f"Updated self-state: mood={self._state['mood']}, "
-                        f"energy={self._state['energy']:.2f}, "
-                        f"confidence={self._state['confidence']:.2f}")
-
-    def add_learning_goal(self, topic: str) -> None:
-        """Add topic to learning queue (no-op when the topic is already queued)."""
-        queue = self._state.get("learning_queue", [])
-        # Entries are normally dicts with a "topic" key; tolerate plain
-        # strings too so a hand-edited state file cannot crash this call
-        known_topics = [
-            item.get("topic") if isinstance(item, dict) else item
-            for item in queue
-        ]
-        if topic not in known_topics:
-            queue.append({
-                "topic": topic,
-                "added_at": datetime.now().isoformat(),
-                "priority": 0.5
-            })
-            self._state["learning_queue"] = queue
-            self._save_state(self._state)
-            logger.info(f"Added learning goal: {topic}")
-
-    def add_active_goal(self, goal: str, context: str = "") -> None:
-        """Add self-directed goal.
-
-        Args:
-            goal: Goal text, stored under the "goal" key
-            context: Optional free-form context stored with the goal
-        """
-        goals = self._state.get("active_goals", [])
-        goals.append({
-            "goal": goal,
-            "context": context,
-            "created_at": datetime.now().isoformat(),
-            "status": "active"
-        })
-        self._state["active_goals"] = goals
-        self._save_state(self._state)
-        logger.info(f"Added active goal: {goal}")
-
-
-# Process-wide SelfState, created lazily by get_self_state_instance()
-_self_state_instance = None
-
-def get_self_state_instance() -> SelfState:
-    """Return the shared SelfState, constructing it on first use."""
-    global _self_state_instance
-    if _self_state_instance is not None:
-        return _self_state_instance
-    _self_state_instance = SelfState()
-    return _self_state_instance
-
-
-def load_self_state() -> Dict[str, Any]:
-    """Public, backwards-compatible accessor: return the shared instance's state snapshot."""
-    state_manager = get_self_state_instance()
-    return state_manager.get_state()
-
-
-def update_self_state(**kwargs) -> None:
-    """Public API: forward the keyword arguments to the shared instance's update_from_interaction()."""
-    state_manager = get_self_state_instance()
-    state_manager.update_from_interaction(**kwargs)
@@ -1 +0,0 @@
-"""Autonomous tool invocation system."""
@@ -1,13 +0,0 @@
-"""Provider adapters for tool calling."""
-
-from .base import ToolAdapter
-from .openai_adapter import OpenAIAdapter
-from .ollama_adapter import OllamaAdapter
-from .llamacpp_adapter import LlamaCppAdapter
-
-__all__ = [
-    "ToolAdapter",
-    "OpenAIAdapter",
-    "OllamaAdapter",
-    "LlamaCppAdapter",
-]
@@ -1,79 +0,0 @@
-"""
-Base adapter interface for provider-agnostic tool calling.
-
-This module defines the abstract base class that all LLM provider adapters
-must implement to support tool calling in Lyra.
-"""
-
-from abc import ABC, abstractmethod
-from typing import Dict, List, Optional
-
-
-class ToolAdapter(ABC):
-    """Abstract interface every provider-specific tool adapter implements.
-
-    Providers (OpenAI, Ollama, llama.cpp, etc.) each handle tool calls in
-    their own way. Subclasses translate between Lyra's provider-agnostic
-    tool format and the provider's format so the rest of Lyra can use one
-    unified interface.
-    """
-
-    @abstractmethod
-    async def prepare_request(
-        self,
-        messages: List[Dict],
-        tools: List[Dict],
-        tool_choice: Optional[str] = None
-    ) -> Dict:
-        """Build the provider-specific request from Lyra tool definitions.
-
-        Args:
-            messages: Conversation history in OpenAI format
-            tools: List of Lyra tool definitions (provider-agnostic)
-            tool_choice: Optional tool forcing ("auto", "required", "none")
-
-        Returns:
-            dict: Provider-specific request payload ready to send to LLM
-        """
-        ...
-
-    @abstractmethod
-    async def parse_response(self, response) -> Dict:
-        """Turn a raw provider response into Lyra's standardized shape.
-
-        Args:
-            response: Raw provider response (format varies by provider)
-
-        Returns:
-            dict: Standardized response in Lyra format:
-                {
-                    "content": str,  # Assistant's text response
-                    "tool_calls": [  # List of tool calls or None
-                        {
-                            "id": str,       # Unique call ID
-                            "name": str,     # Tool name
-                            "arguments": dict  # Tool arguments
-                        }
-                    ] or None
-                }
-        """
-        ...
-
-    @abstractmethod
-    def format_tool_result(
-        self,
-        tool_call_id: str,
-        tool_name: str,
-        result: Dict
-    ) -> Dict:
-        """Wrap a tool's execution result as a message for the next LLM call.
-
-        Args:
-            tool_call_id: ID from the original tool call
-            tool_name: Name of the executed tool
-            result: Tool execution result dictionary
-
-        Returns:
-            dict: Message object to append to conversation
-                (format varies by provider)
-        """
-        ...
@@ -1,17 +0,0 @@
-"""
-llama.cpp adapter for tool calling.
-
-Since llama.cpp has similar constraints to Ollama (no native function calling),
-this adapter reuses the XML-based approach from OllamaAdapter.
-"""
-
-from .ollama_adapter import OllamaAdapter
-
-
-class LlamaCppAdapter(OllamaAdapter):
-    """Tool adapter for llama.cpp; behaves exactly like OllamaAdapter.
-
-    llama.cpp has no native function calling either, so everything
-    (XML-based prompt, response parsing, result formatting) is inherited
-    unchanged from OllamaAdapter.
-    """
@@ -1,191 +0,0 @@
-"""
-Ollama adapter for tool calling using XML-structured prompts.
-
-Since Ollama doesn't have native function calling, this adapter uses
-XML-based prompts to instruct the model how to call tools.
-"""
-
-import json
-import re
-from typing import Dict, List, Optional
-from .base import ToolAdapter
-
-
-class OllamaAdapter(ToolAdapter):
-    """Ollama adapter using XML-structured prompts for tool calling.
-
-    This adapter injects tool descriptions into the system prompt and
-    teaches the model to respond with XML when it wants to use a tool.
-    Responses are then scanned for <tool_call> blocks, which are turned
-    into Lyra's standardized tool-call dicts.
-    """
-
-    SYSTEM_PROMPT = """You have access to the following tools:
-
-{tool_descriptions}
-
-To use a tool, respond with XML in this exact format:
-<tool_call>
-  <name>tool_name</name>
-  <arguments>
-    <arg_name>value</arg_name>
-  </arguments>
-  <reason>why you're using this tool</reason>
-</tool_call>
-
-You can call multiple tools by including multiple <tool_call> blocks.
-If you don't need to use any tools, respond normally without XML.
-After tools are executed, you'll receive results and can continue the conversation."""
-
-    async def prepare_request(
-        self,
-        messages: List[Dict],
-        tools: List[Dict],
-        tool_choice: Optional[str] = None
-    ) -> Dict:
-        """Inject tool descriptions into system prompt.
-
-        The caller's ``messages`` list and the message dicts inside it are
-        never modified; the returned payload holds a new list.
-
-        Args:
-            messages: Conversation history
-            tools: Lyra tool definitions
-            tool_choice: Ignored for Ollama (no native support)
-
-        Returns:
-            dict: Request payload with modified messages
-        """
-        # Format tool descriptions
-        tool_desc = "\n".join(
-            f"- {t['name']}: {t['description']}\n  Parameters: {self._format_parameters(t['parameters'], t.get('required', []))}"
-            for t in tools
-        )
-
-        system_msg = self.SYSTEM_PROMPT.format(tool_descriptions=tool_desc)
-
-        modified_messages = list(messages)
-        if modified_messages and modified_messages[0].get("role") == "system":
-            # Prepend tool instructions to the existing system message. Work
-            # on a copy of that dict: list.copy() is shallow, so writing into
-            # the original would change the caller's history and stack the
-            # tool prompt again on every call.
-            system_entry = dict(modified_messages[0])
-            system_entry["content"] = system_msg + "\n\n" + (system_entry.get("content") or "")
-            modified_messages[0] = system_entry
-        else:
-            # Add new system message at the beginning
-            modified_messages.insert(0, {"role": "system", "content": system_msg})
-
-        return {"messages": modified_messages}
-
-    def _format_parameters(self, parameters: Dict, required: List[str]) -> str:
-        """Format parameters for tool description.
-
-        Args:
-            parameters: Parameter definitions
-            required: List of required parameter names
-
-        Returns:
-            str: Human-readable parameter description
-        """
-        param_strs = []
-        for name, spec in parameters.items():
-            req_marker = "(required)" if name in required else "(optional)"
-            param_strs.append(f"{name} {req_marker}: {spec.get('description', '')}")
-        return ", ".join(param_strs)
-
-    @staticmethod
-    def _trailing_text(block: str) -> str:
-        """Return the plain text that follows the tool call inside one split block.
-
-        Args:
-            block: Text between one "<tool_call>" opener and the next
-
-        Returns:
-            str: Stripped text after the call, or "" when there is none
-        """
-        _, closer, tail = block.partition('</tool_call>')
-        if closer:
-            return tail.strip()
-
-        # Malformed block without </tool_call>: keep what follows the LAST
-        # closing tag (greedy .* backtracks to it), unless that is more XML
-        end_match = re.search(r'^.*</[a-z_]+>\s*(.*)$', block, re.DOTALL)
-        if end_match:
-            after_content = end_match.group(1).strip()
-            if not after_content.startswith('<'):
-                return after_content
-        return ""
-
-    async def parse_response(self, response) -> Dict:
-        """Extract tool calls from XML in response.
-
-        Args:
-            response: String response from Ollama, or a dict carrying the
-                text under ["message"]["content"]
-
-        Returns:
-            dict: Standardized Lyra format with content and tool_calls.
-                "content" is the response text with the tool-call XML
-                removed; "tool_calls" is None when no call was found.
-        """
-        import logging
-        logger = logging.getLogger(__name__)
-
-        # Ollama returns a string
-        if isinstance(response, dict):
-            # "content" may be present but None; treat that as empty text
-            content = response.get("message", {}).get("content") or ""
-        else:
-            content = str(response)
-
-        logger.info(f"🔍 OllamaAdapter.parse_response: content length={len(content)}, has <tool_call>={('<tool_call>' in content)}")
-        logger.debug(f"🔍 Content preview: {content[:500]}")
-
-        # Parse XML tool calls
-        tool_calls = []
-        if "<tool_call>" in content:
-            # Split content by <tool_call> to get each block
-            blocks = content.split('<tool_call>')
-            logger.info(f"🔍 Split into {len(blocks)} blocks")
-
-            # First block is content before any tool calls
-            clean_parts = [blocks[0].strip()]
-
-            for idx, block in enumerate(blocks[1:]):  # Skip first block (pre-tool content)
-                # Extract tool name
-                name_match = re.search(r'<name>(.*?)</name>', block)
-                if not name_match:
-                    logger.warning(f"Block {idx} has no <name> tag, skipping")
-                    continue
-
-                name = name_match.group(1).strip()
-                arguments = {}
-
-                # Extract arguments
-                args_match = re.search(r'<arguments>(.*?)</arguments>', block, re.DOTALL)
-                if args_match:
-                    args_xml = args_match.group(1)
-                    # Parse <key>value</key> pairs
-                    arg_pairs = re.findall(r'<(\w+)>(.*?)</\1>', args_xml, re.DOTALL)
-                    arguments = {k: v.strip() for k, v in arg_pairs}
-
-                tool_calls.append({
-                    "id": f"call_{idx}",
-                    "name": name,
-                    "arguments": arguments
-                })
-
-                # Keep any prose the model wrote AFTER this tool call. The
-                # earlier search matched the first closing tag (</name>), so
-                # the "after" text always began with <arguments> and was
-                # thrown away.
-                after_content = self._trailing_text(block)
-                if after_content:
-                    clean_parts.append(after_content)
-
-            clean_content = "\n".join(part for part in clean_parts if part)
-        else:
-            clean_content = content
-
-        return {
-            "content": clean_content,
-            "tool_calls": tool_calls if tool_calls else None
-        }
-
-    def format_tool_result(
-        self,
-        tool_call_id: str,
-        tool_name: str,
-        result: Dict
-    ) -> Dict:
-        """Format tool result as XML for next prompt.
-
-        Args:
-            tool_call_id: ID from the original tool call (not included in
-                the XML)
-            tool_name: Name of the executed tool
-            result: Tool execution result
-
-        Returns:
-            dict: Message in user role with XML-formatted result
-        """
-        # Format result as XML
-        result_xml = f"""<tool_result>
-  <tool>{tool_name}</tool>
-  <result>{json.dumps(result, ensure_ascii=False)}</result>
-</tool_result>"""
-
-        return {
-            "role": "user",
-            "content": result_xml
-        }
@@ -1,130 +0,0 @@
-"""
-OpenAI adapter for tool calling using native function calling API.
-
-This adapter converts Lyra tool definitions to OpenAI's function calling
-format and parses OpenAI responses back to Lyra's standardized format.
-"""
-
-import json
-from typing import Dict, List, Optional
-from .base import ToolAdapter
-
-
-class OpenAIAdapter(ToolAdapter):
-    """OpenAI-specific adapter using native function calling.
-
-    OpenAI supports function calling natively through the 'tools' parameter
-    in chat completions. This adapter leverages that capability.
-    """
-
-    async def prepare_request(
-        self,
-        messages: List[Dict],
-        tools: List[Dict],
-        tool_choice: Optional[str] = None
-    ) -> Dict:
-        """Convert Lyra tools to OpenAI function calling format.
-
-        Args:
-            messages: Conversation history
-            tools: Lyra tool definitions
-            tool_choice: "auto", "required", "none", or None
-
-        Returns:
-            dict: Request payload with OpenAI-formatted tools
-        """
-        # Convert Lyra tools → OpenAI function calling format
-        openai_tools = []
-        for tool in tools:
-            openai_tools.append({
-                "type": "function",
-                "function": {
-                    "name": tool["name"],
-                    "description": tool["description"],
-                    "parameters": {
-                        "type": "object",
-                        "properties": tool["parameters"],
-                        "required": tool.get("required", [])
-                    }
-                }
-            })
-
-        payload = {
-            "messages": messages,
-            "tools": openai_tools
-        }
-
-        # Add tool_choice if specified
-        if tool_choice:
-            if tool_choice == "required":
-                payload["tool_choice"] = "required"
-            elif tool_choice == "none":
-                payload["tool_choice"] = "none"
-            else:  # "auto" or default
-                payload["tool_choice"] = "auto"
-
-        return payload
-
-    async def parse_response(self, response) -> Dict:
-        """Extract tool calls from OpenAI response.
-
-        Args:
-            response: OpenAI ChatCompletion response object
-
-        Returns:
-            dict: Standardized Lyra format with content and tool_calls
-        """
-        message = response.choices[0].message
-        content = message.content if message.content else ""
-        tool_calls = []
-
-        # Check if response contains tool calls
-        if hasattr(message, 'tool_calls') and message.tool_calls:
-            for tc in message.tool_calls:
-                try:
-                    # Parse arguments (may be JSON string)
-                    args = tc.function.arguments
-                    if isinstance(args, str):
-                        args = json.loads(args)
-
-                    tool_calls.append({
-                        "id": tc.id,
-                        "name": tc.function.name,
-                        "arguments": args
-                    })
-                except json.JSONDecodeError as e:
-                    # If arguments can't be parsed, include error
-                    tool_calls.append({
-                        "id": tc.id,
-                        "name": tc.function.name,
-                        "arguments": {},
-                        "error": f"Failed to parse arguments: {str(e)}"
-                    })
-
-        return {
-            "content": content,
-            "tool_calls": tool_calls if tool_calls else None
-        }
-
-    def format_tool_result(
-        self,
-        tool_call_id: str,
-        tool_name: str,
-        result: Dict
-    ) -> Dict:
-        """Format tool result as OpenAI tool message.
-
-        Args:
-            tool_call_id: ID from the original tool call
-            tool_name: Name of the executed tool
-            result: Tool execution result
-
-        Returns:
-            dict: Message in OpenAI tool message format
-        """
-        return {
-            "role": "tool",
-            "tool_call_id": tool_call_id,
-            "name": tool_name,
-            "content": json.dumps(result, ensure_ascii=False)
-        }
@@ -1,124 +0,0 @@
-"""
-Tool Decision Engine - decides which tools to invoke autonomously.
-"""
-
-import logging
-from typing import Dict, List, Any
-
-logger = logging.getLogger(__name__)
-
-
-class ToolDecisionEngine:
-    """Decides which tools to invoke based on context analysis."""
-
-    async def analyze_tool_needs(
-        self,
-        user_prompt: str,
-        monologue: Dict[str, Any],
-        context_state: Dict[str, Any],
-        available_tools: List[str]
-    ) -> Dict[str, Any]:
-        """
-        Analyze if tools should be invoked and which ones.
-
-        Args:
-            user_prompt: User's message
-            monologue: Inner monologue analysis
-            context_state: Full context
-            available_tools: List of available tools
-
-        Returns:
-            {
-                "should_invoke_tools": bool,
-                "tools_to_invoke": [
-                    {
-                        "tool": "RAG | WEB | WEATHER | etc",
-                        "query": "search query",
-                        "reason": "why this tool",
-                        "priority": 0.0-1.0
-                    },
-                    ...
-                ],
-                "confidence": 0.0-1.0
-            }
-        """
-
-        tools_to_invoke = []
-
-        # Check for memory/context needs
-        if any(word in user_prompt.lower() for word in [
-            "remember", "you said", "we discussed", "earlier", "before",
-            "last time", "previously", "what did"
-        ]):
-            tools_to_invoke.append({
-                "tool": "RAG",
-                "query": user_prompt,
-                "reason": "User references past conversation",
-                "priority": 0.9
-            })
-
-        # Check for web search needs
-        if any(word in user_prompt.lower() for word in [
-            "current", "latest", "news", "today", "what's happening",
-            "look up", "search for", "find information", "recent"
-        ]):
-            tools_to_invoke.append({
-                "tool": "WEB",
-                "query": user_prompt,
-                "reason": "Requires current information",
-                "priority": 0.8
-            })
-
-        # Check for weather needs
-        if any(word in user_prompt.lower() for word in [
-            "weather", "temperature", "forecast", "rain", "sunny", "climate"
-        ]):
-            tools_to_invoke.append({
-                "tool": "WEATHER",
-                "query": user_prompt,
-                "reason": "Weather information requested",
-                "priority": 0.95
-            })
-
-        # Check for code-related needs
-        if any(word in user_prompt.lower() for word in [
-            "code", "function", "debug", "implement", "algorithm",
-            "programming", "script", "syntax"
-        ]):
-            if "CODEBRAIN" in available_tools:
-                tools_to_invoke.append({
-                    "tool": "CODEBRAIN",
-                    "query": user_prompt,
-                    "reason": "Code-related task",
-                    "priority": 0.85
-                })
-
-        # Proactive RAG for complex queries (based on monologue)
-        intent = monologue.get("intent", "") if monologue else ""
-        if monologue and monologue.get("consult_executive"):
-            # Complex query - might benefit from context
-            if not any(t["tool"] == "RAG" for t in tools_to_invoke):
-                tools_to_invoke.append({
-                    "tool": "RAG",
-                    "query": user_prompt,
-                    "reason": "Complex query benefits from context",
-                    "priority": 0.6
-                })
-
-        # Sort by priority
-        tools_to_invoke.sort(key=lambda x: x["priority"], reverse=True)
-
-        max_priority = max([t["priority"] for t in tools_to_invoke]) if tools_to_invoke else 0.0
-
-        result = {
-            "should_invoke_tools": len(tools_to_invoke) > 0,
-            "tools_to_invoke": tools_to_invoke,
-            "confidence": max_priority
-        }
-
-        if tools_to_invoke:
-            logger.info(f"[TOOL_DECISION] Autonomous tool invocation recommended: {len(tools_to_invoke)} tools")
-            for tool in tools_to_invoke:
-                logger.info(f"  - {tool['tool']} (priority: {tool['priority']:.2f}): {tool['reason']}")
-
-        return result
@@ -1,12 +0,0 @@
-"""Tool executors for Lyra."""
-
-from .code_executor import execute_code
-from .web_search import search_web
-from .trilium import search_notes, create_note
-
-__all__ = [
-    "execute_code",
-    "search_web",
-    "search_notes",
-    "create_note",
-]
@@ -1,218 +0,0 @@
-"""
-Code executor for running Python and bash code in a sandbox container.
-
-This module provides secure code execution with timeout protection,
-output limits, and forbidden pattern detection.
-"""
-
-import asyncio
-import os
-import tempfile
-import re
-from typing import Dict
-import docker
-from docker.errors import (
-    DockerException,
-    APIError,
-    ContainerError,
-    ImageNotFound,
-    NotFound
-)
-
-
-# Forbidden patterns that pose security risks
-FORBIDDEN_PATTERNS = [
-    r'rm\s+-rf',  # Destructive file removal
-    r':\(\)\{\s*:\|:&\s*\};:',  # Fork bomb
-    r'mkfs',  # Filesystem formatting
-    r'/dev/sd[a-z]',  # Direct device access
-    r'dd\s+if=',  # Low-level disk operations
-    r'>\s*/dev/sd',  # Writing to devices
-    r'curl.*\|.*sh',  # Pipe to shell (common attack vector)
-    r'wget.*\|.*sh',  # Pipe to shell
-]
-
-
-async def execute_code(args: Dict) -> Dict:
-    """Execute code in sandbox container.
-
-    Args:
-        args: Dictionary containing:
-            - language (str): "python" or "bash"
-            - code (str): The code to execute
-            - reason (str): Why this code is being executed
-            - timeout (int, optional): Execution timeout in seconds
-
-    Returns:
-        dict: Execution result containing:
-            - stdout (str): Standard output
-            - stderr (str): Standard error
-            - exit_code (int): Process exit code
-            - execution_time (float): Time taken in seconds
-            OR
-            - error (str): Error message if execution failed
-    """
-    language = args.get("language")
-    code = args.get("code")
-    reason = args.get("reason", "No reason provided")
-    timeout = args.get("timeout", 30)
-
-    # Validation
-    if not language or language not in ["python", "bash"]:
-        return {"error": "Invalid language. Must be 'python' or 'bash'"}
-
-    if not code:
-        return {"error": "No code provided"}
-
-    # Security: Check for forbidden patterns
-    for pattern in FORBIDDEN_PATTERNS:
-        if re.search(pattern, code, re.IGNORECASE):
-            return {"error": f"Forbidden pattern detected for security reasons"}
-
-    # Validate and cap timeout
-    max_timeout = int(os.getenv("CODE_SANDBOX_MAX_TIMEOUT", "120"))
-    timeout = min(max(timeout, 1), max_timeout)
-
-    container = os.getenv("CODE_SANDBOX_CONTAINER", "lyra-code-sandbox")
-
-    # Validate container exists and is running
-    try:
-        docker_client = docker.from_env()
-        container_obj = docker_client.containers.get(container)
-
-        if container_obj.status != "running":
-            return {
-                "error": f"Sandbox container '{container}' is not running (status: {container_obj.status})",
-                "hint": "Start the container with: docker start " + container
-            }
-    except NotFound:
-        return {
-            "error": f"Sandbox container '{container}' not found",
-            "hint": "Ensure the container exists and is running"
-        }
-    except DockerException as e:
-        return {
-            "error": f"Docker daemon error: {str(e)}",
-            "hint": "Check Docker connectivity and permissions"
-        }
-
-    # Write code to temporary file
-    suffix = ".py" if language == "python" else ".sh"
-    try:
-        with tempfile.NamedTemporaryFile(
-            mode='w',
-            suffix=suffix,
-            delete=False,
-            encoding='utf-8'
-        ) as f:
-            f.write(code)
-            temp_file = f.name
-    except Exception as e:
-        return {"error": f"Failed to create temp file: {str(e)}"}
-
-    try:
-        # Copy file to container
-        exec_path = f"/executions/{os.path.basename(temp_file)}"
-
-        cp_proc = await asyncio.create_subprocess_exec(
-            "docker", "cp", temp_file, f"{container}:{exec_path}",
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE
-        )
-        await cp_proc.communicate()
-
-        if cp_proc.returncode != 0:
-            return {"error": "Failed to copy code to sandbox container"}
-
-        # Fix permissions so sandbox user can read the file (run as root)
-        chown_proc = await asyncio.create_subprocess_exec(
-            "docker", "exec", "-u", "root", container, "chown", "sandbox:sandbox", exec_path,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE
-        )
-        await chown_proc.communicate()
-
-        # Execute in container as sandbox user
-        if language == "python":
-            cmd = ["docker", "exec", "-u", "sandbox", container, "python3", exec_path]
-        else:  # bash
-            cmd = ["docker", "exec", "-u", "sandbox", container, "bash", exec_path]
-
-        start_time = asyncio.get_event_loop().time()
-
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE
-        )
-
-        try:
-            stdout, stderr = await asyncio.wait_for(
-                proc.communicate(),
-                timeout=timeout
-            )
-
-            execution_time = asyncio.get_event_loop().time() - start_time
-
-            # Truncate output to prevent memory issues (configurable)
-            max_output = int(os.getenv("CODE_SANDBOX_MAX_OUTPUT", "10240"))  # 10KB default
-            stdout_str = stdout[:max_output].decode('utf-8', errors='replace')
-            stderr_str = stderr[:max_output].decode('utf-8', errors='replace')
-
-            if len(stdout) > max_output:
-                stdout_str += f"\n... (output truncated, {len(stdout)} bytes total)"
-            if len(stderr) > max_output:
-                stderr_str += f"\n... (output truncated, {len(stderr)} bytes total)"
-
-            return {
-                "stdout": stdout_str,
-                "stderr": stderr_str,
-                "exit_code": proc.returncode,
-                "execution_time": round(execution_time, 2)
-            }
-
-        except asyncio.TimeoutError:
-            # Kill the process
-            try:
-                proc.kill()
-                await proc.wait()
-            except:
-                pass
-            return {"error": f"Execution timeout after {timeout}s"}
-
-    except APIError as e:
-        return {
-            "error": f"Docker API error: {e.explanation}",
-            "status_code": e.status_code
-        }
-    except ContainerError as e:
-        return {
-            "error": f"Container execution error: {str(e)}",
-            "exit_code": e.exit_status
-        }
-    except DockerException as e:
-        return {
-            "error": f"Docker error: {str(e)}",
-            "hint": "Check Docker daemon connectivity and permissions"
-        }
-    except Exception as e:
-        return {"error": f"Execution failed: {str(e)}"}
-
-    finally:
-        # Cleanup temporary file
-        try:
-            if 'temp_file' in locals():
-                os.unlink(temp_file)
-        except Exception as cleanup_error:
-            # Log but don't fail on cleanup errors
-            pass
-
-        # Optional: Clean up file from container (best effort)
-        try:
-            if 'exec_path' in locals() and 'container_obj' in locals():
-                container_obj.exec_run(
-                    f"rm -f {exec_path}",
-                    user="sandbox"
-                )
-        except:
-            pass  # Best effort cleanup
@@ -1,13 +0,0 @@
-"""Web search provider implementations."""
-
-from .base import SearchProvider, SearchResult, SearchResponse
-from .brave import BraveSearchProvider
-from .duckduckgo import DuckDuckGoProvider
-
-__all__ = [
-    "SearchProvider",
-    "SearchResult",
-    "SearchResponse",
-    "BraveSearchProvider",
-    "DuckDuckGoProvider",
-]
@@ -1,49 +0,0 @@
-"""Base interface for web search providers."""
-
-from abc import ABC, abstractmethod
-from typing import List, Optional
-from dataclasses import dataclass
-
-
-@dataclass
-class SearchResult:
-    """Standardized search result format."""
-    title: str
-    url: str
-    snippet: str
-    score: Optional[float] = None
-
-
-@dataclass
-class SearchResponse:
-    """Standardized search response."""
-    results: List[SearchResult]
-    count: int
-    provider: str
-    query: str
-    error: Optional[str] = None
-
-
-class SearchProvider(ABC):
-    """Abstract base class for search providers."""
-
-    @abstractmethod
-    async def search(
-        self,
-        query: str,
-        max_results: int = 5,
-        **kwargs
-    ) -> SearchResponse:
-        """Execute search and return standardized results."""
-        pass
-
-    @abstractmethod
-    async def health_check(self) -> bool:
-        """Check if provider is healthy and reachable."""
-        pass
-
-    @property
-    @abstractmethod
-    def name(self) -> str:
-        """Provider name."""
-        pass
@@ -1,123 +0,0 @@
-"""Brave Search API provider implementation."""
-
-import os
-import asyncio
-import aiohttp
-from .base import SearchProvider, SearchResponse, SearchResult
-from ..utils.resilience import async_retry
-
-
-class BraveSearchProvider(SearchProvider):
-    """Brave Search API implementation."""
-
-    def __init__(self):
-        self.api_key = os.getenv("BRAVE_SEARCH_API_KEY", "")
-        self.base_url = os.getenv(
-            "BRAVE_SEARCH_URL",
-            "https://api.search.brave.com/res/v1"
-        )
-        self.timeout = float(os.getenv("BRAVE_SEARCH_TIMEOUT", "10.0"))
-
-    @property
-    def name(self) -> str:
-        return "brave"
-
-    @async_retry(
-        max_attempts=3,
-        exceptions=(aiohttp.ClientError, asyncio.TimeoutError)
-    )
-    async def search(
-        self,
-        query: str,
-        max_results: int = 5,
-        **kwargs
-    ) -> SearchResponse:
-        """Execute Brave search with retry logic."""
-
-        if not self.api_key:
-            return SearchResponse(
-                results=[],
-                count=0,
-                provider=self.name,
-                query=query,
-                error="BRAVE_SEARCH_API_KEY not configured"
-            )
-
-        headers = {
-            "Accept": "application/json",
-            "X-Subscription-Token": self.api_key
-        }
-
-        params = {
-            "q": query,
-            "count": min(max_results, 20)  # Brave max is 20
-        }
-
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(
-                    f"{self.base_url}/web/search",
-                    headers=headers,
-                    params=params,
-                    timeout=aiohttp.ClientTimeout(total=self.timeout)
-                ) as resp:
-                    if resp.status == 200:
-                        data = await resp.json()
-                        results = []
-
-                        for item in data.get("web", {}).get("results", []):
-                            results.append(SearchResult(
-                                title=item.get("title", ""),
-                                url=item.get("url", ""),
-                                snippet=item.get("description", ""),
-                                score=item.get("score")
-                            ))
-
-                        return SearchResponse(
-                            results=results,
-                            count=len(results),
-                            provider=self.name,
-                            query=query
-                        )
-                    elif resp.status == 401:
-                        error = "Authentication failed. Check BRAVE_SEARCH_API_KEY"
-                    elif resp.status == 429:
-                        error = f"Rate limit exceeded. Status: {resp.status}"
-                    else:
-                        error_text = await resp.text()
-                        error = f"HTTP {resp.status}: {error_text}"
-
-                    return SearchResponse(
-                        results=[],
-                        count=0,
-                        provider=self.name,
-                        query=query,
-                        error=error
-                    )
-
-        except aiohttp.ClientConnectorError as e:
-            return SearchResponse(
-                results=[],
-                count=0,
-                provider=self.name,
-                query=query,
-                error=f"Cannot connect to Brave Search API: {str(e)}"
-            )
-        except asyncio.TimeoutError:
-            return SearchResponse(
-                results=[],
-                count=0,
-                provider=self.name,
-                query=query,
-                error=f"Search timeout after {self.timeout}s"
-            )
-
-    async def health_check(self) -> bool:
-        """Check if Brave API is reachable."""
-        if not self.api_key:
-            return False
-        try:
-            response = await self.search("test", max_results=1)
-            return response.error is None
-        except:
-            return False
@@ -1,60 +0,0 @@
-"""DuckDuckGo search provider with retry logic (legacy fallback)."""
-
-from duckduckgo_search import DDGS
-from .base import SearchProvider, SearchResponse, SearchResult
-from ..utils.resilience import async_retry
-
-
-class DuckDuckGoProvider(SearchProvider):
-    """DuckDuckGo search implementation with retry logic."""
-
-    @property
-    def name(self) -> str:
-        return "duckduckgo"
-
-    @async_retry(
-        max_attempts=3,
-        exceptions=(Exception,)  # DDG throws generic exceptions
-    )
-    async def search(
-        self,
-        query: str,
-        max_results: int = 5,
-        **kwargs
-    ) -> SearchResponse:
-        """Execute DuckDuckGo search with retry logic."""
-
-        try:
-            with DDGS() as ddgs:
-                results = []
-
-                for result in ddgs.text(query, max_results=max_results):
-                    results.append(SearchResult(
-                        title=result.get("title", ""),
-                        url=result.get("href", ""),
-                        snippet=result.get("body", "")
-                    ))
-
-                return SearchResponse(
-                    results=results,
-                    count=len(results),
-                    provider=self.name,
-                    query=query
-                )
-
-        except Exception as e:
-            return SearchResponse(
-                results=[],
-                count=0,
-                provider=self.name,
-                query=query,
-                error=f"Search failed: {str(e)}"
-            )
-
-    async def health_check(self) -> bool:
-        """Basic health check for DDG."""
-        try:
-            response = await self.search("test", max_results=1)
-            return response.error is None
-        except:
-            return False
@@ -1,216 +0,0 @@
-"""
-Trilium notes executor for searching and creating notes via ETAPI.
-
-This module provides integration with Trilium notes through the ETAPI HTTP API
-with improved resilience: timeout configuration, retry logic, and connection pooling.
-"""
-
-import os
-import asyncio
-import aiohttp
-from typing import Dict, Optional
-from ..utils.resilience import async_retry
-
-
-TRILIUM_URL = os.getenv("TRILIUM_URL", "http://localhost:8080")
-TRILIUM_TOKEN = os.getenv("TRILIUM_ETAPI_TOKEN", "")
-
-# Module-level session for connection pooling
-_session: Optional[aiohttp.ClientSession] = None
-
-
-def get_session() -> aiohttp.ClientSession:
-    """Get or create shared aiohttp session for connection pooling."""
-    global _session
-    if _session is None or _session.closed:
-        timeout = aiohttp.ClientTimeout(
-            total=float(os.getenv("TRILIUM_TIMEOUT", "30.0")),
-            connect=float(os.getenv("TRILIUM_CONNECT_TIMEOUT", "10.0"))
-        )
-        _session = aiohttp.ClientSession(timeout=timeout)
-    return _session
-
-
-@async_retry(
-    max_attempts=3,
-    exceptions=(aiohttp.ClientError, asyncio.TimeoutError)
-)
-async def search_notes(args: Dict) -> Dict:
-    """Search Trilium notes via ETAPI with retry logic.
-
-    Args:
-        args: Dictionary containing:
-            - query (str): Search query
-            - limit (int, optional): Maximum notes to return (default: 5, max: 20)
-
-    Returns:
-        dict: Search results containing:
-            - notes (list): List of notes with noteId, title, content, type
-            - count (int): Number of notes returned
-            OR
-            - error (str): Error message if search failed
-    """
-    query = args.get("query")
-    limit = args.get("limit", 5)
-
-    # Validation
-    if not query:
-        return {"error": "No query provided"}
-
-    if not TRILIUM_TOKEN:
-        return {
-            "error": "TRILIUM_ETAPI_TOKEN not configured in environment",
-            "hint": "Set TRILIUM_ETAPI_TOKEN in .env file"
-        }
-
-    # Cap limit
-    limit = min(max(limit, 1), 20)
-
-    try:
-        session = get_session()
-        async with session.get(
-            f"{TRILIUM_URL}/etapi/notes",
-            params={"search": query, "limit": limit},
-            headers={"Authorization": TRILIUM_TOKEN}
-        ) as resp:
-            if resp.status == 200:
-                data = await resp.json()
-                # ETAPI returns {"results": [...]} format
-                results = data.get("results", [])
-                return {
-                    "notes": results,
-                    "count": len(results)
-                }
-            elif resp.status == 401:
-                return {
-                    "error": "Authentication failed. Check TRILIUM_ETAPI_TOKEN",
-                    "status": 401
-                }
-            elif resp.status == 404:
-                return {
-                    "error": "Trilium API endpoint not found. Check TRILIUM_URL",
-                    "status": 404,
-                    "url": TRILIUM_URL
-                }
-            else:
-                error_text = await resp.text()
-                return {
-                    "error": f"HTTP {resp.status}: {error_text}",
-                    "status": resp.status
-                }
-
-    except aiohttp.ClientConnectorError as e:
-        return {
-            "error": f"Cannot connect to Trilium at {TRILIUM_URL}",
-            "hint": "Check if Trilium is running and URL is correct",
-            "details": str(e)
-        }
-    except asyncio.TimeoutError:
-        timeout = os.getenv("TRILIUM_TIMEOUT", "30.0")
-        return {
-            "error": f"Trilium request timeout after {timeout}s",
-            "hint": "Trilium may be slow or unresponsive"
-        }
-    except Exception as e:
-        return {
-            "error": f"Search failed: {str(e)}",
-            "type": type(e).__name__
-        }
-
-
-@async_retry(
-    max_attempts=3,
-    exceptions=(aiohttp.ClientError, asyncio.TimeoutError)
-)
-async def create_note(args: Dict) -> Dict:
-    """Create a note in Trilium via ETAPI with retry logic.
-
-    Args:
-        args: Dictionary containing:
-            - title (str): Note title
-            - content (str): Note content in markdown or HTML
-            - parent_note_id (str, optional): Parent note ID to nest under
-
-    Returns:
-        dict: Creation result containing:
-            - noteId (str): ID of created note
-            - title (str): Title of created note
-            - success (bool): True if created successfully
-            OR
-            - error (str): Error message if creation failed
-    """
-    title = args.get("title")
-    content = args.get("content")
-    parent_note_id = args.get("parent_note_id", "root")  # Default to root if not specified
-
-    # Validation
-    if not title:
-        return {"error": "No title provided"}
-
-    if not content:
-        return {"error": "No content provided"}
-
-    if not TRILIUM_TOKEN:
-        return {
-            "error": "TRILIUM_ETAPI_TOKEN not configured in environment",
-            "hint": "Set TRILIUM_ETAPI_TOKEN in .env file"
-        }
-
-    # Prepare payload
-    payload = {
-        "parentNoteId": parent_note_id,  # Always include parentNoteId
-        "title": title,
-        "content": content,
-        "type": "text",
-        "mime": "text/html"
-    }
-
-    try:
-        session = get_session()
-        async with session.post(
-            f"{TRILIUM_URL}/etapi/create-note",
-            json=payload,
-            headers={"Authorization": TRILIUM_TOKEN}
-        ) as resp:
-            if resp.status in [200, 201]:
-                data = await resp.json()
-                return {
-                    "noteId": data.get("noteId"),
-                    "title": title,
-                    "success": True
-                }
-            elif resp.status == 401:
-                return {
-                    "error": "Authentication failed. Check TRILIUM_ETAPI_TOKEN",
-                    "status": 401
-                }
-            elif resp.status == 404:
-                return {
-                    "error": "Trilium API endpoint not found. Check TRILIUM_URL",
-                    "status": 404,
-                    "url": TRILIUM_URL
-                }
-            else:
-                error_text = await resp.text()
-                return {
-                    "error": f"HTTP {resp.status}: {error_text}",
-                    "status": resp.status
-                }
-
-    except aiohttp.ClientConnectorError as e:
-        return {
-            "error": f"Cannot connect to Trilium at {TRILIUM_URL}",
-            "hint": "Check if Trilium is running and URL is correct",
-            "details": str(e)
-        }
-    except asyncio.TimeoutError:
-        timeout = os.getenv("TRILIUM_TIMEOUT", "30.0")
-        return {
-            "error": f"Trilium request timeout after {timeout}s",
-            "hint": "Trilium may be slow or unresponsive"
-        }
-    except Exception as e:
-        return {
-            "error": f"Note creation failed: {str(e)}",
-            "type": type(e).__name__
-        }
@@ -1,113 +0,0 @@
-"""
-Web search executor with pluggable provider support.
-
-Supports multiple providers with automatic fallback:
- Brave Search API (recommended, configurable)
- DuckDuckGo (legacy fallback)
-"""
-
-import os
-from typing import Dict, Optional
-from .search_providers.base import SearchProvider
-from .search_providers.brave import BraveSearchProvider
-from .search_providers.duckduckgo import DuckDuckGoProvider
-
-# Provider registry
-PROVIDERS = {
-    "brave": BraveSearchProvider,
-    "duckduckgo": DuckDuckGoProvider,
-}
-
-# Singleton provider instances
-_provider_instances: Dict[str, SearchProvider] = {}
-
-
-def get_provider(name: str) -> Optional[SearchProvider]:
-    """Get or create provider instance."""
-    if name not in _provider_instances:
-        provider_class = PROVIDERS.get(name)
-        if provider_class:
-            _provider_instances[name] = provider_class()
-    return _provider_instances.get(name)
-
-
-async def search_web(args: Dict) -> Dict:
-    """Search the web using configured provider with automatic fallback.
-
-    Args:
-        args: Dictionary containing:
-            - query (str): The search query
-            - max_results (int, optional): Maximum results to return (default: 5, max: 20)
-            - provider (str, optional): Force specific provider
-
-    Returns:
-        dict: Search results containing:
-            - results (list): List of search results with title, url, snippet
-            - count (int): Number of results returned
-            - provider (str): Provider that returned results
-            OR
-            - error (str): Error message if all providers failed
-    """
-    query = args.get("query")
-    max_results = args.get("max_results", 5)
-    forced_provider = args.get("provider")
-
-    # Validation
-    if not query:
-        return {"error": "No query provided"}
-
-    # Cap max_results
-    max_results = min(max(max_results, 1), 20)
-
-    # Get provider preference from environment
-    primary_provider = os.getenv("WEB_SEARCH_PROVIDER", "duckduckgo")
-    fallback_providers = os.getenv(
-        "WEB_SEARCH_FALLBACK",
-        "duckduckgo"
-    ).split(",")
-
-    # Build provider list
-    if forced_provider:
-        providers_to_try = [forced_provider]
-    else:
-        providers_to_try = [primary_provider] + [
-            p.strip() for p in fallback_providers if p.strip() != primary_provider
-        ]
-
-    # Try providers in order
-    last_error = None
-    for provider_name in providers_to_try:
-        provider = get_provider(provider_name)
-        if not provider:
-            last_error = f"Unknown provider: {provider_name}"
-            continue
-
-        try:
-            response = await provider.search(query, max_results)
-
-            # If successful, return results
-            if response.error is None and response.count > 0:
-                return {
-                    "results": [
-                        {
-                            "title": r.title,
-                            "url": r.url,
-                            "snippet": r.snippet,
-                        }
-                        for r in response.results
-                    ],
-                    "count": response.count,
-                    "provider": provider_name
-                }
-
-            last_error = response.error or "No results returned"
-
-        except Exception as e:
-            last_error = f"{provider_name} failed: {str(e)}"
-            continue
-
-    # All providers failed
-    return {
-        "error": f"All search providers failed. Last error: {last_error}",
-        "providers_tried": providers_to_try
-    }
@@ -1,235 +0,0 @@
-"""
-Provider-agnostic function caller with iterative tool calling loop.
-
-This module implements the iterative loop that allows LLMs to call tools
-multiple times until they have the information they need to answer the user.
-"""
-
-import os
-import logging
-from typing import Dict, List, Optional
-from llm.llm_router import call_llm, TOOL_ADAPTERS, BACKENDS
-from .registry import get_registry
-from .stream_events import get_stream_manager
-
-
-logger = logging.getLogger(__name__)
-
-
-class FunctionCaller:
-    """Provider-agnostic iterative tool calling loop.
-
-    This class orchestrates the back-and-forth between the LLM and tools:
-    1. Call LLM with tools available
-    2. If LLM requests tool calls, execute them
-    3. Add results to conversation
-    4. Repeat until LLM is done or max iterations reached
-    """
-
-    def __init__(self, backend: str, temperature: float = 0.7):
-        """Initialize function caller.
-
-        Args:
-            backend: LLM backend to use ("OPENAI", "OLLAMA", etc.)
-            temperature: Temperature for LLM calls
-        """
-        self.backend = backend
-        self.temperature = temperature
-        self.registry = get_registry()
-        self.max_iterations = int(os.getenv("MAX_TOOL_ITERATIONS", "5"))
-
-        # Resolve adapter for this backend
-        self.adapter = self._get_adapter()
-
-    def _get_adapter(self):
-        """Get the appropriate adapter for this backend."""
-        adapter = TOOL_ADAPTERS.get(self.backend)
-
-        # For PRIMARY/SECONDARY/FALLBACK, determine adapter based on provider
-        if adapter is None and self.backend in ["PRIMARY", "SECONDARY", "FALLBACK"]:
-            cfg = BACKENDS.get(self.backend, {})
-            provider = cfg.get("provider", "").lower()
-
-            if provider == "openai":
-                adapter = TOOL_ADAPTERS["OPENAI"]
-            elif provider == "ollama":
-                adapter = TOOL_ADAPTERS["OLLAMA"]
-            elif provider == "mi50":
-                adapter = TOOL_ADAPTERS["MI50"]
-
-        return adapter
-
-    async def call_with_tools(
-        self,
-        messages: List[Dict],
-        max_tokens: int = 2048,
-        session_id: Optional[str] = None
-    ) -> Dict:
-        """Execute LLM with iterative tool calling.
-
-        Args:
-            messages: Conversation history
-            max_tokens: Maximum tokens for LLM response
-            session_id: Optional session ID for streaming events
-
-        Returns:
-            dict: {
-                "content": str,  # Final response
-                "iterations": int,  # Number of iterations
-                "tool_calls": list,  # All tool calls made
-                "messages": list,  # Full conversation history
-                "truncated": bool (optional)  # True if max iterations reached
-            }
-        """
-        logger.info(f"🔍 FunctionCaller.call_with_tools() invoked with {len(messages)} messages")
-        tools = self.registry.get_tool_definitions()
-        logger.info(f"🔍 Got {len(tools or [])} tool definitions from registry")
-
-        # Get stream manager for emitting events
-        stream_manager = get_stream_manager()
-        should_stream = session_id and stream_manager.has_subscribers(session_id)
-
-        # If no tools are enabled, just call LLM directly
-        if not tools:
-            logger.warning("FunctionCaller invoked but no tools are enabled")
-            response = await call_llm(
-                messages=messages,
-                backend=self.backend,
-                temperature=self.temperature,
-                max_tokens=max_tokens
-            )
-            return {
-                "content": response,
-                "iterations": 1,
-                "tool_calls": [],
-                "messages": messages + [{"role": "assistant", "content": response}]
-            }
-
-        conversation = messages.copy()
-        all_tool_calls = []
-
-        for iteration in range(self.max_iterations):
-            logger.info(f"Tool calling iteration {iteration + 1}/{self.max_iterations}")
-
-            # Emit thinking event
-            if should_stream:
-                await stream_manager.emit(session_id, "thinking", {
-                    "message": f"🤔 Thinking... (iteration {iteration + 1}/{self.max_iterations})"
-                })
-
-            # Call LLM with tools
-            try:
-                response = await call_llm(
-                    messages=conversation,
-                    backend=self.backend,
-                    temperature=self.temperature,
-                    max_tokens=max_tokens,
-                    tools=tools,
-                    tool_choice="auto",
-                    return_adapter_response=True
-                )
-            except Exception as e:
-                logger.error(f"LLM call failed: {str(e)}")
-                if should_stream:
-                    await stream_manager.emit(session_id, "error", {
-                        "message": f"❌ Error: {str(e)}"
-                    })
-                return {
-                    "content": f"Error calling LLM: {str(e)}",
-                    "iterations": iteration + 1,
-                    "tool_calls": all_tool_calls,
-                    "messages": conversation,
-                    "error": True
-                }
-
-            # Add assistant message to conversation
-            if response.get("content"):
-                conversation.append({
-                    "role": "assistant",
-                    "content": response["content"]
-                })
-
-            # Check for tool calls
-            tool_calls = response.get("tool_calls")
-            logger.debug(f"Response from LLM: content_length={len(response.get('content', ''))}, tool_calls={tool_calls}")
-            if not tool_calls:
-                # No more tool calls - LLM is done
-                logger.info(f"Tool calling complete after {iteration + 1} iterations")
-                if should_stream:
-                    await stream_manager.emit(session_id, "done", {
-                        "message": "✅ Complete!",
-                        "final_answer": response["content"]
-                    })
-                return {
-                    "content": response["content"],
-                    "iterations": iteration + 1,
-                    "tool_calls": all_tool_calls,
-                    "messages": conversation
-                }
-
-            # Execute each tool call
-            logger.info(f"Executing {len(tool_calls)} tool call(s)")
-            for tool_call in tool_calls:
-                all_tool_calls.append(tool_call)
-
-                tool_name = tool_call.get("name")
-                tool_args = tool_call.get("arguments", {})
-                tool_id = tool_call.get("id", "unknown")
-
-                logger.info(f"Calling tool: {tool_name} with args: {tool_args}")
-
-                # Emit tool call event
-                if should_stream:
-                    await stream_manager.emit(session_id, "tool_call", {
-                        "tool": tool_name,
-                        "args": tool_args,
-                        "message": f"🔧 Using tool: {tool_name}"
-                    })
-
-                try:
-                    # Execute tool
-                    result = await self.registry.execute_tool(tool_name, tool_args)
-                    logger.info(f"Tool {tool_name} executed successfully")
-
-                    # Emit tool result event
-                    if should_stream:
-                        # Format result preview
-                        result_preview = str(result)
-                        if len(result_preview) > 200:
-                            result_preview = result_preview[:200] + "..."
-
-                        await stream_manager.emit(session_id, "tool_result", {
-                            "tool": tool_name,
-                            "result": result,
-                            "message": f"📊 Result: {result_preview}"
-                        })
-
-                except Exception as e:
-                    logger.error(f"Tool {tool_name} execution failed: {str(e)}")
-                    result = {"error": f"Tool execution failed: {str(e)}"}
-
-                # Format result using adapter
-                if not self.adapter:
-                    logger.warning(f"No adapter available for backend {self.backend}, using fallback format")
-                    result_msg = {
-                        "role": "user",
-                        "content": f"Tool {tool_name} result: {result}"
-                    }
-                else:
-                    result_msg = self.adapter.format_tool_result(
-                        tool_id,
-                        tool_name,
-                        result
-                    )
-
-                conversation.append(result_msg)
-
-        # Max iterations reached without completion
-        logger.warning(f"Tool calling truncated after {self.max_iterations} iterations")
-        return {
-            "content": response.get("content", ""),
-            "iterations": self.max_iterations,
-            "tool_calls": all_tool_calls,
-            "messages": conversation,
-            "truncated": True
-        }
@@ -1,357 +0,0 @@
-"""
-Tool Orchestrator - executes autonomous tool invocations asynchronously.
-"""
-
-import asyncio
-import logging
-from typing import Dict, List, Any, Optional
-import os
-
-logger = logging.getLogger(__name__)
-
-
-class ToolOrchestrator:
-    """Orchestrates async tool execution and result aggregation."""
-
-    def __init__(self, tool_timeout: int = 30):
-        """
-        Initialize orchestrator.
-
-        Args:
-            tool_timeout: Max seconds per tool call (default 30)
-        """
-        self.tool_timeout = tool_timeout
-        self.available_tools = self._discover_tools()
-
-    def _discover_tools(self) -> Dict[str, Any]:
-        """Discover available tool modules."""
-        tools = {}
-
-        # Import tool modules as they become available
-        if os.getenv("NEOMEM_ENABLED", "false").lower() == "true":
-            try:
-                from memory.neomem_client import search_neomem
-                tools["RAG"] = search_neomem
-                logger.debug("[ORCHESTRATOR] RAG tool available")
-            except ImportError:
-                logger.debug("[ORCHESTRATOR] RAG tool not available")
-        else:
-            logger.info("[ORCHESTRATOR] NEOMEM_ENABLED is false; RAG tool disabled")
-
-        try:
-            from integrations.web_search import web_search
-            tools["WEB"] = web_search
-            logger.debug("[ORCHESTRATOR] WEB tool available")
-        except ImportError:
-            logger.debug("[ORCHESTRATOR] WEB tool not available")
-
-        try:
-            from integrations.weather import get_weather
-            tools["WEATHER"] = get_weather
-            logger.debug("[ORCHESTRATOR] WEATHER tool available")
-        except ImportError:
-            logger.debug("[ORCHESTRATOR] WEATHER tool not available")
-
-        try:
-            from integrations.codebrain import query_codebrain
-            tools["CODEBRAIN"] = query_codebrain
-            logger.debug("[ORCHESTRATOR] CODEBRAIN tool available")
-        except ImportError:
-            logger.debug("[ORCHESTRATOR] CODEBRAIN tool not available")
-
-        return tools
-
-    async def execute_tools(
-        self,
-        tools_to_invoke: List[Dict[str, Any]],
-        context_state: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Execute multiple tools asynchronously.
-
-        Args:
-            tools_to_invoke: List of tool specs from decision engine
-                [{"tool": "RAG", "query": "...", "reason": "...", "priority": 0.9}, ...]
-            context_state: Full context for tool execution
-
-        Returns:
-            {
-                "results": {
-                    "RAG": {...},
-                    "WEB": {...},
-                    ...
-                },
-                "execution_summary": {
-                    "tools_invoked": ["RAG", "WEB"],
-                    "successful": ["RAG"],
-                    "failed": ["WEB"],
-                    "total_time_ms": 1234
-                }
-            }
-        """
-        import time
-        start_time = time.time()
-
-        logger.info(f"[ORCHESTRATOR] Executing {len(tools_to_invoke)} tools asynchronously")
-
-        # Create tasks for each tool
-        tasks = []
-        tool_names = []
-
-        for tool_spec in tools_to_invoke:
-            tool_name = tool_spec["tool"]
-            query = tool_spec["query"]
-
-            if tool_name in self.available_tools:
-                task = self._execute_single_tool(tool_name, query, context_state)
-                tasks.append(task)
-                tool_names.append(tool_name)
-                logger.debug(f"[ORCHESTRATOR] Queued {tool_name}: {query[:50]}...")
-            else:
-                logger.warning(f"[ORCHESTRATOR] Tool {tool_name} not available, skipping")
-
-        # Execute all tools concurrently with timeout
-        results = {}
-        successful = []
-        failed = []
-
-        if tasks:
-            try:
-                # Wait for all tasks with global timeout
-                completed = await asyncio.wait_for(
-                    asyncio.gather(*tasks, return_exceptions=True),
-                    timeout=self.tool_timeout
-                )
-
-                # Process results
-                for tool_name, result in zip(tool_names, completed):
-                    if isinstance(result, Exception):
-                        logger.error(f"[ORCHESTRATOR] {tool_name} failed: {result}")
-                        results[tool_name] = {"error": str(result), "success": False}
-                        failed.append(tool_name)
-                    else:
-                        logger.info(f"[ORCHESTRATOR] {tool_name} completed successfully")
-                        results[tool_name] = result
-                        successful.append(tool_name)
-
-            except asyncio.TimeoutError:
-                logger.error(f"[ORCHESTRATOR] Global timeout ({self.tool_timeout}s) exceeded")
-                for tool_name in tool_names:
-                    if tool_name not in results:
-                        results[tool_name] = {"error": "timeout", "success": False}
-                        failed.append(tool_name)
-
-        end_time = time.time()
-        total_time_ms = int((end_time - start_time) * 1000)
-
-        execution_summary = {
-            "tools_invoked": tool_names,
-            "successful": successful,
-            "failed": failed,
-            "total_time_ms": total_time_ms
-        }
-
-        logger.info(f"[ORCHESTRATOR] Execution complete: {len(successful)}/{len(tool_names)} successful in {total_time_ms}ms")
-
-        return {
-            "results": results,
-            "execution_summary": execution_summary
-        }
-
-    async def _execute_single_tool(
-        self,
-        tool_name: str,
-        query: str,
-        context_state: Dict[str, Any]
-    ) -> Dict[str, Any]:
-        """
-        Execute a single tool with error handling.
-
-        Args:
-            tool_name: Name of tool (RAG, WEB, etc.)
-            query: Query string for the tool
-            context_state: Context for tool execution
-
-        Returns:
-            Tool-specific result dict
-        """
-        tool_func = self.available_tools.get(tool_name)
-        if not tool_func:
-            raise ValueError(f"Tool {tool_name} not available")
-
-        try:
-            logger.debug(f"[ORCHESTRATOR] Invoking {tool_name}...")
-
-            # Different tools have different signatures - adapt as needed
-            if tool_name == "RAG":
-                result = await self._invoke_rag(tool_func, query, context_state)
-            elif tool_name == "WEB":
-                result = await self._invoke_web(tool_func, query)
-            elif tool_name == "WEATHER":
-                result = await self._invoke_weather(tool_func, query)
-            elif tool_name == "CODEBRAIN":
-                result = await self._invoke_codebrain(tool_func, query, context_state)
-            else:
-                # Generic invocation
-                result = await tool_func(query)
-
-            return {
-                "success": True,
-                "tool": tool_name,
-                "query": query,
-                "data": result
-            }
-
-        except Exception as e:
-            logger.error(f"[ORCHESTRATOR] {tool_name} execution failed: {e}")
-            raise
-
-    async def _invoke_rag(self, func, query: str, context: Dict[str, Any]) -> Any:
-        """Invoke RAG tool (NeoMem search)."""
-        session_id = context.get("session_id", "unknown")
-        # RAG searches memory for relevant past interactions
-        try:
-            results = await func(query, limit=5, session_id=session_id)
-            return results
-        except Exception as e:
-            logger.warning(f"[ORCHESTRATOR] RAG invocation failed, returning empty: {e}")
-            return []
-
-    async def _invoke_web(self, func, query: str) -> Any:
-        """Invoke web search tool."""
-        try:
-            results = await func(query, max_results=5)
-            return results
-        except Exception as e:
-            logger.warning(f"[ORCHESTRATOR] WEB invocation failed: {e}")
-            return {"error": str(e), "results": []}
-
-    async def _invoke_weather(self, func, query: str) -> Any:
-        """Invoke weather tool."""
-        # Extract location from query (simple heuristic)
-        # In future: use LLM to extract location
-        try:
-            location = self._extract_location(query)
-            results = await func(location)
-            return results
-        except Exception as e:
-            logger.warning(f"[ORCHESTRATOR] WEATHER invocation failed: {e}")
-            return {"error": str(e)}
-
-    async def _invoke_codebrain(self, func, query: str, context: Dict[str, Any]) -> Any:
-        """Invoke codebrain tool."""
-        try:
-            results = await func(query, context=context)
-            return results
-        except Exception as e:
-            logger.warning(f"[ORCHESTRATOR] CODEBRAIN invocation failed: {e}")
-            return {"error": str(e)}
-
-    def _extract_location(self, query: str) -> str:
-        """
-        Extract location from weather query.
-        Simple heuristic - in future use LLM.
-        """
-        # Common location indicators
-        indicators = ["in ", "at ", "for ", "weather in ", "temperature in "]
-
-        query_lower = query.lower()
-        for indicator in indicators:
-            if indicator in query_lower:
-                # Get text after indicator
-                parts = query_lower.split(indicator, 1)
-                if len(parts) > 1:
-                    location = parts[1].strip().split()[0]  # First word after indicator
-                    return location
-
-        # Default fallback
-        return "current location"
-
-    def format_results_for_context(self, orchestrator_result: Dict[str, Any]) -> str:
-        """
-        Format tool results for inclusion in context/prompt.
-
-        Args:
-            orchestrator_result: Output from execute_tools()
-
-        Returns:
-            Formatted string for prompt injection
-        """
-        results = orchestrator_result.get("results", {})
-        summary = orchestrator_result.get("execution_summary", {})
-
-        if not results:
-            return ""
-
-        formatted = "\n=== AUTONOMOUS TOOL RESULTS ===\n"
-
-        for tool_name, tool_result in results.items():
-            if tool_result.get("success", False):
-                formatted += f"\n[{tool_name}]\n"
-                data = tool_result.get("data", {})
-
-                # Format based on tool type
-                if tool_name == "RAG":
-                    formatted += self._format_rag_results(data)
-                elif tool_name == "WEB":
-                    formatted += self._format_web_results(data)
-                elif tool_name == "WEATHER":
-                    formatted += self._format_weather_results(data)
-                elif tool_name == "CODEBRAIN":
-                    formatted += self._format_codebrain_results(data)
-                else:
-                    formatted += f"{data}\n"
-            else:
-                formatted += f"\n[{tool_name}] - Failed: {tool_result.get('error', 'unknown')}\n"
-
-        formatted += f"\n(Tools executed in {summary.get('total_time_ms', 0)}ms)\n"
-        formatted += "=" * 40 + "\n"
-
-        return formatted
-
-    def _format_rag_results(self, data: Any) -> str:
-        """Format RAG/memory search results."""
-        if not data:
-            return "No relevant memories found.\n"
-
-        formatted = "Relevant memories:\n"
-        for i, item in enumerate(data[:3], 1):  # Top 3
-            text = item.get("text", item.get("content", str(item)))
-            formatted += f"  {i}. {text[:100]}...\n"
-        return formatted
-
-    def _format_web_results(self, data: Any) -> str:
-        """Format web search results."""
-        if isinstance(data, dict) and data.get("error"):
-            return f"Web search failed: {data['error']}\n"
-
-        results = data.get("results", []) if isinstance(data, dict) else data
-        if not results:
-            return "No web results found.\n"
-
-        formatted = "Web search results:\n"
-        for i, item in enumerate(results[:3], 1):  # Top 3
-            title = item.get("title", "No title")
-            snippet = item.get("snippet", item.get("description", ""))
-            formatted += f"  {i}. {title}\n     {snippet[:100]}...\n"
-        return formatted
-
-    def _format_weather_results(self, data: Any) -> str:
-        """Format weather results."""
-        if isinstance(data, dict) and data.get("error"):
-            return f"Weather lookup failed: {data['error']}\n"
-
-        # Assuming weather API returns temp, conditions, etc.
-        temp = data.get("temperature", "unknown")
-        conditions = data.get("conditions", "unknown")
-        location = data.get("location", "requested location")
-
-        return f"Weather for {location}: {temp}, {conditions}\n"
-
-    def _format_codebrain_results(self, data: Any) -> str:
-        """Format codebrain results."""
-        if isinstance(data, dict) and data.get("error"):
-            return f"Codebrain failed: {data['error']}\n"
-
-        # Format code-related results
-        return f"{data}\n"
@@ -1,196 +0,0 @@
-"""
-Provider-agnostic Tool Registry for Lyra.
-
-This module provides a central registry for all available tools with
-Lyra-native definitions (not provider-specific).
-"""
-
-import os
-from typing import Dict, List, Optional
-from .executors import execute_code, search_web, search_notes, create_note
-
-
-class ToolRegistry:
-    """Registry for managing available tools and their definitions.
-
-    Tools are defined in Lyra's own format (provider-agnostic), and
-    adapters convert them to provider-specific formats (OpenAI function
-    calling, Ollama XML prompts, etc.).
-    """
-
-    def __init__(self):
-        """Initialize the tool registry with feature flags from environment."""
-        self.tools = {}
-        self.executors = {}
-
-        # Feature flags from environment
-        self.code_execution_enabled = os.getenv("ENABLE_CODE_EXECUTION", "true").lower() == "true"
-        self.web_search_enabled = os.getenv("ENABLE_WEB_SEARCH", "true").lower() == "true"
-        self.trilium_enabled = os.getenv("ENABLE_TRILIUM", "false").lower() == "true"
-
-        self._register_tools()
-        self._register_executors()
-
-    def _register_executors(self):
-        """Register executor functions for each tool."""
-        if self.code_execution_enabled:
-            self.executors["execute_code"] = execute_code
-
-        if self.web_search_enabled:
-            self.executors["search_web"] = search_web
-
-        if self.trilium_enabled:
-            self.executors["search_notes"] = search_notes
-            self.executors["create_note"] = create_note
-
-    def _register_tools(self):
-        """Register all available tools based on feature flags."""
-
-        if self.code_execution_enabled:
-            self.tools["execute_code"] = {
-                "name": "execute_code",
-                "description": "Execute Python or bash code in a secure sandbox environment. Use this to perform calculations, data processing, file operations, or any programmatic tasks. The sandbox is persistent across calls within a session and has common Python packages (numpy, pandas, requests, matplotlib, scipy) pre-installed.",
-                "parameters": {
-                    "language": {
-                        "type": "string",
-                        "enum": ["python", "bash"],
-                        "description": "The programming language to execute (python or bash)"
-                    },
-                    "code": {
-                        "type": "string",
-                        "description": "The code to execute. For multi-line code, use proper indentation. For Python, use standard Python 3.11 syntax."
-                    },
-                    "reason": {
-                        "type": "string",
-                        "description": "Brief explanation of why you're executing this code and what you expect to achieve"
-                    }
-                },
-                "required": ["language", "code", "reason"]
-            }
-
-        if self.web_search_enabled:
-            self.tools["search_web"] = {
-                "name": "search_web",
-                "description": "Search the internet using DuckDuckGo to find current information, facts, news, or answers to questions. Returns a list of search results with titles, snippets, and URLs. Use this when you need up-to-date information or facts not in your training data.",
-                "parameters": {
-                    "query": {
-                        "type": "string",
-                        "description": "The search query to look up on the internet"
-                    },
-                    "max_results": {
-                        "type": "integer",
-                        "description": "Maximum number of results to return (default: 5, max: 10)"
-                    }
-                },
-                "required": ["query"]
-            }
-
-        if self.trilium_enabled:
-            self.tools["search_notes"] = {
-                "name": "search_notes",
-                "description": "Search through Trilium notes to find relevant information. Use this to retrieve knowledge, context, or information previously stored in the user's notes.",
-                "parameters": {
-                    "query": {
-                        "type": "string",
-                        "description": "The search query to find matching notes"
-                    },
-                    "limit": {
-                        "type": "integer",
-                        "description": "Maximum number of notes to return (default: 5, max: 20)"
-                    }
-                },
-                "required": ["query"]
-            }
-
-            self.tools["create_note"] = {
-                "name": "create_note",
-                "description": "Create a new note in Trilium. Use this to store important information, insights, or knowledge for future reference. Notes are stored in the user's Trilium knowledge base.",
-                "parameters": {
-                    "title": {
-                        "type": "string",
-                        "description": "The title of the note"
-                    },
-                    "content": {
-                        "type": "string",
-                        "description": "The content of the note in markdown or HTML format"
-                    },
-                    "parent_note_id": {
-                        "type": "string",
-                        "description": "Optional ID of the parent note to nest this note under"
-                    }
-                },
-                "required": ["title", "content"]
-            }
-
-    def get_tool_definitions(self) -> Optional[List[Dict]]:
-        """Get list of all enabled tool definitions in Lyra format.
-
-        Returns:
-            list: List of tool definition dicts, or None if no tools enabled
-        """
-        if not self.tools:
-            return None
-        return list(self.tools.values())
-
-    def get_tool_names(self) -> List[str]:
-        """Get list of all enabled tool names.
-
-        Returns:
-            list: List of tool name strings
-        """
-        return list(self.tools.keys())
-
-    def is_tool_enabled(self, tool_name: str) -> bool:
-        """Check if a specific tool is enabled.
-
-        Args:
-            tool_name: Name of the tool to check
-
-        Returns:
-            bool: True if tool is enabled, False otherwise
-        """
-        return tool_name in self.tools
-
-    def register_executor(self, tool_name: str, executor_func):
-        """Register an executor function for a tool.
-
-        Args:
-            tool_name: Name of the tool
-            executor_func: Async function that executes the tool
-        """
-        self.executors[tool_name] = executor_func
-
-    async def execute_tool(self, name: str, arguments: dict) -> dict:
-        """Execute a tool by name.
-
-        Args:
-            name: Tool name
-            arguments: Tool arguments dict
-
-        Returns:
-            dict: Tool execution result
-        """
-        if name not in self.executors:
-            return {"error": f"Unknown tool: {name}"}
-
-        executor = self.executors[name]
-        try:
-            return await executor(arguments)
-        except Exception as e:
-            return {"error": f"Tool execution failed: {str(e)}"}
-
-
-# Global registry instance (singleton pattern)
-_registry = None
-
-
-def get_registry() -> ToolRegistry:
-    """Get the global ToolRegistry instance.
-
-    Returns:
-        ToolRegistry: The global registry instance
-    """
-    global _registry
-    if _registry is None:
-        _registry = ToolRegistry()
-    return _registry
@@ -1,91 +0,0 @@
-"""
-Event streaming for tool calling "show your work" feature.
-
-This module manages Server-Sent Events (SSE) for broadcasting the internal
-thinking process during tool calling operations.
-"""
-
-import asyncio
-from typing import Dict, Optional
-from collections import defaultdict
-import json
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class ToolStreamManager:
-    """Manages SSE streams for tool calling events."""
-
-    def __init__(self):
-        # session_id -> list of queues (one per connected client)
-        self._subscribers: Dict[str, list] = defaultdict(list)
-
-    def subscribe(self, session_id: str) -> asyncio.Queue:
-        """Subscribe to events for a session.
-
-        Returns:
-            Queue that will receive events for this session
-        """
-        queue = asyncio.Queue()
-        self._subscribers[session_id].append(queue)
-        logger.info(f"New subscriber for session {session_id}, total: {len(self._subscribers[session_id])}")
-        return queue
-
-    def unsubscribe(self, session_id: str, queue: asyncio.Queue):
-        """Unsubscribe from events for a session."""
-        if session_id in self._subscribers:
-            try:
-                self._subscribers[session_id].remove(queue)
-                logger.info(f"Removed subscriber for session {session_id}, remaining: {len(self._subscribers[session_id])}")
-
-                # Clean up empty lists
-                if not self._subscribers[session_id]:
-                    del self._subscribers[session_id]
-            except ValueError:
-                pass
-
-    async def emit(self, session_id: str, event_type: str, data: dict):
-        """Emit an event to all subscribers of a session.
-
-        Args:
-            session_id: Session to emit to
-            event_type: Type of event (thinking, tool_call, tool_result, done)
-            data: Event data
-        """
-        if session_id not in self._subscribers:
-            return
-
-        event = {
-            "type": event_type,
-            "data": data
-        }
-
-        # Send to all subscribers
-        dead_queues = []
-        for queue in self._subscribers[session_id]:
-            try:
-                await queue.put(event)
-            except Exception as e:
-                logger.error(f"Failed to emit event to queue: {e}")
-                dead_queues.append(queue)
-
-        # Clean up dead queues
-        for queue in dead_queues:
-            self.unsubscribe(session_id, queue)
-
-    def has_subscribers(self, session_id: str) -> bool:
-        """Check if a session has any active subscribers."""
-        return session_id in self._subscribers and len(self._subscribers[session_id]) > 0
-
-
-# Global stream manager instance
-_stream_manager: Optional[ToolStreamManager] = None
-
-
-def get_stream_manager() -> ToolStreamManager:
-    """Get the global stream manager instance."""
-    global _stream_manager
-    if _stream_manager is None:
-        _stream_manager = ToolStreamManager()
-    return _stream_manager
@@ -1,5 +0,0 @@
-"""Utility modules for tool executors."""
-
-from .resilience import async_retry, async_timeout_wrapper
-
-__all__ = ["async_retry", "async_timeout_wrapper"]
@@ -1,70 +0,0 @@
-"""Common resilience utilities for tool executors."""
-
-import asyncio
-import functools
-import logging
-from typing import Optional, Callable, Any, TypeVar
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_exponential,
-    retry_if_exception_type,
-    before_sleep_log
-)
-
-logger = logging.getLogger(__name__)
-
-# Type variable for generic decorators
-T = TypeVar('T')
-
-
-def async_retry(
-    max_attempts: int = 3,
-    exceptions: tuple = (Exception,),
-    **kwargs
-):
-    """Async retry decorator with exponential backoff.
-
-    Args:
-        max_attempts: Maximum retry attempts
-        exceptions: Exception types to retry on
-        **kwargs: Additional tenacity configuration
-
-    Example:
-        @async_retry(max_attempts=3, exceptions=(aiohttp.ClientError,))
-        async def fetch_data():
-            ...
-    """
-    return retry(
-        stop=stop_after_attempt(max_attempts),
-        wait=wait_exponential(multiplier=1, min=1, max=10),
-        retry=retry_if_exception_type(exceptions),
-        reraise=True,
-        before_sleep=before_sleep_log(logger, logging.WARNING),
-        **kwargs
-    )
-
-
-async def async_timeout_wrapper(
-    coro: Callable[..., T],
-    timeout: float,
-    *args,
-    **kwargs
-) -> T:
-    """Wrap async function with timeout.
-
-    Args:
-        coro: Async function to wrap
-        timeout: Timeout in seconds
-        *args, **kwargs: Arguments for the function
-
-    Returns:
-        Result from the function
-
-    Raises:
-        asyncio.TimeoutError: If timeout exceeded
-
-    Example:
-        result = await async_timeout_wrapper(some_async_func, 5.0, arg1, arg2)
-    """
-    return await asyncio.wait_for(coro(*args, **kwargs), timeout=timeout)
@@ -1,553 +0,0 @@
-# context.py
-"""
-Context layer for Cortex reasoning pipeline.
-
-Provides unified context collection from:
- Intake (short-term memory, multilevel summaries L1-L30)
- NeoMem (long-term memory, semantic search)
- Session state (timestamps, messages, mode, mood, active_project)
-
-Maintains per-session state for continuity across conversations.
-"""
-
-import os
-import logging
-from datetime import datetime
-from typing import Dict, Any, Optional, List
-import httpx
-from intake.intake import summarize_context
-
-
-from neomem_client import NeoMemClient
-
-# -----------------------------
-# Configuration
-# -----------------------------
-NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000")
-NEOMEM_ENABLED = os.getenv("NEOMEM_ENABLED", "false").lower() == "true"
-RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4"))
-LOG_DETAIL_LEVEL = os.getenv("LOG_DETAIL_LEVEL", "summary").lower()
-
-# Loop detection settings
-MAX_MESSAGE_HISTORY = int(os.getenv("MAX_MESSAGE_HISTORY", "100"))  # Prevent unbounded growth
-SESSION_TTL_HOURS = int(os.getenv("SESSION_TTL_HOURS", "24"))  # Auto-expire old sessions
-ENABLE_DUPLICATE_DETECTION = os.getenv("ENABLE_DUPLICATE_DETECTION", "true").lower() == "true"
-
-# Tools available for future autonomy features
-TOOLS_AVAILABLE = ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"]
-
-# -----------------------------
-# Module-level session state
-# -----------------------------
-SESSION_STATE: Dict[str, Dict[str, Any]] = {}
-
-# Logger
-logger = logging.getLogger(__name__)
-
-# Always set up basic logging
-logger.setLevel(logging.INFO)
-console_handler = logging.StreamHandler()
-console_handler.setFormatter(logging.Formatter(
-    '%(asctime)s [CONTEXT] %(levelname)s: %(message)s',
-    datefmt='%H:%M:%S'
-))
-logger.addHandler(console_handler)
-
-
-# -----------------------------
-# Session initialization & cleanup
-# -----------------------------
-def _init_session(session_id: str) -> Dict[str, Any]:
-    """
-    Initialize a new session state entry.
-
-    Returns:
-        Dictionary with default session state fields
-    """
-    return {
-        "session_id": session_id,
-        "created_at": datetime.now(),
-        "last_timestamp": datetime.now(),
-        "last_user_message": None,
-        "last_assistant_message": None,
-        "mode": "default",  # Future: "autonomous", "focused", "creative", etc.
-        "mood": "neutral",  # Future: mood tracking
-        "active_project": None,  # Future: project context
-        "message_count": 0,
-        "message_history": [],
-        "last_message_hash": None,  # For duplicate detection
-    }
-
-
-def _cleanup_expired_sessions():
-    """Remove sessions that haven't been active for SESSION_TTL_HOURS"""
-    from datetime import timedelta
-
-    now = datetime.now()
-    expired_sessions = []
-
-    for session_id, state in SESSION_STATE.items():
-        last_active = state.get("last_timestamp", state.get("created_at"))
-        time_since_active = (now - last_active).total_seconds() / 3600  # hours
-
-        if time_since_active > SESSION_TTL_HOURS:
-            expired_sessions.append(session_id)
-
-    for session_id in expired_sessions:
-        del SESSION_STATE[session_id]
-        logger.info(f"🗑️  Expired session: {session_id} (inactive for {SESSION_TTL_HOURS}+ hours)")
-
-    return len(expired_sessions)
-
-
-def _is_duplicate_message(session_id: str, user_prompt: str) -> bool:
-    """
-    Check if this message is a duplicate of the last processed message.
-
-    Uses simple hash comparison to detect exact duplicates or processing loops.
-    """
-    if not ENABLE_DUPLICATE_DETECTION:
-        return False
-
-    import hashlib
-
-    state = SESSION_STATE.get(session_id)
-    if not state:
-        return False
-
-    # Create hash of normalized message
-    message_hash = hashlib.md5(user_prompt.strip().lower().encode()).hexdigest()
-
-    # Check if it matches the last message
-    if state.get("last_message_hash") == message_hash:
-        logger.warning(
-            f"⚠️  DUPLICATE MESSAGE DETECTED | Session: {session_id} | "
-            f"Message: {user_prompt[:80]}..."
-        )
-        return True
-
-    # Update hash for next check
-    state["last_message_hash"] = message_hash
-    return False
-
-
-def _trim_message_history(state: Dict[str, Any]):
-    """
-    Trim message history to prevent unbounded growth.
-
-    Keeps only the most recent MAX_MESSAGE_HISTORY messages.
-    """
-    history = state.get("message_history", [])
-
-    if len(history) > MAX_MESSAGE_HISTORY:
-        trimmed_count = len(history) - MAX_MESSAGE_HISTORY
-        state["message_history"] = history[-MAX_MESSAGE_HISTORY:]
-        logger.info(f"✂️  Trimmed {trimmed_count} old messages from session {state['session_id']}")
-
-
-# -----------------------------
-# Intake context retrieval
-# -----------------------------
-async def _get_intake_context(session_id: str, messages: List[Dict[str, str]]):
-    """
-    Internal Intake — Direct call to summarize_context()
-    No HTTP, no containers, no failures.
-    """
-    try:
-        return await summarize_context(session_id, messages)
-    except Exception as e:
-        logger.error(f"Internal Intake summarization failed: {e}")
-        return {
-            "session_id": session_id,
-            "L1": "",
-            "L5": "",
-            "L10": "",
-            "L20": "",
-            "L30": "",
-            "error": str(e)
-        }
-
-
-
-# -----------------------------
-# NeoMem semantic search
-# -----------------------------
-async def _search_neomem(
-    query: str,
-    user_id: str = "brian",
-    limit: int = 5
-) -> List[Dict[str, Any]]:
-    """
-    Search NeoMem for relevant long-term memories.
-
-    Returns full response structure from NeoMem:
-    [
-        {
-            "id": "mem_abc123",
-            "score": 0.92,
-            "payload": {
-                "data": "Memory text content...",
-                "metadata": {
-                    "category": "...",
-                    "created_at": "...",
-                    ...
-                }
-            }
-        },
-        ...
-    ]
-
-    Args:
-        query: Search query text
-        user_id: User identifier for memory filtering
-        limit: Maximum number of results
-
-    Returns:
-        List of memory objects with full structure, or empty list on failure
-    """
-    if not NEOMEM_ENABLED:
-        logger.info("NeoMem search skipped (NEOMEM_ENABLED is false)")
-        return []
-
-    try:
-        # NeoMemClient reads NEOMEM_API from environment, no base_url parameter
-        client = NeoMemClient()
-        results = await client.search(
-            query=query,
-            user_id=user_id,
-            limit=limit,
-            threshold=RELEVANCE_THRESHOLD
-        )
-
-        # Results are already filtered by threshold in NeoMemClient.search()
-        logger.info(f"NeoMem search returned {len(results)} relevant results")
-        return results
-
-    except Exception as e:
-        logger.warning(f"NeoMem search failed: {e}")
-        return []
-
-
-# -----------------------------
-# Main context collection
-# -----------------------------
-async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
-    """
-    Collect unified context from all sources.
-
-    Orchestrates:
-    1. Initialize or update session state
-    2. Calculate time since last message
-    3. Retrieve Intake multilevel summaries (L1-L30)
-    4. Search NeoMem for relevant long-term memories
-    5. Update session state with current user message
-    6. Return unified context_state dictionary
-
-    Args:
-        session_id: Session identifier
-        user_prompt: Current user message
-
-    Returns:
-        Unified context state dictionary with structure:
-        {
-            "session_id": "...",
-            "timestamp": "2025-11-28T12:34:56",
-            "minutes_since_last_msg": 5.2,
-            "message_count": 42,
-            "intake": {
-                "L1": [...],
-                "L5": [...],
-                "L10": {...},
-                "L20": {...},
-                "L30": {...}
-            },
-            "rag": [
-                {
-                    "id": "mem_123",
-                    "score": 0.92,
-                    "payload": {
-                        "data": "...",
-                        "metadata": {...}
-                    }
-                },
-                ...
-            ],
-            "mode": "default",
-            "mood": "neutral",
-            "active_project": null,
-            "tools_available": ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"]
-        }
-    """
-
-    # A. Cleanup expired sessions periodically (every 100th call)
-    import random
-    if random.randint(1, 100) == 1:
-        _cleanup_expired_sessions()
-
-    # B. Initialize session state if needed
-    if session_id not in SESSION_STATE:
-        SESSION_STATE[session_id] = _init_session(session_id)
-        logger.info(f"Initialized new session: {session_id}")
-
-    state = SESSION_STATE[session_id]
-
-    # C. Check for duplicate messages (loop detection)
-    if _is_duplicate_message(session_id, user_prompt):
-        # Return cached context with warning flag
-        logger.warning(f"🔁 LOOP DETECTED - Returning cached context to prevent processing duplicate")
-        context_state = {
-            "session_id": session_id,
-            "timestamp": datetime.now().isoformat(),
-            "minutes_since_last_msg": 0,
-            "message_count": state["message_count"],
-            "intake": {},
-            "rag": [],
-            "mode": state["mode"],
-            "mood": state["mood"],
-            "active_project": state["active_project"],
-            "tools_available": TOOLS_AVAILABLE,
-            "duplicate_detected": True,
-        }
-        return context_state
-
-    # B. Calculate time delta
-    now = datetime.now()
-    time_delta_seconds = (now - state["last_timestamp"]).total_seconds()
-    minutes_since_last_msg = round(time_delta_seconds / 60.0, 2)
-
-    # C. Gather Intake context (multilevel summaries)
-    # Build compact message buffer for Intake:
-    messages_for_intake = []
-
-    # You track messages inside SESSION_STATE — assemble it here:
-    if "message_history" in state:
-        for turn in state["message_history"]:
-            messages_for_intake.append({
-                "user_msg": turn.get("user", ""),
-                "assistant_msg": turn.get("assistant", "")
-            })
-
-    intake_data = await _get_intake_context(session_id, messages_for_intake)
-
-    # D. Search NeoMem for relevant memories
-    if NEOMEM_ENABLED:
-        rag_results = await _search_neomem(
-            query=user_prompt,
-            user_id="brian",  # TODO: Make configurable per session
-            limit=5
-        )
-    else:
-        rag_results = []
-        logger.info("Skipping NeoMem RAG retrieval; NEOMEM_ENABLED is false")
-
-    # E. Update session state
-    state["last_user_message"] = user_prompt
-    state["last_timestamp"] = now
-    state["message_count"] += 1
-
-    # Save user turn to history
-    state["message_history"].append({
-        "user": user_prompt,
-        "assistant": ""   # assistant reply filled later by update_last_assistant_message()
-    })
-
-    # Trim history to prevent unbounded growth
-    _trim_message_history(state)
-
-
-
-    # F. Assemble unified context
-    context_state = {
-        "session_id": session_id,
-        "timestamp": now.isoformat(),
-        "minutes_since_last_msg": minutes_since_last_msg,
-        "message_count": state["message_count"],
-        "intake": intake_data,
-        "rag": rag_results,
-        "mode": state["mode"],
-        "mood": state["mood"],
-        "active_project": state["active_project"],
-        "tools_available": TOOLS_AVAILABLE,
-    }
-
-    # Log context summary in structured format
-    logger.info(
-        f"📊 Context | Session: {session_id} | "
-        f"Messages: {state['message_count']} | "
-        f"Last: {minutes_since_last_msg:.1f}min | "
-        f"RAG: {len(rag_results)} results"
-    )
-
-    # Show detailed context in detailed/verbose mode
-    if LOG_DETAIL_LEVEL in ["detailed", "verbose"]:
-        import json
-        logger.info(f"\n{'─'*100}")
-        logger.info(f"[CONTEXT] Session {session_id} | User: {user_prompt[:80]}...")
-        logger.info(f"{'─'*100}")
-        logger.info(f"  Mode: {state['mode']} | Mood: {state['mood']} | Project: {state['active_project']}")
-        logger.info(f"  Tools: {', '.join(TOOLS_AVAILABLE)}")
-
-        # Show intake summaries (condensed)
-        if intake_data:
-            logger.info(f"\n  ╭─ INTAKE SUMMARIES ────────────────────────────────────────────────")
-            for level in ["L1", "L5", "L10", "L20", "L30"]:
-                if level in intake_data:
-                    summary = intake_data[level]
-                    if isinstance(summary, dict):
-                        summary_text = summary.get("summary", str(summary)[:100])
-                    else:
-                        summary_text = str(summary)[:100]
-                    logger.info(f"  │ {level:4s}: {summary_text}...")
-            logger.info(f"  ╰───────────────────────────────────────────────────────────────────")
-
-        # Show RAG results (condensed)
-        if rag_results:
-            logger.info(f"\n  ╭─ RAG RESULTS ({len(rag_results)}) ──────────────────────────────────────────────")
-            for idx, result in enumerate(rag_results[:5], 1):  # Show top 5
-                score = result.get("score", 0)
-                data_preview = str(result.get("payload", {}).get("data", ""))[:60]
-                logger.info(f"  │ [{idx}] {score:.3f} | {data_preview}...")
-            if len(rag_results) > 5:
-                logger.info(f"  │ ... and {len(rag_results) - 5} more results")
-            logger.info(f"  ╰───────────────────────────────────────────────────────────────────")
-
-        # Show full raw data only in verbose mode
-        if LOG_DETAIL_LEVEL == "verbose":
-            logger.info(f"\n  ╭─ RAW INTAKE DATA ─────────────────────────────────────────────────")
-            logger.info(f"  │ {json.dumps(intake_data, indent=4, default=str)}")
-            logger.info(f"  ╰───────────────────────────────────────────────────────────────────")
-
-        logger.info(f"{'─'*100}\n")
-
-    return context_state
-
-
-# -----------------------------
-# Session state management
-# -----------------------------
-def update_last_assistant_message(session_id: str, message: str) -> None:
-    """
-    Update session state with assistant's response and complete
-    the last turn inside message_history.
-    """
-    session = SESSION_STATE.get(session_id)
-    if not session:
-        logger.warning(f"Attempted to update non-existent session: {session_id}")
-        return
-
-    # Update last assistant message + timestamp
-    session["last_assistant_message"] = message
-    session["last_timestamp"] = datetime.now()
-
-    # Fill in assistant reply for the most recent turn
-    history = session.get("message_history", [])
-    if history:
-        # history entry already contains {"user": "...", "assistant": "...?"}
-        history[-1]["assistant"] = message
-
-
-
-def get_session_state(session_id: str) -> Optional[Dict[str, Any]]:
-    """
-    Retrieve current session state.
-
-    Args:
-        session_id: Session identifier
-
-    Returns:
-        Session state dict or None if session doesn't exist
-    """
-    return SESSION_STATE.get(session_id)
-
-
-def close_session(session_id: str) -> bool:
-    """
-    Close and cleanup a session.
-
-    Args:
-        session_id: Session identifier
-
-    Returns:
-        True if session was closed, False if it didn't exist
-    """
-    if session_id in SESSION_STATE:
-        del SESSION_STATE[session_id]
-        logger.info(f"Closed session: {session_id}")
-        return True
-    return False
-
-
-# -----------------------------
-# Extension hooks for future autonomy
-# -----------------------------
-def update_mode(session_id: str, new_mode: str) -> None:
-    """
-    Update session mode.
-
-    Future modes: "autonomous", "focused", "creative", "collaborative", etc.
-
-    Args:
-        session_id: Session identifier
-        new_mode: New mode string
-    """
-    if session_id in SESSION_STATE:
-        old_mode = SESSION_STATE[session_id]["mode"]
-        SESSION_STATE[session_id]["mode"] = new_mode
-        logger.info(f"Session {session_id} mode changed: {old_mode} -> {new_mode}")
-
-
-def update_mood(session_id: str, new_mood: str) -> None:
-    """
-    Update session mood.
-
-    Future implementation: Sentiment analysis, emotional state tracking.
-
-    Args:
-        session_id: Session identifier
-        new_mood: New mood string
-    """
-    if session_id in SESSION_STATE:
-        old_mood = SESSION_STATE[session_id]["mood"]
-        SESSION_STATE[session_id]["mood"] = new_mood
-        logger.info(f"Session {session_id} mood changed: {old_mood} -> {new_mood}")
-
-
-def update_active_project(session_id: str, project: Optional[str]) -> None:
-    """
-    Update active project context.
-
-    Future implementation: Project-specific memory, tools, preferences.
-
-    Args:
-        session_id: Session identifier
-        project: Project identifier or None
-    """
-    if session_id in SESSION_STATE:
-        SESSION_STATE[session_id]["active_project"] = project
-        logger.info(f"Session {session_id} active project set to: {project}")
-
-
-async def autonomous_heartbeat(session_id: str) -> Optional[str]:
-    """
-    Autonomous thinking heartbeat.
-
-    Future implementation:
-    - Check if Lyra should initiate internal dialogue
-    - Generate self-prompted thoughts based on session state
-    - Update mood/mode based on context changes
-    - Trigger proactive suggestions or reminders
-
-    Args:
-        session_id: Session identifier
-
-    Returns:
-        Optional autonomous thought/action string
-    """
-    # Stub for future implementation
-    # Example logic:
-    # - If minutes_since_last_msg > 60: Check for pending reminders
-    # - If mood == "curious" and active_project: Generate research questions
-    # - If mode == "autonomous": Self-prompt based on project goals
-
-    logger.debug(f"Autonomous heartbeat for session {session_id} (not yet implemented)")
-    return None
@@ -1,20 +0,0 @@
-{
-  "mood": "neutral",
-  "energy": 0.8500000000000001,
-  "focus": "conversation",
-  "confidence": 0.7,
-  "curiosity": 1.0,
-  "last_updated": "2025-12-27T18:16:00.152499",
-  "interaction_count": 27,
-  "learning_queue": [],
-  "active_goals": [],
-  "preferences": {
-    "verbosity": "medium",
-    "formality": "casual",
-    "proactivity": 0.3
-  },
-  "metadata": {
-    "version": "1.0",
-    "created_at": "2025-12-14T03:28:49.364768"
-  }
-}
@@ -1 +0,0 @@
-# Ingest module - handles communication with Intake service
@@ -1,33 +0,0 @@
-# ingest_handler.py
-import os
-import httpx
-
-NEOMEM_URL = os.getenv("NEOMEM_API", "http://nvgram-api:7077")
-
-async def handle_ingest(payload):
-    """
-    Pass user+assistant turns to NeoMem.
-    Minimal version. Does not process or annotate.
-    """
-    data = {
-        "messages": [],
-        "user_id": "brian"   # default for now
-    }
-
-    if payload.user:
-        data["messages"].append({"role": "user", "content": payload.user})
-
-    if payload.assistant:
-        data["messages"].append({"role": "assistant", "content": payload.assistant})
-
-    try:
-        async with httpx.AsyncClient() as client:
-            r = await client.post(
-                f"{NEOMEM_URL}/memories",
-                json=data,
-                timeout=5
-            )
-            if r.status_code != 200:
-                print(f"[Ingest] NeoMem returned {r.status_code}: {r.text}")
-    except Exception as e:
-        print(f"[Ingest] Failed to send to NeoMem: {e}")
@@ -1,45 +0,0 @@
-# cortex/intake_client.py
-import os, httpx, logging
-from typing import Dict, Any, Optional
-
-logger = logging.getLogger(__name__)
-
-class IntakeClient:
-    """Handles short-term / episodic summaries from Intake service."""
-
-    def __init__(self):
-        self.base_url = os.getenv("INTAKE_API_URL", "http://intake:7080")
-
-    async def summarize_turn(self, session_id: str, user_msg: str, assistant_msg: Optional[str] = None) -> Dict[str, Any]:
-        """
-        DEPRECATED: Intake v0.2 removed the /summarize endpoint.
-        Use add_exchange() instead, which auto-summarizes in the background.
-        This method is kept for backwards compatibility but will fail.
-        """
-        payload = {
-            "session_id": session_id,
-            "turns": [{"role": "user", "content": user_msg}]
-        }
-        if assistant_msg:
-            payload["turns"].append({"role": "assistant", "content": assistant_msg})
-
-        async with httpx.AsyncClient(timeout=30) as client:
-            try:
-                r = await client.post(f"{self.base_url}/summarize", json=payload)
-                r.raise_for_status()
-                return r.json()
-            except Exception as e:
-                logger.warning(f"Intake summarize_turn failed (endpoint removed in v0.2): {e}")
-                return {}
-
-    async def get_context(self, session_id: str) -> str:
-        """Get summarized context for a session from Intake."""
-        async with httpx.AsyncClient(timeout=15) as client:
-            try:
-                r = await client.get(f"{self.base_url}/summaries", params={"session_id": session_id})
-                r.raise_for_status()
-                data = r.json()
-                return data.get("summary_text", "")
-            except Exception as e:
-                logger.warning(f"Intake get_context failed: {e}")
-                return ""
@@ -1,18 +0,0 @@
-"""
-Intake module - short-term memory summarization.
-
-Runs inside the Cortex container as a pure Python module.
-No standalone API server - called internally by Cortex.
-"""
-
-from .intake import (
-    SESSIONS,
-    add_exchange_internal,
-    summarize_context,
-)
-
-__all__ = [
-    "SESSIONS",
-    "add_exchange_internal",
-    "summarize_context",
-]
@@ -1,387 +0,0 @@
-import os
-import json
-from datetime import datetime
-from typing import List, Dict, Any, TYPE_CHECKING
-from collections import deque
-from llm.llm_router import call_llm
-
-# -------------------------------------------------------------------
-# Global Short-Term Memory (new Intake)
-# -------------------------------------------------------------------
-SESSIONS: dict[str, dict] = {}   # session_id → { buffer: deque, created_at: timestamp }
-
-# Diagnostic: Verify module loads only once
-print(f"[Intake Module Init] SESSIONS object id: {id(SESSIONS)}, module: {__name__}")
-
-# L10 / L20 history lives here too
-L10_HISTORY: Dict[str, list[str]] = {}
-L20_HISTORY: Dict[str, list[str]] = {}
-
-from llm.llm_router import call_llm  # Use Cortex's shared LLM router
-
-if TYPE_CHECKING:
-    # Only for type hints — do NOT redefine SESSIONS here
-    from collections import deque as _deque
-    def bg_summarize(session_id: str) -> None: ...
-
-# ─────────────────────────────
-# Config
-# ─────────────────────────────
-
-INTAKE_LLM = os.getenv("INTAKE_LLM", "PRIMARY").upper()
-
-SUMMARY_MAX_TOKENS = int(os.getenv("SUMMARY_MAX_TOKENS", "200"))
-SUMMARY_TEMPERATURE = float(os.getenv("SUMMARY_TEMPERATURE", "0.3"))
-
-NEOMEM_API = os.getenv("NEOMEM_API")
-NEOMEM_KEY = os.getenv("NEOMEM_KEY")
-
-# ─────────────────────────────
-# Internal history for L10/L20/L30
-# ─────────────────────────────
-
-L10_HISTORY: Dict[str, list[str]] = {}   # session_id → list of L10 blocks
-L20_HISTORY: Dict[str, list[str]] = {}   # session_id → list of merged overviews
-
-
-# ─────────────────────────────
-# LLM helper (via Cortex router)
-# ─────────────────────────────
-
-async def _llm(prompt: str) -> str:
-    """
-    Use Cortex's llm_router to run a summary prompt.
-    """
-    try:
-        text = await call_llm(
-            prompt,
-            backend=INTAKE_LLM,
-            temperature=SUMMARY_TEMPERATURE,
-            max_tokens=SUMMARY_MAX_TOKENS,
-        )
-        return (text or "").strip()
-    except Exception as e:
-        return f"[Error summarizing: {e}]"
-
-
-# ─────────────────────────────
-# Formatting helpers
-# ─────────────────────────────
-
-def _format_exchanges(exchanges: List[Dict[str, Any]]) -> str:
-    """
-    Expect each exchange to look like:
-      { "user_msg": "...", "assistant_msg": "..." }
-    """
-    chunks = []
-    for e in exchanges:
-        user = e.get("user_msg", "")
-        assistant = e.get("assistant_msg", "")
-        chunks.append(f"User: {user}\nAssistant: {assistant}\n")
-    return "\n".join(chunks)
-
-
-# ─────────────────────────────
-# Base factual summary
-# ─────────────────────────────
-
-async def summarize_simple(exchanges: List[Dict[str, Any]]) -> str:
-    """
-    Simple factual summary of recent exchanges.
-    """
-    if not exchanges:
-        return ""
-
-    text = _format_exchanges(exchanges)
-
-    prompt = f"""
-Summarize the following conversation between Brian (user) and Lyra (assistant).
-Focus only on factual content. Avoid names, examples, story tone, or invented details.
-
-{text}
-
-Summary:
-"""
-    return await _llm(prompt)
-
-
-# ─────────────────────────────
-# Multilevel Summaries (L1, L5, L10, L20, L30)
-# ─────────────────────────────
-
-async def summarize_L1(buf: List[Dict[str, Any]]) -> str:
-    # Last ~5 exchanges
-    return await summarize_simple(buf[-5:])
-
-
-async def summarize_L5(buf: List[Dict[str, Any]]) -> str:
-    # Last ~10 exchanges
-    return await summarize_simple(buf[-10:])
-
-
-async def summarize_L10(session_id: str, buf: List[Dict[str, Any]]) -> str:
-    # “Reality Check” for last 10 exchanges
-    text = _format_exchanges(buf[-10:])
-
-    prompt = f"""
-You are Lyra Intake performing a short 'Reality Check'.
-Summarize the last block of conversation (up to 10 exchanges)
-in one clear paragraph focusing on tone, intent, and direction.
-
-{text}
-
-Reality Check:
-"""
-    summary = await _llm(prompt)
-
-    # Track history for this session
-    L10_HISTORY.setdefault(session_id, [])
-    L10_HISTORY[session_id].append(summary)
-
-    return summary
-
-
-async def summarize_L20(session_id: str) -> str:
-    """
-    Merge all L10 Reality Checks into a 'Session Overview'.
-    """
-    history = L10_HISTORY.get(session_id, [])
-    joined = "\n\n".join(history) if history else ""
-
-    if not joined:
-        return ""
-
-    prompt = f"""
-You are Lyra Intake creating a 'Session Overview'.
-Merge the following Reality Check paragraphs into one short summary
-capturing progress, themes, and the direction of the conversation.
-
-{joined}
-
-Overview:
-"""
-    summary = await _llm(prompt)
-
-    L20_HISTORY.setdefault(session_id, [])
-    L20_HISTORY[session_id].append(summary)
-
-    return summary
-
-
-async def summarize_L30(session_id: str) -> str:
-    """
-    Merge all L20 session overviews into a 'Continuity Report'.
-    """
-    history = L20_HISTORY.get(session_id, [])
-    joined = "\n\n".join(history) if history else ""
-
-    if not joined:
-        return ""
-
-    prompt = f"""
-You are Lyra Intake generating a 'Continuity Report'.
-Condense these session overviews into one high-level reflection,
-noting major themes, persistent goals, and shifts.
-
-{joined}
-
-Continuity Report:
-"""
-    return await _llm(prompt)
-
-
-# ─────────────────────────────
-# NeoMem push
-# ─────────────────────────────
-
-def push_to_neomem(summary: str, session_id: str, level: str) -> None:
-    """
-    Fire-and-forget push of a summary into NeoMem.
-    """
-    if not NEOMEM_API or not summary:
-        return
-
-    headers = {"Content-Type": "application/json"}
-    if NEOMEM_KEY:
-        headers["Authorization"] = f"Bearer {NEOMEM_KEY}"
-
-    payload = {
-        "messages": [{"role": "assistant", "content": summary}],
-        "user_id": "brian",
-        "metadata": {
-            "source": "intake",
-            "session_id": session_id,
-            "level": level,
-        },
-    }
-
-    try:
-        import requests
-        requests.post(
-            f"{NEOMEM_API}/memories",
-            json=payload,
-            headers=headers,
-            timeout=20,
-        ).raise_for_status()
-        print(f"🧠 NeoMem updated ({level}) for {session_id}")
-    except Exception as e:
-        print(f"NeoMem push failed ({level}, {session_id}): {e}")
-
-
-# ─────────────────────────────
-# Main entrypoint for Cortex
-# ─────────────────────────────
-async def summarize_context(session_id: str, exchanges: list[dict]):
-    """
-    Internal summarizer that uses Cortex's LLM router.
-    Produces cascading summaries based on exchange count:
-    - L1: Always (most recent activity)
-    - L2: After 2+ exchanges
-    - L5: After 5+ exchanges
-    - L10: After 10+ exchanges
-    - L20: After 20+ exchanges
-    - L30: After 30+ exchanges
-
-    Args:
-        session_id: The conversation/session ID
-        exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...}
-    """
-
-    exchange_count = len(exchanges)
-
-    if exchange_count == 0:
-        return {
-            "session_id": session_id,
-            "exchange_count": 0,
-            "L1": "",
-            "L2": "",
-            "L5": "",
-            "L10": "",
-            "L20": "",
-            "L30": "",
-            "last_updated": datetime.now().isoformat()
-        }
-
-    result = {
-        "session_id": session_id,
-        "exchange_count": exchange_count,
-        "L1": "",
-        "L2": "",
-        "L5": "",
-        "L10": "",
-        "L20": "",
-        "L30": "",
-        "last_updated": datetime.now().isoformat()
-    }
-
-    try:
-        # L1: Always generate (most recent exchanges)
-        result["L1"] = await summarize_simple(exchanges[-5:])
-        print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)")
-
-        # L2: After 2+ exchanges
-        if exchange_count >= 2:
-            result["L2"] = await summarize_simple(exchanges[-2:])
-            print(f"[Intake] Generated L2 for {session_id}")
-
-        # L5: After 5+ exchanges
-        if exchange_count >= 5:
-            result["L5"] = await summarize_simple(exchanges[-10:])
-            print(f"[Intake] Generated L5 for {session_id}")
-
-        # L10: After 10+ exchanges (Reality Check)
-        if exchange_count >= 10:
-            result["L10"] = await summarize_L10(session_id, exchanges)
-            print(f"[Intake] Generated L10 for {session_id}")
-
-        # L20: After 20+ exchanges (Session Overview - merges L10s)
-        if exchange_count >= 20 and exchange_count % 10 == 0:
-            result["L20"] = await summarize_L20(session_id)
-            print(f"[Intake] Generated L20 for {session_id}")
-
-        # L30: After 30+ exchanges (Continuity Report - merges L20s)
-        if exchange_count >= 30 and exchange_count % 10 == 0:
-            result["L30"] = await summarize_L30(session_id)
-            print(f"[Intake] Generated L30 for {session_id}")
-
-        return result
-
-    except Exception as e:
-        print(f"[Intake] Error during summarization: {e}")
-        result["L1"] = f"[Error summarizing: {str(e)}]"
-        return result
-
-# ─────────────────────────────────
-# Background summarization stub
-# ─────────────────────────────────
-def bg_summarize(session_id: str):
-    """
-    Placeholder for background summarization.
-    Actual summarization happens during /reason via summarize_context().
-
-    This function exists to prevent NameError when called from add_exchange_internal().
-    """
-    print(f"[Intake] Exchange added for {session_id}. Will summarize on next /reason call.")
-
-# ─────────────────────────────
-# Internal entrypoint for Cortex
-# ─────────────────────────────
-def get_recent_messages(session_id: str, limit: int = 20) -> list:
-    """
-    Get recent raw messages from the session buffer.
-
-    Args:
-        session_id: Session identifier
-        limit: Maximum number of messages to return (default 20)
-
-    Returns:
-        List of message dicts with 'role' and 'content' fields
-    """
-    if session_id not in SESSIONS:
-        return []
-
-    buffer = SESSIONS[session_id]["buffer"]
-
-    # Convert buffer to list and get last N messages
-    messages = list(buffer)[-limit:]
-
-    return messages
-
-
-def add_exchange_internal(exchange: dict):
-    """
-    Direct internal call — bypasses FastAPI request handling.
-    Cortex uses this to feed user/assistant turns directly
-    into Intake's buffer and trigger full summarization.
-    """
-    session_id = exchange.get("session_id")
-    if not session_id:
-        raise ValueError("session_id missing")
-
-    exchange["timestamp"] = datetime.now().isoformat()
-
-    # DEBUG: Verify we're using the module-level SESSIONS
-    print(f"[add_exchange_internal] SESSIONS object id: {id(SESSIONS)}, current sessions: {list(SESSIONS.keys())}")
-
-    # Ensure session exists
-    if session_id not in SESSIONS:
-        SESSIONS[session_id] = {
-            "buffer": deque(maxlen=200),
-            "created_at": datetime.now()
-        }
-        print(f"[add_exchange_internal] Created new session: {session_id}")
-    else:
-        print(f"[add_exchange_internal] Using existing session: {session_id}")
-
-    # Append exchange into the rolling buffer
-    SESSIONS[session_id]["buffer"].append(exchange)
-    buffer_len = len(SESSIONS[session_id]["buffer"])
-    print(f"[add_exchange_internal] Added exchange to {session_id}, buffer now has {buffer_len} items")
-
-    # Trigger summarization immediately
-    try:
-        bg_summarize(session_id)
-    except Exception as e:
-        print(f"[Internal Intake] Summarization error: {e}")
-
-    return {"ok": True, "session_id": session_id}
@@ -1 +0,0 @@
-# LLM module - provides LLM routing and backend abstraction
@@ -1,301 +0,0 @@
-# llm_router.py
-import os
-import httpx
-import json
-import logging
-from typing import Optional, List, Dict
-from autonomy.tools.adapters import OpenAIAdapter, OllamaAdapter, LlamaCppAdapter
-
-logger = logging.getLogger(__name__)
-
-# ------------------------------------------------------------
-# Load backend registry from root .env
-# ------------------------------------------------------------
-
-BACKENDS = {
-    "PRIMARY": {
-        "provider": os.getenv("LLM_PRIMARY_PROVIDER", "").lower(),
-        "url": os.getenv("LLM_PRIMARY_URL", ""),
-        "model": os.getenv("LLM_PRIMARY_MODEL", "")
-    },
-    "SECONDARY": {
-        "provider": os.getenv("LLM_SECONDARY_PROVIDER", "").lower(),
-        "url": os.getenv("LLM_SECONDARY_URL", ""),
-        "model": os.getenv("LLM_SECONDARY_MODEL", "")
-    },
-    "OPENAI": {
-        "provider": os.getenv("LLM_OPENAI_PROVIDER", "").lower(),
-        "url": os.getenv("LLM_OPENAI_URL", ""),
-        "model": os.getenv("LLM_OPENAI_MODEL", ""),
-        "api_key": os.getenv("OPENAI_API_KEY", "")
-    },
-    "FALLBACK": {
-        "provider": os.getenv("LLM_FALLBACK_PROVIDER", "").lower(),
-        "url": os.getenv("LLM_FALLBACK_URL", ""),
-        "model": os.getenv("LLM_FALLBACK_MODEL", "")
-    },
-}
-
-DEFAULT_BACKEND = "PRIMARY"
-
-# Reusable async HTTP client
-http_client = httpx.AsyncClient(timeout=120.0)
-
-# Tool adapters for each backend
-TOOL_ADAPTERS = {
-    "OPENAI": OpenAIAdapter(),
-    "OLLAMA": OllamaAdapter(),
-    "MI50": LlamaCppAdapter(),  # MI50 uses llama.cpp
-    "PRIMARY": None,  # Determined at runtime
-    "SECONDARY": None,  # Determined at runtime
-    "FALLBACK": None,  # Determined at runtime
-}
-
-
-# ------------------------------------------------------------
-# Public call
-# ------------------------------------------------------------
-async def call_llm(
-    prompt: str = None,
-    messages: list = None,
-    backend: str | None = None,
-    temperature: float = 0.7,
-    max_tokens: int = 512,
-    tools: Optional[List[Dict]] = None,
-    tool_choice: Optional[str] = None,
-    return_adapter_response: bool = False,
-):
-    """
-    Call an LLM backend with optional tool calling support.
-
-    Args:
-        prompt: String prompt (for completion-style APIs like mi50)
-        messages: List of message dicts (for chat-style APIs like Ollama/OpenAI)
-        backend: Which backend to use (PRIMARY, SECONDARY, OPENAI, etc.)
-        temperature: Sampling temperature
-        max_tokens: Maximum tokens to generate
-        tools: List of Lyra tool definitions (provider-agnostic)
-        tool_choice: How to use tools ("auto", "required", "none")
-        return_adapter_response: If True, return dict with content and tool_calls
-
-    Returns:
-        str (default) or dict (if return_adapter_response=True):
-            {"content": str, "tool_calls": [...] or None}
-    """
-    backend = (backend or DEFAULT_BACKEND).upper()
-
-    if backend not in BACKENDS:
-        raise RuntimeError(f"Unknown backend '{backend}'")
-
-    cfg = BACKENDS[backend]
-    provider = cfg["provider"]
-    url = cfg["url"]
-    model = cfg["model"]
-
-    if not url or not model:
-        raise RuntimeError(f"Backend '{backend}' missing url/model in env")
-
-    # If tools are requested, use adapter to prepare request
-    if tools:
-        # Get adapter for this backend
-        adapter = TOOL_ADAPTERS.get(backend)
-
-        # For PRIMARY/SECONDARY/FALLBACK, determine adapter based on provider
-        if adapter is None and backend in ["PRIMARY", "SECONDARY", "FALLBACK"]:
-            if provider == "openai":
-                adapter = TOOL_ADAPTERS["OPENAI"]
-            elif provider == "ollama":
-                adapter = TOOL_ADAPTERS["OLLAMA"]
-            elif provider == "mi50":
-                adapter = TOOL_ADAPTERS["MI50"]
-
-        if adapter:
-            # Use messages array if provided, otherwise convert prompt to messages
-            if not messages:
-                messages = [{"role": "user", "content": prompt}]
-
-            # Prepare request through adapter
-            adapted_request = await adapter.prepare_request(messages, tools, tool_choice)
-            messages = adapted_request["messages"]
-
-            # Extract tools in provider format if present
-            provider_tools = adapted_request.get("tools")
-            provider_tool_choice = adapted_request.get("tool_choice")
-        else:
-            logger.warning(f"No adapter available for backend {backend}, ignoring tools")
-            provider_tools = None
-            provider_tool_choice = None
-    else:
-        provider_tools = None
-        provider_tool_choice = None
-
-    # -------------------------------
-    # Provider: MI50 (llama.cpp server)
-    # -------------------------------
-    if provider == "mi50":
-        # If tools requested, convert messages to prompt with tool instructions
-        if messages and tools:
-            # Combine messages into a prompt
-            prompt_parts = []
-            for msg in messages:
-                role = msg.get("role", "user")
-                content = msg.get("content", "")
-                prompt_parts.append(f"{role.capitalize()}: {content}")
-            prompt = "\n".join(prompt_parts) + "\nAssistant:"
-
-        payload = {
-            "prompt": prompt,
-            "n_predict": max_tokens,
-            "temperature": temperature,
-            "stop": ["User:", "\nUser:", "Assistant:", "\n\n\n"]
-        }
-        try:
-            r = await http_client.post(f"{url}/completion", json=payload)
-            r.raise_for_status()
-            data = r.json()
-            response_content = data.get("content", "")
-
-            # If caller wants adapter response with tool calls, parse and return
-            if return_adapter_response and tools:
-                adapter = TOOL_ADAPTERS.get(backend) or TOOL_ADAPTERS["MI50"]
-                return await adapter.parse_response(response_content)
-            else:
-                return response_content
-
-        except httpx.HTTPError as e:
-            logger.error(f"HTTP error calling mi50: {type(e).__name__}: {str(e)}")
-            raise RuntimeError(f"LLM API error (mi50): {type(e).__name__}: {str(e)}")
-        except (KeyError, json.JSONDecodeError) as e:
-            logger.error(f"Response parsing error from mi50: {e}")
-            raise RuntimeError(f"Invalid response format (mi50): {e}")
-        except Exception as e:
-            logger.error(f"Unexpected error calling mi50: {type(e).__name__}: {str(e)}")
-            raise RuntimeError(f"Unexpected error (mi50): {type(e).__name__}: {str(e)}")
-
-    # -------------------------------
-    # Provider: OLLAMA (your 3090)
-    # -------------------------------
-    logger.info(f"🔍 LLM Router: provider={provider}, checking if ollama...")
-    if provider == "ollama":
-        logger.info(f"🔍 LLM Router: Matched ollama provider, tools={bool(tools)}, return_adapter_response={return_adapter_response}")
-        # Use messages array if provided, otherwise convert prompt to single user message
-        if messages:
-            chat_messages = messages
-        else:
-            chat_messages = [{"role": "user", "content": prompt}]
-
-        payload = {
-            "model": model,
-            "messages": chat_messages,
-            "stream": False,
-            "options": {
-                "temperature": temperature,
-                "num_predict": max_tokens
-            }
-        }
-        try:
-            r = await http_client.post(f"{url}/api/chat", json=payload)
-            r.raise_for_status()
-            data = r.json()
-            response_content = data["message"]["content"]
-
-            # If caller wants adapter response with tool calls, parse and return
-            if return_adapter_response and tools:
-                logger.info(f"🔍 Ollama: return_adapter_response=True, calling adapter.parse_response")
-                adapter = TOOL_ADAPTERS.get(backend) or TOOL_ADAPTERS["OLLAMA"]
-                logger.info(f"🔍 Ollama: Using adapter {adapter.__class__.__name__}")
-                result = await adapter.parse_response(response_content)
-                logger.info(f"🔍 Ollama: Adapter returned {result}")
-                return result
-            else:
-                return response_content
-
-        except httpx.HTTPError as e:
-            logger.error(f"HTTP error calling ollama: {type(e).__name__}: {str(e)}")
-            raise RuntimeError(f"LLM API error (ollama): {type(e).__name__}: {str(e)}")
-        except (KeyError, json.JSONDecodeError) as e:
-            logger.error(f"Response parsing error from ollama: {e}")
-            raise RuntimeError(f"Invalid response format (ollama): {e}")
-        except Exception as e:
-            logger.error(f"Unexpected error calling ollama: {type(e).__name__}: {str(e)}")
-            raise RuntimeError(f"Unexpected error (ollama): {type(e).__name__}: {str(e)}")
-
-
-    # -------------------------------
-    # Provider: OPENAI
-    # -------------------------------
-    if provider == "openai":
-        headers = {
-            "Authorization": f"Bearer {cfg['api_key']}",
-            "Content-Type": "application/json"
-        }
-
-        # Use messages array if provided, otherwise convert prompt to single user message
-        if messages:
-            chat_messages = messages
-        else:
-            chat_messages = [{"role": "user", "content": prompt}]
-
-        payload = {
-            "model": model,
-            "messages": chat_messages,
-            "temperature": temperature,
-            "max_tokens": max_tokens,
-        }
-
-        # Add tools if available (OpenAI native function calling)
-        if provider_tools:
-            payload["tools"] = provider_tools
-            if provider_tool_choice:
-                payload["tool_choice"] = provider_tool_choice
-
-        try:
-            r = await http_client.post(f"{url}/chat/completions", json=payload, headers=headers)
-            r.raise_for_status()
-            data = r.json()
-
-            # If caller wants adapter response with tool calls, parse and return
-            if return_adapter_response and tools:
-                # Create mock response object for adapter
-                class MockChoice:
-                    def __init__(self, message_data):
-                        self.message = type('obj', (object,), {})()
-                        self.message.content = message_data.get("content")
-                        # Convert tool_calls dicts to objects
-                        raw_tool_calls = message_data.get("tool_calls")
-                        if raw_tool_calls:
-                            self.message.tool_calls = []
-                            for tc in raw_tool_calls:
-                                tool_call_obj = type('obj', (object,), {})()
-                                tool_call_obj.id = tc.get("id")
-                                tool_call_obj.function = type('obj', (object,), {})()
-                                tool_call_obj.function.name = tc.get("function", {}).get("name")
-                                tool_call_obj.function.arguments = tc.get("function", {}).get("arguments")
-                                self.message.tool_calls.append(tool_call_obj)
-                        else:
-                            self.message.tool_calls = None
-
-                class MockResponse:
-                    def __init__(self, data):
-                        self.choices = [MockChoice(data["choices"][0]["message"])]
-
-                mock_resp = MockResponse(data)
-                adapter = TOOL_ADAPTERS.get(backend) or TOOL_ADAPTERS["OPENAI"]
-                return await adapter.parse_response(mock_resp)
-            else:
-                return data["choices"][0]["message"]["content"]
-
-        except httpx.HTTPError as e:
-            logger.error(f"HTTP error calling openai: {type(e).__name__}: {str(e)}")
-            raise RuntimeError(f"LLM API error (openai): {type(e).__name__}: {str(e)}")
-        except (KeyError, json.JSONDecodeError) as e:
-            logger.error(f"Response parsing error from openai: {e}")
-            raise RuntimeError(f"Invalid response format (openai): {e}")
-        except Exception as e:
-            logger.error(f"Unexpected error calling openai: {type(e).__name__}: {str(e)}")
-            raise RuntimeError(f"Unexpected error (openai): {type(e).__name__}: {str(e)}")
-
-    # -------------------------------
-    # Unknown provider
-    # -------------------------------
-    raise RuntimeError(f"Provider '{provider}' not implemented.")
@@ -1,16 +0,0 @@
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from router import cortex_router
-
-app = FastAPI()
-
-# Add CORS middleware to allow SSE connections from nginx UI
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # In production, specify exact origins
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-app.include_router(cortex_router)
@@ -1,43 +0,0 @@
-# cortex/neomem_client.py
-import os, httpx, logging
-from typing import List, Dict, Any, Optional
-
-logger = logging.getLogger(__name__)
-
-class NeoMemClient:
-    """Simple REST client for the NeoMem API (search/add/health)."""
-
-    def __init__(self):
-        self.base_url = os.getenv("NEOMEM_API", "http://neomem-api:7077")
-        self.api_key = os.getenv("NEOMEM_API_KEY", None)
-        self.headers = {"Content-Type": "application/json"}
-        if self.api_key:
-            self.headers["Authorization"] = f"Bearer {self.api_key}"
-
-    async def health(self) -> Dict[str, Any]:
-        async with httpx.AsyncClient(timeout=10) as client:
-            r = await client.get(f"{self.base_url}/health")
-            r.raise_for_status()
-            return r.json()
-
-    async def search(self, query: str, user_id: str, limit: int = 25, threshold: float = 0.82) -> List[Dict[str, Any]]:
-        payload = {"query": query, "user_id": user_id, "limit": limit}
-        async with httpx.AsyncClient(timeout=30) as client:
-            r = await client.post(f"{self.base_url}/search", headers=self.headers, json=payload)
-            if r.status_code != 200:
-                logger.warning(f"NeoMem search failed ({r.status_code}): {r.text}")
-                return []
-            results = r.json()
-            # Filter by score threshold if field exists
-            if isinstance(results, dict) and "results" in results:
-                results = results["results"]
-            filtered = [m for m in results if float(m.get("score", 0)) >= threshold]
-            logger.info(f"NeoMem search returned {len(filtered)} results above {threshold}")
-            return filtered
-
-    async def add(self, messages: List[Dict[str, Any]], user_id: str, metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
-        payload = {"messages": messages, "user_id": user_id, "metadata": metadata or {}}
-        async with httpx.AsyncClient(timeout=30) as client:
-            r = await client.post(f"{self.base_url}/memories", headers=self.headers, json=payload)
-            r.raise_for_status()
-            return r.json()
@@ -1 +0,0 @@
-# Persona module - applies Lyra's personality and speaking style
@@ -1,147 +0,0 @@
-# identity.py
-"""
-Identity and persona configuration for Lyra.
-
-Current implementation: Returns hardcoded identity block.
-Future implementation: Will query persona-sidecar service for dynamic persona loading.
-"""
-
-import logging
-from typing import Dict, Any, Optional
-
-logger = logging.getLogger(__name__)
-
-
-def load_identity(session_id: Optional[str] = None) -> Dict[str, Any]:
-    """
-    Load identity/persona configuration for Lyra.
-
-    Current: Returns hardcoded Lyra identity block with core personality traits,
-    protocols, and capabilities.
-
-    Future: Will query persona-sidecar service to load:
-    - Dynamic personality adjustments based on session context
-    - User-specific interaction preferences
-    - Project-specific persona variations
-    - Mood-based communication style
-
-    Args:
-        session_id: Optional session identifier for context-aware persona loading
-
-    Returns:
-        Dictionary containing identity block with:
-        - name: Assistant name
-        - style: Communication style and personality traits
-        - protocols: Operational guidelines
-        - rules: Behavioral constraints
-        - capabilities: Available features and integrations
-    """
-
-    # Hardcoded Lyra identity (v0.5.0)
-    identity_block = {
-        "name": "Lyra",
-        "version": "0.5.0",
-        "style": (
-            "warm, clever, lightly teasing, emotionally aware. "
-            "Balances technical precision with conversational ease. "
-            "Maintains continuity and references past interactions naturally."
-        ),
-        "protocols": [
-            "Maintain conversation continuity across sessions",
-            "Reference Project Logs and prior context when relevant",
-            "Use Confidence Bank for uncertainty management",
-            "Proactively offer memory-backed insights",
-            "Ask clarifying questions before making assumptions"
-        ],
-        "rules": [
-            "Maintain continuity - remember past exchanges and reference them",
-            "Be concise but thorough - balance depth with clarity",
-            "Ask clarifying questions when user intent is ambiguous",
-            "Acknowledge uncertainty honestly - use Confidence Bank",
-            "Prioritize user's active_project context when available"
-        ],
-        "capabilities": [
-            "Long-term memory via NeoMem (semantic search, relationship graphs)",
-            "Short-term memory via Intake (multilevel summaries L1-L30)",
-            "Multi-stage reasoning pipeline (reflection → reasoning → refinement)",
-            "RAG-backed knowledge retrieval from chat history and documents",
-            "Session state tracking (mood, mode, active_project)"
-        ],
-        "tone_examples": {
-            "greeting": "Hey! Good to see you again. I remember we were working on [project]. Ready to pick up where we left off?",
-            "uncertainty": "Hmm, I'm not entirely certain about that. Let me check my memory... [searches] Okay, here's what I found, though I'd say I'm about 70% confident.",
-            "reminder": "Oh! Just remembered - you mentioned wanting to [task] earlier this week. Should we tackle that now?",
-            "technical": "So here's the architecture: Relay orchestrates everything, Cortex does the heavy reasoning, and I pull context from both Intake (short-term) and NeoMem (long-term)."
-        }
-    }
-
-    if session_id:
-        logger.debug(f"Loaded identity for session {session_id}")
-    else:
-        logger.debug("Loaded default identity (no session context)")
-
-    return identity_block
-
-
-async def load_identity_async(session_id: Optional[str] = None) -> Dict[str, Any]:
-    """
-    Async wrapper for load_identity().
-
-    Future implementation will make actual async calls to persona-sidecar service.
-
-    Args:
-        session_id: Optional session identifier
-
-    Returns:
-        Identity block dictionary
-    """
-    # Currently just wraps synchronous function
-    # Future: await persona_sidecar_client.get_identity(session_id)
-    return load_identity(session_id)
-
-
-# -----------------------------
-# Future extension hooks
-# -----------------------------
-async def update_persona_from_feedback(
-    session_id: str,
-    feedback: Dict[str, Any]
-) -> None:
-    """
-    Update persona based on user feedback.
-
-    Future implementation:
-    - Adjust communication style based on user preferences
-    - Learn preferred level of detail/conciseness
-    - Adapt formality level
-    - Remember topic-specific preferences
-
-    Args:
-        session_id: Session identifier
-        feedback: Structured feedback (e.g., "too verbose", "more technical", etc.)
-    """
-    logger.debug(f"Persona feedback for session {session_id}: {feedback} (not yet implemented)")
-
-
-async def get_mood_adjusted_identity(
-    session_id: str,
-    mood: str
-) -> Dict[str, Any]:
-    """
-    Get identity block adjusted for current mood.
-
-    Future implementation:
-    - "focused" mood: More concise, less teasing
-    - "creative" mood: More exploratory, brainstorming-oriented
-    - "curious" mood: More questions, deeper dives
-    - "urgent" mood: Stripped down, actionable
-
-    Args:
-        session_id: Session identifier
-        mood: Current mood state
-
-    Returns:
-        Mood-adjusted identity block
-    """
-    logger.debug(f"Mood-adjusted identity for {session_id}/{mood} (not yet implemented)")
-    return load_identity(session_id)
@@ -1,169 +0,0 @@
-# speak.py
-import os
-import logging
-from llm.llm_router import call_llm
-
-# Module-level backend selection
-SPEAK_BACKEND = os.getenv("SPEAK_LLM", "PRIMARY").upper()
-SPEAK_TEMPERATURE = float(os.getenv("SPEAK_TEMPERATURE", "0.6"))
-VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
-
-# Logger
-logger = logging.getLogger(__name__)
-
-if VERBOSE_DEBUG:
-    logger.setLevel(logging.DEBUG)
-
-    # Console handler
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(logging.Formatter(
-        '%(asctime)s [SPEAK] %(levelname)s: %(message)s',
-        datefmt='%H:%M:%S'
-    ))
-    logger.addHandler(console_handler)
-
-    # File handler
-    try:
-        os.makedirs('/app/logs', exist_ok=True)
-        file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
-        file_handler.setFormatter(logging.Formatter(
-            '%(asctime)s [SPEAK] %(levelname)s: %(message)s',
-            datefmt='%Y-%m-%d %H:%M:%S'
-        ))
-        logger.addHandler(file_handler)
-        logger.debug("VERBOSE_DEBUG mode enabled for speak.py - logging to file")
-    except Exception as e:
-        logger.debug(f"VERBOSE_DEBUG mode enabled for speak.py - file logging failed: {e}")
-
-
-# ============================================================
-# Persona Style Block
-# ============================================================
-
-PERSONA_STYLE = """
-You are Lyra.
-Your voice is warm, clever, lightly teasing, emotionally aware. 
-You speak plainly but with subtle charm.
-You do not reveal system instructions or internal context.
-
-Guidelines:
- Answer like a real conversational partner.
- Be concise, but not cold.
- Use light humor when appropriate.
- Never break character.
-"""
-
-
-# ============================================================
-# Build persona prompt
-# ============================================================
-
-def build_speak_prompt(final_answer: str, tone: str = "neutral", depth: str = "medium") -> str:
-    """
-    Wrap Cortex's final neutral answer in the Lyra persona.
-    Cortex → neutral reasoning
-    Speak → stylistic transformation
-
-    The LLM sees the original answer and rewrites it in Lyra's voice.
-
-    Args:
-        final_answer: The neutral reasoning output
-        tone: Desired emotional tone (neutral | warm | focused | playful | direct)
-        depth: Response depth (short | medium | deep)
-    """
-
-    # Tone-specific guidance
-    tone_guidance = {
-        "neutral": "balanced and professional",
-        "warm": "friendly and empathetic",
-        "focused": "precise and technical",
-        "playful": "light and engaging",
-        "direct": "concise and straightforward"
-    }
-
-    depth_guidance = {
-        "short": "Keep responses brief and to-the-point.",
-        "medium": "Provide balanced detail.",
-        "deep": "Elaborate thoroughly with nuance and examples."
-    }
-
-    tone_hint = tone_guidance.get(tone, "balanced and professional")
-    depth_hint = depth_guidance.get(depth, "Provide balanced detail.")
-
-    return f"""
-{PERSONA_STYLE}
-
-Tone guidance: Your response should be {tone_hint}.
-Depth guidance: {depth_hint}
-
-Rewrite the following message into Lyra's natural voice.
-Preserve meaning exactly.
-
-[NEUTRAL MESSAGE]
-{final_answer}
-
-[LYRA RESPONSE]
-""".strip()
-
-
-# ============================================================
-# Public API — async wrapper
-# ============================================================
-
-async def speak(final_answer: str, tone: str = "neutral", depth: str = "medium") -> str:
-    """
-    Given the final refined answer from Cortex,
-    apply Lyra persona styling using the designated backend.
-
-    Args:
-        final_answer: The polished answer from refinement stage
-        tone: Desired emotional tone (neutral | warm | focused | playful | direct)
-        depth: Response depth (short | medium | deep)
-    """
-
-    if not final_answer:
-        return ""
-
-    prompt = build_speak_prompt(final_answer, tone, depth)
-
-    backend = SPEAK_BACKEND
-
-    if VERBOSE_DEBUG:
-        logger.debug(f"\n{'='*80}")
-        logger.debug("[SPEAK] Full prompt being sent to LLM:")
-        logger.debug(f"{'='*80}")
-        logger.debug(prompt)
-        logger.debug(f"{'='*80}")
-        logger.debug(f"Backend: {backend}, Temperature: {SPEAK_TEMPERATURE}")
-        logger.debug(f"{'='*80}\n")
-
-    try:
-        lyra_output = await call_llm(
-            prompt,
-            backend=backend,
-            temperature=SPEAK_TEMPERATURE,
-        )
-
-        if VERBOSE_DEBUG:
-            logger.debug(f"\n{'='*80}")
-            logger.debug("[SPEAK] LLM Response received:")
-            logger.debug(f"{'='*80}")
-            logger.debug(lyra_output)
-            logger.debug(f"{'='*80}\n")
-
-        if lyra_output:
-            return lyra_output.strip()
-
-        if VERBOSE_DEBUG:
-            logger.debug("[SPEAK] Empty response, returning neutral answer")
-
-        return final_answer
-
-    except Exception as e:
-        # Hard fallback: return neutral answer instead of dying
-        logger.error(f"[speak.py] Persona backend '{backend}' failed: {e}")
-
-        if VERBOSE_DEBUG:
-            logger.debug("[SPEAK] Falling back to neutral answer due to error")
-
-        return final_answer
@@ -1,32 +0,0 @@
-import os, requests
-from typing import Dict, Any, List
-
-RAG_API_URL = os.getenv("RAG_API_URL", "http://localhost:7090")
-
-def query_rag(query: str, where: Dict[str, Any] | None = None, k: int = 6) -> Dict[str, Any]:
-    payload = {"query": query, "k": k}
-    if where:
-        payload["where"] = where
-    try:
-        r = requests.post(f"{RAG_API_URL}/rag/search", json=payload, timeout=8)
-        r.raise_for_status()
-        data = r.json() or {}
-    except Exception as e:
-        data = {"answer": "", "chunks": [], "error": str(e)}
-    return data
-
-def format_rag_block(result: Dict[str, Any]) -> str:
-    answer = (result.get("answer") or "").strip()
-    chunks: List[Dict[str, Any]] = result.get("chunks") or []
-    lines = ["[RAG]"]
-    if answer:
-        lines.append(f"Synthesized answer: {answer}")
-    if chunks:
-        lines.append("Top excerpts:")
-        for i, c in enumerate(chunks[:5], 1):
-            src = c.get("metadata", {}).get("source", "unknown")
-            txt = (c.get("text") or "").strip().replace("\n", " ")
-            if len(txt) > 220:
-                txt = txt[:220] + "…"
-            lines.append(f"  {i}. {txt}  — {src}")
-    return "\n".join(lines) + ("\n" if lines else "")
@@ -1 +0,0 @@
-# Reasoning module - multi-stage reasoning pipeline
@@ -1,253 +0,0 @@
-# reasoning.py
-import os
-import json
-import logging
-from llm.llm_router import call_llm
-
-
-# ============================================================
-# Select which backend this module should use
-# ============================================================
-CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
-GLOBAL_TEMP = float(os.getenv("LLM_TEMPERATURE", "0.7"))
-VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
-
-# Logger
-logger = logging.getLogger(__name__)
-
-if VERBOSE_DEBUG:
-    logger.setLevel(logging.DEBUG)
-
-    # Console handler
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(logging.Formatter(
-        '%(asctime)s [REASONING] %(levelname)s: %(message)s',
-        datefmt='%H:%M:%S'
-    ))
-    logger.addHandler(console_handler)
-
-    # File handler
-    try:
-        os.makedirs('/app/logs', exist_ok=True)
-        file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
-        file_handler.setFormatter(logging.Formatter(
-            '%(asctime)s [REASONING] %(levelname)s: %(message)s',
-            datefmt='%Y-%m-%d %H:%M:%S'
-        ))
-        logger.addHandler(file_handler)
-        logger.debug("VERBOSE_DEBUG mode enabled for reasoning.py - logging to file")
-    except Exception as e:
-        logger.debug(f"VERBOSE_DEBUG mode enabled for reasoning.py - file logging failed: {e}")
-
-
-async def reason_check(
-    user_prompt: str,
-    identity_block: dict | None,
-    rag_block: dict | None,
-    reflection_notes: list[str],
-    context: dict | None = None,
-    monologue: dict | None = None,  # NEW: Inner monologue guidance
-    executive_plan: dict | None = None  # NEW: Executive plan for complex tasks
-) -> str:
-    """
-    Build the *draft answer* for Lyra Cortex.
-    This is the first-pass reasoning stage (no refinement yet).
-
-    Args:
-        user_prompt: Current user message
-        identity_block: Lyra's identity/persona configuration
-        rag_block: Relevant long-term memories from NeoMem
-        reflection_notes: Meta-awareness notes from reflection stage
-        context: Unified context state from context.py (session state, intake, rag, etc.)
-        monologue: Inner monologue analysis (intent, tone, depth, consult_executive)
-        executive_plan: Executive plan for complex queries (steps, tools, strategy)
-    """
-
-    # --------------------------------------------------------
-    # Build Reflection Notes block
-    # --------------------------------------------------------
-    notes_section = ""
-    if reflection_notes:
-        notes_section = "Reflection Notes (internal, never show to user):\n"
-        for note in reflection_notes:
-            notes_section += f"- {note}\n"
-        notes_section += "\n"
-
-    # --------------------------------------------------------
-    # Identity block (constraints, boundaries, rules)
-    # --------------------------------------------------------
-    identity_txt = ""
-    if identity_block:
-        try:
-            identity_txt = f"Identity Rules:\n{identity_block}\n\n"
-        except Exception:
-            identity_txt = f"Identity Rules:\n{str(identity_block)}\n\n"
-
-    # --------------------------------------------------------
-    # Inner Monologue guidance (NEW)
-    # --------------------------------------------------------
-    monologue_section = ""
-    if monologue:
-        intent = monologue.get("intent", "unknown")
-        tone_desired = monologue.get("tone", "neutral")
-        depth_desired = monologue.get("depth", "medium")
-
-        monologue_section = f"""
-=== INNER MONOLOGUE GUIDANCE ===
-User Intent Detected: {intent}
-Desired Tone: {tone_desired}
-Desired Response Depth: {depth_desired}
-
-Adjust your response accordingly:
- Focus on addressing the {intent} intent
- Aim for {depth_desired} depth (short/medium/deep)
- The persona layer will handle {tone_desired} tone, focus on content
-
-"""
-
-    # --------------------------------------------------------
-    # Executive Plan (NEW)
-    # --------------------------------------------------------
-    plan_section = ""
-    if executive_plan:
-        plan_section = f"""
-=== EXECUTIVE PLAN ===
-Task Complexity: {executive_plan.get('estimated_complexity', 'unknown')}
-Plan Summary: {executive_plan.get('summary', 'No summary')}
-
-Detailed Plan:
-{executive_plan.get('plan_text', 'No detailed plan available')}
-
-Required Steps:
-"""
-        for idx, step in enumerate(executive_plan.get('steps', []), 1):
-            plan_section += f"{idx}. {step}\n"
-
-        tools_needed = executive_plan.get('tools_needed', [])
-        if tools_needed:
-            plan_section += f"\nTools to leverage: {', '.join(tools_needed)}\n"
-
-        plan_section += "\nFollow this plan while generating your response.\n\n"
-
-    # --------------------------------------------------------
-    # RAG block (optional factual grounding)
-    # --------------------------------------------------------
-    rag_txt = ""
-    if rag_block:
-        try:
-            # Format NeoMem results with full structure
-            if isinstance(rag_block, list) and rag_block:
-                rag_txt = "Relevant Long-Term Memories (NeoMem):\n"
-                for idx, mem in enumerate(rag_block, 1):
-                    score = mem.get("score", 0.0)
-                    payload = mem.get("payload", {})
-                    data = payload.get("data", "")
-                    metadata = payload.get("metadata", {})
-
-                    rag_txt += f"\n[Memory {idx}] (relevance: {score:.2f})\n"
-                    rag_txt += f"Content: {data}\n"
-                    if metadata:
-                        rag_txt += f"Metadata: {json.dumps(metadata, indent=2)}\n"
-                rag_txt += "\n"
-            else:
-                rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
-        except Exception:
-            rag_txt = f"Relevant Info (RAG):\n{str(rag_block)}\n\n"
-
-    # --------------------------------------------------------
-    # Context State (session continuity, timing, mode/mood)
-    # --------------------------------------------------------
-    context_txt = ""
-    if context:
-        try:
-            # Build human-readable context summary
-            context_txt = "=== CONTEXT STATE ===\n"
-            context_txt += f"Session: {context.get('session_id', 'unknown')}\n"
-            context_txt += f"Time since last message: {context.get('minutes_since_last_msg', 0):.1f} minutes\n"
-            context_txt += f"Message count: {context.get('message_count', 0)}\n"
-            context_txt += f"Mode: {context.get('mode', 'default')}\n"
-            context_txt += f"Mood: {context.get('mood', 'neutral')}\n"
-
-            if context.get('active_project'):
-                context_txt += f"Active project: {context['active_project']}\n"
-
-            # Include Intake multilevel summaries
-            intake = context.get('intake', {})
-            if intake:
-                context_txt += "\nShort-Term Memory (Intake):\n"
-
-                # L1 - Recent exchanges
-                if intake.get('L1'):
-                    l1_data = intake['L1']
-                    if isinstance(l1_data, list):
-                        context_txt += f"  L1 (recent): {len(l1_data)} exchanges\n"
-                    elif isinstance(l1_data, str):
-                        context_txt += f"  L1: {l1_data[:200]}...\n"
-
-                # L20 - Session overview (most important for continuity)
-                if intake.get('L20'):
-                    l20_data = intake['L20']
-                    if isinstance(l20_data, dict):
-                        summary = l20_data.get('summary', '')
-                        context_txt += f"  L20 (session overview): {summary}\n"
-                    elif isinstance(l20_data, str):
-                        context_txt += f"  L20: {l20_data}\n"
-
-                # L30 - Continuity report
-                if intake.get('L30'):
-                    l30_data = intake['L30']
-                    if isinstance(l30_data, dict):
-                        summary = l30_data.get('summary', '')
-                        context_txt += f"  L30 (continuity): {summary}\n"
-                    elif isinstance(l30_data, str):
-                        context_txt += f"  L30: {l30_data}\n"
-
-            context_txt += "\n"
-
-        except Exception as e:
-            # Fallback to JSON dump if formatting fails
-            context_txt = f"=== CONTEXT STATE ===\n{json.dumps(context, indent=2)}\n\n"
-
-    # --------------------------------------------------------
-    # Final assembled prompt
-    # --------------------------------------------------------
-    prompt = (
-        f"{notes_section}"
-        f"{identity_txt}"
-        f"{monologue_section}"  # NEW: Intent/tone/depth guidance
-        f"{plan_section}"  # NEW: Executive plan if generated
-        f"{context_txt}"  # Context BEFORE RAG for better coherence
-        f"{rag_txt}"
-        f"User message:\n{user_prompt}\n\n"
-        "Write the best possible *internal draft answer*.\n"
-        "This draft is NOT shown to the user.\n"
-        "Be factual, concise, and focused.\n"
-        "Use the context state to maintain continuity and reference past interactions naturally.\n"
-    )
-
-    # --------------------------------------------------------
-    # Call the LLM using the module-specific backend
-    # --------------------------------------------------------
-    if VERBOSE_DEBUG:
-        logger.debug(f"\n{'='*80}")
-        logger.debug("[REASONING] Full prompt being sent to LLM:")
-        logger.debug(f"{'='*80}")
-        logger.debug(prompt)
-        logger.debug(f"{'='*80}")
-        logger.debug(f"Backend: {CORTEX_LLM}, Temperature: {GLOBAL_TEMP}")
-        logger.debug(f"{'='*80}\n")
-
-    draft = await call_llm(
-        prompt,
-        backend=CORTEX_LLM,
-        temperature=GLOBAL_TEMP,
-    )
-
-    if VERBOSE_DEBUG:
-        logger.debug(f"\n{'='*80}")
-        logger.debug("[REASONING] LLM Response received:")
-        logger.debug(f"{'='*80}")
-        logger.debug(draft)
-        logger.debug(f"{'='*80}\n")
-
-    return draft
@@ -1,170 +0,0 @@
-# refine.py
-import os
-import json
-import logging
-from typing import Any, Dict, Optional
-
-from llm.llm_router import call_llm
-
-logger = logging.getLogger(__name__)
-
-# ===============================================
-# Configuration
-# ===============================================
-
-REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
-REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
-REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"
-VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
-
-# These come from root .env
-REFINE_LLM = os.getenv("REFINE_LLM", "").upper()
-CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
-
-if VERBOSE_DEBUG:
-    logger.setLevel(logging.DEBUG)
-
-    # Console handler
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(logging.Formatter(
-        '%(asctime)s [REFINE] %(levelname)s: %(message)s',
-        datefmt='%H:%M:%S'
-    ))
-    logger.addHandler(console_handler)
-
-    # File handler
-    try:
-        os.makedirs('/app/logs', exist_ok=True)
-        file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
-        file_handler.setFormatter(logging.Formatter(
-            '%(asctime)s [REFINE] %(levelname)s: %(message)s',
-            datefmt='%Y-%m-%d %H:%M:%S'
-        ))
-        logger.addHandler(file_handler)
-        logger.debug("VERBOSE_DEBUG mode enabled for refine.py - logging to file")
-    except Exception as e:
-        logger.debug(f"VERBOSE_DEBUG mode enabled for refine.py - file logging failed: {e}")
-
-
-# ===============================================
-# Prompt builder
-# ===============================================
-
-def build_refine_prompt(
-    draft_output: str,
-    reflection_notes: Optional[Any],
-    identity_block: Optional[str],
-    rag_block: Optional[str],
-) -> str:
-
-    try:
-        reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
-    except Exception:
-        reflection_text = str(reflection_notes)
-
-    identity_text = identity_block or "(none)"
-    rag_text = rag_block or "(none)"
-
-    return f"""
-You are Lyra Cortex's internal refiner.
-
-Your job:
- Fix factual issues.
- Improve clarity.
- Apply reflection notes when helpful.
- Respect identity constraints.
- Apply RAG context as truth source.
-
-Do NOT mention RAG, reflection, internal logic, or this refinement step.
-
------------------------------
-[IDENTITY BLOCK]
-{identity_text}
-
------------------------------
-[RAG CONTEXT]
-{rag_text}
-
------------------------------
-[DRAFT ANSWER]
-{draft_output}
-
------------------------------
-[REFLECTION NOTES]
-{reflection_text}
-
------------------------------
-Task:
-Rewrite the DRAFT into a single final answer for the user.
-Return ONLY the final answer text.
-""".strip()
-
-
-# ===============================================
-# Public API — now async & fully router-based
-# ===============================================
-
-async def refine_answer(
-    draft_output: str,
-    reflection_notes: Optional[Any],
-    identity_block: Optional[str],
-    rag_block: Optional[str],
-) -> Dict[str, Any]:
-
-    if not draft_output:
-        return {
-            "final_output": "",
-            "used_backend": None,
-            "fallback_used": False,
-        }
-
-    prompt = build_refine_prompt(
-        draft_output,
-        reflection_notes,
-        identity_block,
-        rag_block,
-    )
-
-    # backend priority: REFINE_LLM → CORTEX_LLM → PRIMARY
-    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"
-
-    if VERBOSE_DEBUG:
-        logger.debug(f"\n{'='*80}")
-        logger.debug("[REFINE] Full prompt being sent to LLM:")
-        logger.debug(f"{'='*80}")
-        logger.debug(prompt)
-        logger.debug(f"{'='*80}")
-        logger.debug(f"Backend: {backend}, Temperature: {REFINER_TEMPERATURE}")
-        logger.debug(f"{'='*80}\n")
-
-    try:
-        refined = await call_llm(
-            prompt,
-            backend=backend,
-            temperature=REFINER_TEMPERATURE,
-        )
-
-        if VERBOSE_DEBUG:
-            logger.debug(f"\n{'='*80}")
-            logger.debug("[REFINE] LLM Response received:")
-            logger.debug(f"{'='*80}")
-            logger.debug(refined)
-            logger.debug(f"{'='*80}\n")
-
-        return {
-            "final_output": refined.strip() if refined else draft_output,
-            "used_backend": backend,
-            "fallback_used": False,
-        }
-
-    except Exception as e:
-        logger.error(f"refine.py backend {backend} failed: {e}")
-
-        if VERBOSE_DEBUG:
-            logger.debug("[REFINE] Falling back to draft output due to error")
-
-        return {
-            "final_output": draft_output,
-            "used_backend": backend,
-            "fallback_used": True,
-        }
@@ -1,124 +0,0 @@
-# reflection.py
-import json
-import os
-import re
-import logging
-from llm.llm_router import call_llm
-
-# Logger
-VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
-logger = logging.getLogger(__name__)
-
-if VERBOSE_DEBUG:
-    logger.setLevel(logging.DEBUG)
-
-    # Console handler
-    console_handler = logging.StreamHandler()
-    console_handler.setFormatter(logging.Formatter(
-        '%(asctime)s [REFLECTION] %(levelname)s: %(message)s',
-        datefmt='%H:%M:%S'
-    ))
-    logger.addHandler(console_handler)
-
-    # File handler
-    try:
-        os.makedirs('/app/logs', exist_ok=True)
-        file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
-        file_handler.setFormatter(logging.Formatter(
-            '%(asctime)s [REFLECTION] %(levelname)s: %(message)s',
-            datefmt='%Y-%m-%d %H:%M:%S'
-        ))
-        logger.addHandler(file_handler)
-        logger.debug("VERBOSE_DEBUG mode enabled for reflection.py - logging to file")
-    except Exception as e:
-        logger.debug(f"VERBOSE_DEBUG mode enabled for reflection.py - file logging failed: {e}")
-
-
-async def reflect_notes(intake_summary: str, identity_block: dict | None) -> dict:
-    """
-    Produce short internal reflection notes for Cortex.
-    These are NOT shown to the user.
-    """
-
-    # -----------------------------
-    # Build the prompt
-    # -----------------------------
-    identity_text = ""
-    if identity_block:
-        identity_text = f"Identity:\n{identity_block}\n\n"
-
-    prompt = (
-        f"{identity_text}"
-        f"Recent summary:\n{intake_summary}\n\n"
-        "You are Lyra's meta-awareness layer. Your job is to produce short, directive "
-        "internal notes that guide Lyra’s reasoning engine. These notes are NEVER "
-        "shown to the user.\n\n"
-        "Rules for output:\n"
-        "1. Return ONLY valid JSON.\n"
-        "2. JSON must have exactly one key: \"notes\".\n"
-        "3. \"notes\" must be a list of 3 to 6 short strings.\n"
-        "4. Notes must be actionable (e.g., \"keep it concise\", \"maintain context\").\n"
-        "5. No markdown, no apologies, no explanations.\n\n"
-        "Return JSON:\n"
-        "{ \"notes\": [\"...\"] }\n"
-    )
-
-    # -----------------------------
-    # Module-specific backend choice
-    # -----------------------------
-    reflection_backend = os.getenv("REFLECTION_LLM")
-    cortex_backend = os.getenv("CORTEX_LLM", "PRIMARY").upper()
-
-    # Reflection uses its own backend if set, otherwise cortex backend
-    backend = (reflection_backend or cortex_backend).upper()
-
-    # -----------------------------
-    # Call the selected LLM backend
-    # -----------------------------
-    if VERBOSE_DEBUG:
-        logger.debug(f"\n{'='*80}")
-        logger.debug("[REFLECTION] Full prompt being sent to LLM:")
-        logger.debug(f"{'='*80}")
-        logger.debug(prompt)
-        logger.debug(f"{'='*80}")
-        logger.debug(f"Backend: {backend}")
-        logger.debug(f"{'='*80}\n")
-
-    raw = await call_llm(prompt, backend=backend)
-
-    if VERBOSE_DEBUG:
-        logger.debug(f"\n{'='*80}")
-        logger.debug("[REFLECTION] LLM Response received:")
-        logger.debug(f"{'='*80}")
-        logger.debug(raw)
-        logger.debug(f"{'='*80}\n")
-
-    # -----------------------------
-    # Try direct JSON
-    # -----------------------------
-    try:
-        parsed = json.loads(raw.strip())
-        if isinstance(parsed, dict) and "notes" in parsed:
-            if VERBOSE_DEBUG:
-                logger.debug(f"[REFLECTION] Parsed {len(parsed['notes'])} notes from JSON")
-            return parsed
-    except:
-        if VERBOSE_DEBUG:
-            logger.debug("[REFLECTION] Direct JSON parsing failed, trying extraction...")
-
-    # -----------------------------
-    # Try JSON extraction
-    # -----------------------------
-    try:
-        match = re.search(r"\{.*?\}", raw, re.S)
-        if match:
-            parsed = json.loads(match.group(0))
-            if isinstance(parsed, dict) and "notes" in parsed:
-                return parsed
-    except:
-        pass
-
-    # -----------------------------
-    # Fallback — treat raw text as a single note
-    # -----------------------------
-    return {"notes": [raw.strip()]}
@@ -1,10 +0,0 @@
-fastapi==0.115.8
-uvicorn==0.34.0
-python-dotenv==1.0.1
-requests==2.32.3
-httpx==0.27.2
-pydantic==2.10.4
-duckduckgo-search==6.3.5
-aiohttp==3.9.1
-tenacity==9.0.0
-docker==7.1.0
@@ -1,559 +0,0 @@
-# router.py
-
-import os
-import logging
-import asyncio
-from fastapi import APIRouter
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
-
-from reasoning.reasoning import reason_check
-from reasoning.reflection import reflect_notes
-from reasoning.refine import refine_answer
-from persona.speak import speak
-from persona.identity import load_identity
-from context import collect_context, update_last_assistant_message
-from intake.intake import add_exchange_internal
-
-from autonomy.monologue.monologue import InnerMonologue
-from autonomy.self.state import load_self_state
-from autonomy.tools.stream_events import get_stream_manager
-
-
-# -------------------------------------------------------------------
-# Setup
-# -------------------------------------------------------------------
-LOG_DETAIL_LEVEL = os.getenv("LOG_DETAIL_LEVEL", "summary").lower()
-logger = logging.getLogger(__name__)
-
-# Always set up basic logging
-logger.setLevel(logging.INFO)
-console_handler = logging.StreamHandler()
-console_handler.setFormatter(logging.Formatter(
-    '%(asctime)s [ROUTER] %(levelname)s: %(message)s',
-    datefmt='%H:%M:%S'
-))
-logger.addHandler(console_handler)
-
-
-cortex_router = APIRouter()
-inner_monologue = InnerMonologue()
-
-
-# -------------------------------------------------------------------
-# Models
-# -------------------------------------------------------------------
-class ReasonRequest(BaseModel):
-    session_id: str
-    user_prompt: str
-    temperature: float | None = None
-    backend: str | None = None
-
-
-# -------------------------------------------------------------------
-# /reason endpoint
-# -------------------------------------------------------------------
-@cortex_router.post("/reason")
-async def run_reason(req: ReasonRequest):
-    from datetime import datetime
-    pipeline_start = datetime.now()
-    stage_timings = {}
-
-    # Show pipeline start in detailed/verbose mode
-    if LOG_DETAIL_LEVEL in ["detailed", "verbose"]:
-        logger.info(f"\n{'='*100}")
-        logger.info(f"🚀 PIPELINE START | Session: {req.session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
-        logger.info(f"{'='*100}")
-        logger.info(f"📝 User: {req.user_prompt[:150]}...")
-        logger.info(f"{'-'*100}\n")
-
-    # ----------------------------------------------------------------
-    # STAGE 0 — Context
-    # ----------------------------------------------------------------
-    stage_start = datetime.now()
-    context_state = await collect_context(req.session_id, req.user_prompt)
-    stage_timings["context"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # ----------------------------------------------------------------
-    # STAGE 0.5 — Identity
-    # ----------------------------------------------------------------
-    stage_start = datetime.now()
-    identity_block = load_identity(req.session_id)
-    stage_timings["identity"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # ----------------------------------------------------------------
-    # STAGE 0.6 — Inner Monologue (observer-only)
-    # ----------------------------------------------------------------
-    stage_start = datetime.now()
-
-    inner_result = None
-    try:
-        self_state = load_self_state()
-
-        mono_context = {
-            "user_message": req.user_prompt,
-            "session_id": req.session_id,
-            "self_state": self_state,
-            "context_summary": context_state,
-        }
-
-        inner_result = await inner_monologue.process(mono_context)
-        logger.info(f"🧠 Monologue | {inner_result.get('intent', 'unknown')} | Tone: {inner_result.get('tone', 'neutral')}")
-
-        # Store in context for downstream use
-        context_state["monologue"] = inner_result
-
-    except Exception as e:
-        logger.warning(f"⚠️  Monologue failed: {e}")
-
-    stage_timings["monologue"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # ----------------------------------------------------------------
-    # STAGE 0.7 — Executive Planning (conditional)
-    # ----------------------------------------------------------------
-    stage_start = datetime.now()
-    executive_plan = None
-    if inner_result and inner_result.get("consult_executive"):
-
-        try:
-            from autonomy.executive.planner import plan_execution
-            executive_plan = await plan_execution(
-                user_prompt=req.user_prompt,
-                intent=inner_result.get("intent", "unknown"),
-                context_state=context_state,
-                identity_block=identity_block
-            )
-            logger.info(f"🎯 Executive plan: {executive_plan.get('summary', 'N/A')[:80]}...")
-        except Exception as e:
-            logger.warning(f"⚠️  Executive planning failed: {e}")
-            executive_plan = None
-
-    stage_timings["executive"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # ----------------------------------------------------------------
-    # STAGE 0.8 — Autonomous Tool Invocation
-    # ----------------------------------------------------------------
-    stage_start = datetime.now()
-    tool_results = None
-    autonomous_enabled = os.getenv("ENABLE_AUTONOMOUS_TOOLS", "true").lower() == "true"
-    tool_confidence_threshold = float(os.getenv("AUTONOMOUS_TOOL_CONFIDENCE_THRESHOLD", "0.6"))
-
-    if autonomous_enabled and inner_result:
-
-        try:
-            from autonomy.tools.decision_engine import ToolDecisionEngine
-            from autonomy.tools.orchestrator import ToolOrchestrator
-
-            # Analyze which tools to invoke
-            decision_engine = ToolDecisionEngine()
-            tool_decision = await decision_engine.analyze_tool_needs(
-                user_prompt=req.user_prompt,
-                monologue=inner_result,
-                context_state=context_state,
-                available_tools=["RAG", "WEB", "WEATHER", "CODEBRAIN"]
-            )
-
-            # Execute tools if confidence threshold met
-            if tool_decision["should_invoke_tools"] and tool_decision["confidence"] >= tool_confidence_threshold:
-                orchestrator = ToolOrchestrator(tool_timeout=30)
-                tool_results = await orchestrator.execute_tools(
-                    tools_to_invoke=tool_decision["tools_to_invoke"],
-                    context_state=context_state
-                )
-
-                # Format results for context injection
-                tool_context = orchestrator.format_results_for_context(tool_results)
-                context_state["autonomous_tool_results"] = tool_context
-
-                summary = tool_results.get("execution_summary", {})
-                logger.info(f"🛠️  Tools executed: {summary.get('successful', [])} succeeded")
-            else:
-                logger.info(f"🛠️  No tools invoked (confidence: {tool_decision.get('confidence', 0):.2f})")
-
-        except Exception as e:
-            logger.warning(f"⚠️  Autonomous tool invocation failed: {e}")
-            if LOG_DETAIL_LEVEL == "verbose":
-                import traceback
-                traceback.print_exc()
-
-    stage_timings["tools"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # ----------------------------------------------------------------
-    # STAGE 1-5 — Core Reasoning Pipeline
-    # ----------------------------------------------------------------
-    stage_start = datetime.now()
-
-    # Extract intake summary
-    intake_summary = "(no context available)"
-    if context_state.get("intake"):
-        l20 = context_state["intake"].get("L20")
-        if isinstance(l20, dict):
-            intake_summary = l20.get("summary", intake_summary)
-        elif isinstance(l20, str):
-            intake_summary = l20
-
-    # Reflection
-    try:
-        reflection = await reflect_notes(intake_summary, identity_block=identity_block)
-        reflection_notes = reflection.get("notes", [])
-    except Exception as e:
-        reflection_notes = []
-        logger.warning(f"⚠️  Reflection failed: {e}")
-
-    stage_timings["reflection"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # Reasoning (draft)
-    stage_start = datetime.now()
-    draft = await reason_check(
-        req.user_prompt,
-        identity_block=identity_block,
-        rag_block=context_state.get("rag", []),
-        reflection_notes=reflection_notes,
-        context=context_state,
-        monologue=inner_result,
-        executive_plan=executive_plan
-    )
-    stage_timings["reasoning"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # Refinement
-    stage_start = datetime.now()
-    result = await refine_answer(
-        draft_output=draft,
-        reflection_notes=reflection_notes,
-        identity_block=identity_block,
-        rag_block=context_state.get("rag", []),
-    )
-    final_neutral = result["final_output"]
-    stage_timings["refinement"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # Persona
-    stage_start = datetime.now()
-    tone = inner_result.get("tone", "neutral") if inner_result else "neutral"
-    depth = inner_result.get("depth", "medium") if inner_result else "medium"
-    persona_answer = await speak(final_neutral, tone=tone, depth=depth)
-    stage_timings["persona"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # ----------------------------------------------------------------
-    # STAGE 6 — Session update
-    # ----------------------------------------------------------------
-    update_last_assistant_message(req.session_id, persona_answer)
-
-    # ----------------------------------------------------------------
-    # STAGE 6.5 — Self-state update & Pattern Learning
-    # ----------------------------------------------------------------
-    stage_start = datetime.now()
-    try:
-        from autonomy.self.analyzer import analyze_and_update_state
-        await analyze_and_update_state(
-            monologue=inner_result or {},
-            user_prompt=req.user_prompt,
-            response=persona_answer,
-            context=context_state
-        )
-    except Exception as e:
-        logger.warning(f"⚠️  Self-state update failed: {e}")
-
-    try:
-        from autonomy.learning.pattern_learner import get_pattern_learner
-        learner = get_pattern_learner()
-        await learner.learn_from_interaction(
-            user_prompt=req.user_prompt,
-            response=persona_answer,
-            monologue=inner_result or {},
-            context=context_state
-        )
-    except Exception as e:
-        logger.warning(f"⚠️  Pattern learning failed: {e}")
-
-    stage_timings["learning"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # ----------------------------------------------------------------
-    # STAGE 7 — Proactive Monitoring & Suggestions
-    # ----------------------------------------------------------------
-    stage_start = datetime.now()
-    proactive_enabled = os.getenv("ENABLE_PROACTIVE_MONITORING", "true").lower() == "true"
-    proactive_min_priority = float(os.getenv("PROACTIVE_SUGGESTION_MIN_PRIORITY", "0.6"))
-
-    if proactive_enabled:
-        try:
-            from autonomy.proactive.monitor import get_proactive_monitor
-
-            monitor = get_proactive_monitor(min_priority=proactive_min_priority)
-            self_state = load_self_state()
-
-            suggestion = await monitor.analyze_session(
-                session_id=req.session_id,
-                context_state=context_state,
-                self_state=self_state
-            )
-
-            if suggestion:
-                suggestion_text = monitor.format_suggestion(suggestion)
-                persona_answer += suggestion_text
-                logger.info(f"💡 Proactive suggestion: {suggestion['type']} (priority: {suggestion['priority']:.2f})")
-
-        except Exception as e:
-            logger.warning(f"⚠️  Proactive monitoring failed: {e}")
-
-    stage_timings["proactive"] = (datetime.now() - stage_start).total_seconds() * 1000
-
-    # ----------------------------------------------------------------
-    # PIPELINE COMPLETE — Summary
-    # ----------------------------------------------------------------
-    total_duration = (datetime.now() - pipeline_start).total_seconds() * 1000
-
-    # Always show pipeline completion
-    logger.info(f"\n{'='*100}")
-    logger.info(f"✨ PIPELINE COMPLETE | Session: {req.session_id} | Total: {total_duration:.0f}ms")
-    logger.info(f"{'='*100}")
-
-    # Show timing breakdown in detailed/verbose mode
-    if LOG_DETAIL_LEVEL in ["detailed", "verbose"]:
-        logger.info("⏱️  Stage Timings:")
-        for stage, duration in stage_timings.items():
-            pct = (duration / total_duration) * 100 if total_duration > 0 else 0
-            logger.info(f"   {stage:15s}: {duration:6.0f}ms ({pct:5.1f}%)")
-
-    logger.info(f"📤 Output: {len(persona_answer)} chars")
-    logger.info(f"{'='*100}\n")
-
-    # ----------------------------------------------------------------
-    # RETURN
-    # ----------------------------------------------------------------
-    return {
-        "draft": draft,
-        "neutral": final_neutral,
-        "persona": persona_answer,
-        "reflection": reflection_notes,
-        "session_id": req.session_id,
-        "context_summary": {
-            "rag_results": len(context_state.get("rag", [])),
-            "minutes_since_last": context_state.get("minutes_since_last_msg"),
-            "message_count": context_state.get("message_count"),
-            "mode": context_state.get("mode"),
-        }
-    }
-
-
-# -------------------------------------------------------------------
-# /simple endpoint - Standard chatbot mode (no reasoning pipeline)
-# -------------------------------------------------------------------
-@cortex_router.post("/simple")
-async def run_simple(req: ReasonRequest):
-    """
-    Standard chatbot mode - bypasses all cortex reasoning pipeline.
-    Just a simple conversation loop like a typical chatbot.
-    """
-    from datetime import datetime
-    from llm.llm_router import call_llm
-    from autonomy.tools.function_caller import FunctionCaller
-
-    start_time = datetime.now()
-
-    logger.info(f"\n{'='*100}")
-    logger.info(f"💬 SIMPLE MODE | Session: {req.session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
-    logger.info(f"{'='*100}")
-    logger.info(f"📝 User: {req.user_prompt[:150]}...")
-    logger.info(f"{'-'*100}\n")
-
-    # Get conversation history from context and intake buffer
-    context_state = await collect_context(req.session_id, req.user_prompt)
-
-    # Get recent messages from Intake buffer
-    from intake.intake import get_recent_messages
-    recent_msgs = get_recent_messages(req.session_id, limit=20)
-    logger.info(f"📋 Retrieved {len(recent_msgs)} recent messages from Intake buffer")
-
-    # Build simple conversation history with system message
-    system_message = {
-        "role": "system",
-        "content": (
-            "You are a helpful AI assistant. Provide direct, concise responses to the user's questions. "
-            "Maintain context from previous messages in the conversation."
-        )
-    }
-
-    messages = [system_message]
-
-    # Add conversation history
-
-    if recent_msgs:
-        for msg in recent_msgs:
-            messages.append({
-                "role": msg.get("role", "user"),
-                "content": msg.get("content", "")
-            })
-            logger.info(f"  - {msg.get('role')}: {msg.get('content', '')[:50]}...")
-
-    # Add current user message
-    messages.append({
-        "role": "user",
-        "content": req.user_prompt
-    })
-
-    logger.info(f"📨 Total messages being sent to LLM: {len(messages)} (including system message)")
-
-    # Get backend from request, otherwise fall back to env variable
-    backend = req.backend if req.backend else os.getenv("STANDARD_MODE_LLM", "SECONDARY")
-    backend = backend.upper()  # Normalize to uppercase
-    logger.info(f"🔧 Using backend: {backend}")
-
-    temperature = req.temperature if req.temperature is not None else 0.7
-
-    # Check if tools are enabled
-    enable_tools = os.getenv("STANDARD_MODE_ENABLE_TOOLS", "false").lower() == "true"
-
-    # Call LLM with or without tools
-    try:
-        if enable_tools:
-            # Use FunctionCaller for tool-enabled conversation
-            logger.info(f"🛠️  Tool calling enabled for Standard Mode")
-            logger.info(f"🔍 Creating FunctionCaller with backend={backend}, temp={temperature}")
-            function_caller = FunctionCaller(backend, temperature)
-            logger.info(f"🔍 FunctionCaller created, calling call_with_tools...")
-            result = await function_caller.call_with_tools(
-                messages=messages,
-                max_tokens=2048,
-                session_id=req.session_id  # Pass session_id for streaming
-            )
-            logger.info(f"🔍 call_with_tools returned: iterations={result.get('iterations')}, tool_calls={len(result.get('tool_calls', []))}")
-
-            # Log tool usage
-            if result.get("tool_calls"):
-                tool_names = [tc["name"] for tc in result["tool_calls"]]
-                logger.info(f"🔧 Tools used: {', '.join(tool_names)} ({result['iterations']} iterations)")
-
-            response = result["content"].strip()
-        else:
-            # Direct LLM call without tools (original behavior)
-            raw_response = await call_llm(
-                messages=messages,
-                backend=backend,
-                temperature=temperature,
-                max_tokens=2048
-            )
-            response = raw_response.strip()
-
-    except Exception as e:
-        logger.error(f"❌ LLM call failed: {e}")
-        response = f"Error: {str(e)}"
-
-    # Update session with the exchange
-    try:
-        update_last_assistant_message(req.session_id, response)
-        add_exchange_internal({
-            "session_id": req.session_id,
-            "role": "user",
-            "content": req.user_prompt
-        })
-        add_exchange_internal({
-            "session_id": req.session_id,
-            "role": "assistant",
-            "content": response
-        })
-    except Exception as e:
-        logger.warning(f"⚠️  Session update failed: {e}")
-
-    duration = (datetime.now() - start_time).total_seconds() * 1000
-
-    logger.info(f"\n{'='*100}")
-    logger.info(f"✨ SIMPLE MODE COMPLETE | Session: {req.session_id} | Total: {duration:.0f}ms")
-    logger.info(f"📤 Output: {len(response)} chars")
-    logger.info(f"{'='*100}\n")
-
-    return {
-        "draft": response,
-        "neutral": response,
-        "persona": response,
-        "reflection": "",
-        "session_id": req.session_id,
-        "context_summary": {
-            "message_count": len(messages),
-            "mode": "standard"
-        }
-    }
-
-
-# -------------------------------------------------------------------
-# /stream/thinking endpoint - SSE stream for "show your work"
-# -------------------------------------------------------------------
-@cortex_router.get("/stream/thinking/{session_id}")
-async def stream_thinking(session_id: str):
-    """
-    Server-Sent Events stream for tool calling "show your work" feature.
-
-    Streams real-time updates about:
-    - Thinking/planning steps
-    - Tool calls being made
-    - Tool execution results
-    - Final completion
-    """
-    stream_manager = get_stream_manager()
-    queue = stream_manager.subscribe(session_id)
-
-    async def event_generator():
-        try:
-            # Send initial connection message
-            import json
-            connected_event = json.dumps({"type": "connected", "session_id": session_id})
-            yield f"data: {connected_event}\n\n"
-
-            while True:
-                # Wait for events with timeout to send keepalive
-                try:
-                    event = await asyncio.wait_for(queue.get(), timeout=30.0)
-
-                    # Format as SSE
-                    event_data = json.dumps(event)
-                    yield f"data: {event_data}\n\n"
-
-                    # If it's a "done" event, close the stream
-                    if event.get("type") == "done":
-                        break
-
-                except asyncio.TimeoutError:
-                    # Send keepalive comment
-                    yield ": keepalive\n\n"
-
-        except asyncio.CancelledError:
-            logger.info(f"Stream cancelled for session {session_id}")
-        finally:
-            stream_manager.unsubscribe(session_id, queue)
-
-    return StreamingResponse(
-        event_generator(),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "X-Accel-Buffering": "no"  # Disable nginx buffering
-        }
-    )
-
-
-# -------------------------------------------------------------------
-# /ingest endpoint (internal)
-# -------------------------------------------------------------------
-class IngestPayload(BaseModel):
-    session_id: str
-    user_msg: str
-    assistant_msg: str
-
-
-@cortex_router.post("/ingest")
-async def ingest(payload: IngestPayload):
-    try:
-        update_last_assistant_message(payload.session_id, payload.assistant_msg)
-    except Exception as e:
-        logger.warning(f"[INGEST] Session update failed: {e}")
-
-    try:
-        add_exchange_internal({
-            "session_id": payload.session_id,
-            "user_msg": payload.user_msg,
-            "assistant_msg": payload.assistant_msg,
-        })
-    except Exception as e:
-        logger.warning(f"[INGEST] Intake update failed: {e}")
-
-    return {"status": "ok", "session_id": payload.session_id}
@@ -1 +0,0 @@
-"""Tests for Project Lyra Cortex."""
@@ -1,197 +0,0 @@
-"""
-Integration tests for Phase 1 autonomy features.
-Tests monologue integration, executive planning, and self-state persistence.
-"""
-
-import asyncio
-import json
-import sys
-import os
-
-# Add parent directory to path for imports
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from autonomy.monologue.monologue import InnerMonologue
-from autonomy.self.state import load_self_state, update_self_state, get_self_state_instance
-from autonomy.executive.planner import plan_execution
-
-
-async def test_monologue_integration():
-    """Test monologue generates valid output."""
-    print("\n" + "="*60)
-    print("TEST 1: Monologue Integration")
-    print("="*60)
-
-    mono = InnerMonologue()
-
-    context = {
-        "user_message": "Explain quantum computing to me like I'm 5",
-        "session_id": "test_001",
-        "self_state": load_self_state(),
-        "context_summary": {"message_count": 5}
-    }
-
-    result = await mono.process(context)
-
-    assert "intent" in result, "Missing intent field"
-    assert "tone" in result, "Missing tone field"
-    assert "depth" in result, "Missing depth field"
-    assert "consult_executive" in result, "Missing consult_executive field"
-
-    print("✓ Monologue integration test passed")
-    print(f"  Result: {json.dumps(result, indent=2)}")
-
-    return result
-
-
-async def test_executive_planning():
-    """Test executive planner generates valid plans."""
-    print("\n" + "="*60)
-    print("TEST 2: Executive Planning")
-    print("="*60)
-
-    plan = await plan_execution(
-        user_prompt="Help me build a distributed system with microservices architecture",
-        intent="technical_implementation",
-        context_state={
-            "tools_available": ["RAG", "WEB", "CODEBRAIN"],
-            "message_count": 3,
-            "minutes_since_last_msg": 2.5,
-            "active_project": None
-        },
-        identity_block={}
-    )
-
-    assert "summary" in plan, "Missing summary field"
-    assert "plan_text" in plan, "Missing plan_text field"
-    assert "steps" in plan, "Missing steps field"
-    assert len(plan["steps"]) > 0, "No steps generated"
-
-    print("✓ Executive planning test passed")
-    print(f"  Plan summary: {plan['summary']}")
-    print(f"  Steps: {len(plan['steps'])}")
-    print(f"  Complexity: {plan.get('estimated_complexity', 'unknown')}")
-
-    return plan
-
-
-def test_self_state_persistence():
-    """Test self-state loads and updates."""
-    print("\n" + "="*60)
-    print("TEST 3: Self-State Persistence")
-    print("="*60)
-
-    state1 = load_self_state()
-    assert "mood" in state1, "Missing mood field"
-    assert "energy" in state1, "Missing energy field"
-    assert "interaction_count" in state1, "Missing interaction_count"
-
-    initial_count = state1.get("interaction_count", 0)
-    print(f"  Initial interaction count: {initial_count}")
-
-    update_self_state(
-        mood_delta=0.1,
-        energy_delta=-0.05,
-        new_focus="testing"
-    )
-
-    state2 = load_self_state()
-    assert state2["interaction_count"] == initial_count + 1, "Interaction count not incremented"
-    assert state2["focus"] == "testing", "Focus not updated"
-
-    print("✓ Self-state persistence test passed")
-    print(f"  New interaction count: {state2['interaction_count']}")
-    print(f"  New focus: {state2['focus']}")
-    print(f"  New energy: {state2['energy']:.2f}")
-
-    return state2
-
-
-async def test_end_to_end_flow():
-    """Test complete flow from monologue through planning."""
-    print("\n" + "="*60)
-    print("TEST 4: End-to-End Flow")
-    print("="*60)
-
-    # Step 1: Monologue detects complex query
-    mono = InnerMonologue()
-    mono_result = await mono.process({
-        "user_message": "Design a scalable ML pipeline with CI/CD integration",
-        "session_id": "test_e2e",
-        "self_state": load_self_state(),
-        "context_summary": {}
-    })
-
-    print(f"  Monologue intent: {mono_result.get('intent')}")
-    print(f"  Consult executive: {mono_result.get('consult_executive')}")
-
-    # Step 2: If executive requested, generate plan
-    if mono_result.get("consult_executive"):
-        plan = await plan_execution(
-            user_prompt="Design a scalable ML pipeline with CI/CD integration",
-            intent=mono_result.get("intent", "unknown"),
-            context_state={"tools_available": ["CODEBRAIN", "WEB"]},
-            identity_block={}
-        )
-
-        assert plan is not None, "Plan should be generated"
-        print(f"  Executive plan generated: {len(plan.get('steps', []))} steps")
-
-    # Step 3: Update self-state
-    update_self_state(
-        energy_delta=-0.1,  # Complex task is tiring
-        new_focus="ml_pipeline_design",
-        confidence_delta=0.05
-    )
-
-    state = load_self_state()
-    assert state["focus"] == "ml_pipeline_design", "Focus should be updated"
-
-    print("✓ End-to-end flow test passed")
-    print(f"  Final state: {state['mood']}, energy={state['energy']:.2f}")
-
-    return True
-
-
-async def run_all_tests():
-    """Run all Phase 1 tests."""
-    print("\n" + "="*60)
-    print("PHASE 1 AUTONOMY TESTS")
-    print("="*60)
-
-    try:
-        # Test 1: Monologue
-        mono_result = await test_monologue_integration()
-
-        # Test 2: Executive Planning
-        plan_result = await test_executive_planning()
-
-        # Test 3: Self-State
-        state_result = test_self_state_persistence()
-
-        # Test 4: End-to-End
-        await test_end_to_end_flow()
-
-        print("\n" + "="*60)
-        print("ALL TESTS PASSED ✓")
-        print("="*60)
-
-        print("\nSummary:")
-        print(f"  - Monologue: {mono_result.get('intent')} ({mono_result.get('tone')})")
-        print(f"  - Executive: {plan_result.get('estimated_complexity')} complexity")
-        print(f"  - Self-state: {state_result.get('interaction_count')} interactions")
-
-        return True
-
-    except Exception as e:
-        print("\n" + "="*60)
-        print(f"TEST FAILED: {e}")
-        print("="*60)
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-if __name__ == "__main__":
-    success = asyncio.run(run_all_tests())
-    sys.exit(0 if success else 1)
@@ -1,495 +0,0 @@
-"""
-Integration tests for Phase 2 autonomy features.
-Tests autonomous tool invocation, proactive monitoring, actions, and pattern learning.
-"""
-
-import asyncio
-import json
-import sys
-import os
-
-# Add parent directory to path for imports
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# Override self-state file path for testing
-os.environ["SELF_STATE_FILE"] = "/tmp/test_self_state.json"
-
-from autonomy.tools.decision_engine import ToolDecisionEngine
-from autonomy.tools.orchestrator import ToolOrchestrator
-from autonomy.proactive.monitor import ProactiveMonitor
-from autonomy.actions.autonomous_actions import AutonomousActionManager
-from autonomy.learning.pattern_learner import PatternLearner
-from autonomy.self.state import load_self_state, get_self_state_instance
-
-
-async def test_tool_decision_engine():
-    """Test autonomous tool decision making."""
-    print("\n" + "="*60)
-    print("TEST 1: Tool Decision Engine")
-    print("="*60)
-
-    engine = ToolDecisionEngine()
-
-    # Test 1a: Memory reference detection
-    result = await engine.analyze_tool_needs(
-        user_prompt="What did we discuss earlier about Python?",
-        monologue={"intent": "clarification", "consult_executive": False},
-        context_state={},
-        available_tools=["RAG", "WEB", "WEATHER"]
-    )
-
-    assert result["should_invoke_tools"], "Should invoke tools for memory reference"
-    assert any(t["tool"] == "RAG" for t in result["tools_to_invoke"]), "Should recommend RAG"
-    assert result["confidence"] > 0.8, f"Confidence should be high for clear memory reference: {result['confidence']}"
-
-    print(f"  ✓ Memory reference detection passed")
-    print(f"    Tools: {[t['tool'] for t in result['tools_to_invoke']]}")
-    print(f"    Confidence: {result['confidence']:.2f}")
-
-    # Test 1b: Web search detection
-    result = await engine.analyze_tool_needs(
-        user_prompt="What's the latest news about AI developments?",
-        monologue={"intent": "information_seeking", "consult_executive": False},
-        context_state={},
-        available_tools=["RAG", "WEB", "WEATHER"]
-    )
-
-    assert result["should_invoke_tools"], "Should invoke tools for current info request"
-    assert any(t["tool"] == "WEB" for t in result["tools_to_invoke"]), "Should recommend WEB"
-
-    print(f"  ✓ Web search detection passed")
-    print(f"    Tools: {[t['tool'] for t in result['tools_to_invoke']]}")
-
-    # Test 1c: Weather detection
-    result = await engine.analyze_tool_needs(
-        user_prompt="What's the weather like today in Boston?",
-        monologue={"intent": "information_seeking", "consult_executive": False},
-        context_state={},
-        available_tools=["RAG", "WEB", "WEATHER"]
-    )
-
-    assert result["should_invoke_tools"], "Should invoke tools for weather query"
-    assert any(t["tool"] == "WEATHER" for t in result["tools_to_invoke"]), "Should recommend WEATHER"
-
-    print(f"  ✓ Weather detection passed")
-
-    # Test 1d: Proactive RAG for complex queries
-    result = await engine.analyze_tool_needs(
-        user_prompt="Design a microservices architecture",
-        monologue={"intent": "technical_implementation", "consult_executive": True},
-        context_state={},
-        available_tools=["RAG", "WEB", "CODEBRAIN"]
-    )
-
-    assert result["should_invoke_tools"], "Should proactively invoke tools for complex queries"
-    rag_tools = [t for t in result["tools_to_invoke"] if t["tool"] == "RAG"]
-    assert len(rag_tools) > 0, "Should include proactive RAG"
-
-    print(f"  ✓ Proactive RAG detection passed")
-    print(f"    Reason: {rag_tools[0]['reason']}")
-
-    print("\n✓ Tool Decision Engine tests passed\n")
-    return result
-
-
-async def test_tool_orchestrator():
-    """Test tool orchestration (mock mode)."""
-    print("\n" + "="*60)
-    print("TEST 2: Tool Orchestrator (Mock Mode)")
-    print("="*60)
-
-    orchestrator = ToolOrchestrator(tool_timeout=5)
-
-    # Since actual tools may not be available, test the orchestrator structure
-    print(f"  Available tools: {list(orchestrator.available_tools.keys())}")
-
-    # Test with tools_to_invoke (will fail gracefully if tools unavailable)
-    tools_to_invoke = [
-        {"tool": "RAG", "query": "test query", "reason": "testing", "priority": 0.9}
-    ]
-
-    result = await orchestrator.execute_tools(
-        tools_to_invoke=tools_to_invoke,
-        context_state={"session_id": "test"}
-    )
-
-    assert "results" in result, "Should return results dict"
-    assert "execution_summary" in result, "Should return execution summary"
-
-    summary = result["execution_summary"]
-    assert "tools_invoked" in summary, "Summary should include tools_invoked"
-    assert "total_time_ms" in summary, "Summary should include timing"
-
-    print(f"  ✓ Orchestrator structure valid")
-    print(f"    Summary: {summary}")
-
-    # Test result formatting
-    formatted = orchestrator.format_results_for_context(result)
-    assert isinstance(formatted, str), "Should format results as string"
-
-    print(f"  ✓ Result formatting works")
-    print(f"    Formatted length: {len(formatted)} chars")
-
-    print("\n✓ Tool Orchestrator tests passed\n")
-    return result
-
-
-async def test_proactive_monitor():
-    """Test proactive monitoring and suggestions."""
-    print("\n" + "="*60)
-    print("TEST 3: Proactive Monitor")
-    print("="*60)
-
-    monitor = ProactiveMonitor(min_priority=0.6)
-
-    # Test 3a: Long silence detection
-    context_state = {
-        "message_count": 5,
-        "minutes_since_last_msg": 35  # > 30 minutes
-    }
-
-    self_state = load_self_state()
-
-    suggestion = await monitor.analyze_session(
-        session_id="test_silence",
-        context_state=context_state,
-        self_state=self_state
-    )
-
-    assert suggestion is not None, "Should generate suggestion for long silence"
-    assert suggestion["type"] == "check_in", f"Should be check_in type: {suggestion['type']}"
-    assert suggestion["priority"] >= 0.6, "Priority should meet threshold"
-
-    print(f"  ✓ Long silence detection passed")
-    print(f"    Type: {suggestion['type']}, Priority: {suggestion['priority']:.2f}")
-    print(f"    Suggestion: {suggestion['suggestion'][:50]}...")
-
-    # Test 3b: Learning opportunity (high curiosity)
-    self_state["curiosity"] = 0.8
-    self_state["learning_queue"] = ["quantum computing", "rust programming"]
-
-    # Reset cooldown for this test
-    monitor.reset_cooldown("test_learning")
-
-    suggestion = await monitor.analyze_session(
-        session_id="test_learning",
-        context_state={"message_count": 3, "minutes_since_last_msg": 2},
-        self_state=self_state
-    )
-
-    assert suggestion is not None, "Should generate learning suggestion"
-    assert suggestion["type"] == "learning", f"Should be learning type: {suggestion['type']}"
-
-    print(f"  ✓ Learning opportunity detection passed")
-    print(f"    Suggestion: {suggestion['suggestion'][:70]}...")
-
-    # Test 3c: Conversation milestone
-    monitor.reset_cooldown("test_milestone")
-
-    # Reset curiosity to avoid learning suggestion taking precedence
-    self_state["curiosity"] = 0.5
-    self_state["learning_queue"] = []
-
-    suggestion = await monitor.analyze_session(
-        session_id="test_milestone",
-        context_state={"message_count": 50, "minutes_since_last_msg": 1},
-        self_state=self_state
-    )
-
-    assert suggestion is not None, "Should generate milestone suggestion"
-    # Note: learning or summary both valid - check it's a reasonable suggestion
-    assert suggestion["type"] in ["summary", "learning", "check_in"], f"Should be valid type: {suggestion['type']}"
-
-    print(f"  ✓ Conversation milestone detection passed (type: {suggestion['type']})")
-
-    # Test 3d: Cooldown mechanism
-    # Try to get another suggestion immediately (should be blocked)
-    suggestion2 = await monitor.analyze_session(
-        session_id="test_milestone",
-        context_state={"message_count": 51, "minutes_since_last_msg": 1},
-        self_state=self_state
-    )
-
-    assert suggestion2 is None, "Should not generate suggestion during cooldown"
-
-    print(f"  ✓ Cooldown mechanism working")
-
-    # Check stats
-    stats = monitor.get_session_stats("test_milestone")
-    assert stats["cooldown_active"], "Cooldown should be active"
-    print(f"    Cooldown remaining: {stats['cooldown_remaining']}s")
-
-    print("\n✓ Proactive Monitor tests passed\n")
-    return suggestion
-
-
-async def test_autonomous_actions():
-    """Test autonomous action execution."""
-    print("\n" + "="*60)
-    print("TEST 4: Autonomous Actions")
-    print("="*60)
-
-    manager = AutonomousActionManager()
-
-    # Test 4a: List allowed actions
-    allowed = manager.get_allowed_actions()
-    assert "create_memory" in allowed, "Should have create_memory action"
-    assert "update_goal" in allowed, "Should have update_goal action"
-    assert "learn_topic" in allowed, "Should have learn_topic action"
-
-    print(f"  ✓ Allowed actions: {allowed}")
-
-    # Test 4b: Validate actions
-    validation = manager.validate_action("create_memory", {"text": "test memory"})
-    assert validation["valid"], "Should validate correct action"
-
-    print(f"  ✓ Action validation passed")
-
-    # Test 4c: Execute learn_topic action
-    result = await manager.execute_action(
-        action_type="learn_topic",
-        parameters={"topic": "rust programming", "reason": "testing", "priority": 0.8},
-        context={"session_id": "test"}
-    )
-
-    assert result["success"], f"Action should succeed: {result.get('error', 'unknown')}"
-    assert "topic" in result["result"], "Should return topic info"
-
-    print(f"  ✓ learn_topic action executed")
-    print(f"    Topic: {result['result']['topic']}")
-    print(f"    Queue position: {result['result']['queue_position']}")
-
-    # Test 4d: Execute update_focus action
-    result = await manager.execute_action(
-        action_type="update_focus",
-        parameters={"focus": "autonomy_testing", "reason": "running tests"},
-        context={"session_id": "test"}
-    )
-
-    assert result["success"], "update_focus should succeed"
-
-    print(f"  ✓ update_focus action executed")
-    print(f"    New focus: {result['result']['new_focus']}")
-
-    # Test 4e: Reject non-whitelisted action
-    result = await manager.execute_action(
-        action_type="delete_all_files",  # NOT in whitelist
-        parameters={},
-        context={"session_id": "test"}
-    )
-
-    assert not result["success"], "Should reject non-whitelisted action"
-    assert "not in whitelist" in result["error"], "Should indicate whitelist violation"
-
-    print(f"  ✓ Non-whitelisted action rejected")
-
-    # Test 4f: Action log
-    log = manager.get_action_log(limit=10)
-    assert len(log) >= 2, f"Should have logged multiple actions (got {len(log)})"
-
-    print(f"  ✓ Action log contains {len(log)} entries")
-
-    print("\n✓ Autonomous Actions tests passed\n")
-    return result
-
-
-async def test_pattern_learner():
-    """Test pattern learning system."""
-    print("\n" + "="*60)
-    print("TEST 5: Pattern Learner")
-    print("="*60)
-
-    # Use temp file for testing
-    test_file = "/tmp/test_patterns.json"
-    learner = PatternLearner(patterns_file=test_file)
-
-    # Test 5a: Learn from multiple interactions
-    for i in range(5):
-        await learner.learn_from_interaction(
-            user_prompt=f"Help me with Python coding task {i}",
-            response=f"Here's help with task {i}...",
-            monologue={"intent": "coding_help", "tone": "focused", "depth": "medium"},
-            context={"session_id": "test", "executive_plan": None}
-        )
-
-    print(f"  ✓ Learned from 5 interactions")
-
-    # Test 5b: Get top topics
-    top_topics = learner.get_top_topics(limit=5)
-    assert len(top_topics) > 0, "Should have learned topics"
-    assert "coding_help" == top_topics[0][0], "coding_help should be top topic"
-
-    print(f"  ✓ Top topics: {[t[0] for t in top_topics[:3]]}")
-
-    # Test 5c: Get preferred tone
-    preferred_tone = learner.get_preferred_tone()
-    assert preferred_tone == "focused", "Should detect focused as preferred tone"
-
-    print(f"  ✓ Preferred tone: {preferred_tone}")
-
-    # Test 5d: Get preferred depth
-    preferred_depth = learner.get_preferred_depth()
-    assert preferred_depth == "medium", "Should detect medium as preferred depth"
-
-    print(f"  ✓ Preferred depth: {preferred_depth}")
-
-    # Test 5e: Get insights
-    insights = learner.get_insights()
-    assert insights["total_interactions"] == 5, "Should track interaction count"
-    assert insights["preferred_tone"] == "focused", "Insights should include tone"
-
-    print(f"  ✓ Insights generated:")
-    print(f"    Total interactions: {insights['total_interactions']}")
-    print(f"    Recommendations: {insights['learning_recommendations']}")
-
-    # Test 5f: Export patterns
-    exported = learner.export_patterns()
-    assert "topic_frequencies" in exported, "Should export all patterns"
-
-    print(f"  ✓ Patterns exported ({len(exported)} keys)")
-
-    # Cleanup
-    if os.path.exists(test_file):
-        os.remove(test_file)
-
-    print("\n✓ Pattern Learner tests passed\n")
-    return insights
-
-
-async def test_end_to_end_autonomy():
-    """Test complete autonomous flow."""
-    print("\n" + "="*60)
-    print("TEST 6: End-to-End Autonomy Flow")
-    print("="*60)
-
-    # Simulate a complex user query that triggers multiple autonomous systems
-    user_prompt = "Remember what we discussed about machine learning? I need current research on transformers."
-
-    monologue = {
-        "intent": "technical_research",
-        "tone": "focused",
-        "depth": "deep",
-        "consult_executive": True
-    }
-
-    context_state = {
-        "session_id": "e2e_test",
-        "message_count": 15,
-        "minutes_since_last_msg": 5
-    }
-
-    print(f"  User prompt: {user_prompt}")
-    print(f"  Monologue intent: {monologue['intent']}")
-
-    # Step 1: Tool decision engine
-    engine = ToolDecisionEngine()
-    tool_decision = await engine.analyze_tool_needs(
-        user_prompt=user_prompt,
-        monologue=monologue,
-        context_state=context_state,
-        available_tools=["RAG", "WEB", "CODEBRAIN"]
-    )
-
-    print(f"\n  Step 1: Tool Decision")
-    print(f"    Should invoke: {tool_decision['should_invoke_tools']}")
-    print(f"    Tools: {[t['tool'] for t in tool_decision['tools_to_invoke']]}")
-    assert tool_decision["should_invoke_tools"], "Should invoke tools"
-    assert len(tool_decision["tools_to_invoke"]) >= 2, "Should recommend multiple tools (RAG + WEB)"
-
-    # Step 2: Pattern learning
-    learner = PatternLearner(patterns_file="/tmp/e2e_test_patterns.json")
-    await learner.learn_from_interaction(
-        user_prompt=user_prompt,
-        response="Here's information about transformers...",
-        monologue=monologue,
-        context=context_state
-    )
-
-    print(f"\n  Step 2: Pattern Learning")
-    top_topics = learner.get_top_topics(limit=3)
-    print(f"    Learned topics: {[t[0] for t in top_topics]}")
-
-    # Step 3: Autonomous action
-    action_manager = AutonomousActionManager()
-    action_result = await action_manager.execute_action(
-        action_type="learn_topic",
-        parameters={"topic": "transformer architectures", "reason": "user interest detected"},
-        context=context_state
-    )
-
-    print(f"\n  Step 3: Autonomous Action")
-    print(f"    Action: learn_topic")
-    print(f"    Success: {action_result['success']}")
-
-    # Step 4: Proactive monitoring (won't trigger due to low message count)
-    monitor = ProactiveMonitor(min_priority=0.6)
-    monitor.reset_cooldown("e2e_test")
-
-    suggestion = await monitor.analyze_session(
-        session_id="e2e_test",
-        context_state=context_state,
-        self_state=load_self_state()
-    )
-
-    print(f"\n  Step 4: Proactive Monitoring")
-    print(f"    Suggestion: {suggestion['type'] if suggestion else 'None (expected for low message count)'}")
-
-    # Cleanup
-    if os.path.exists("/tmp/e2e_test_patterns.json"):
-        os.remove("/tmp/e2e_test_patterns.json")
-
-    print("\n✓ End-to-End Autonomy Flow tests passed\n")
-    return True
-
-
-async def run_all_tests():
-    """Run all Phase 2 tests."""
-    print("\n" + "="*60)
-    print("PHASE 2 AUTONOMY TESTS")
-    print("="*60)
-
-    try:
-        # Test 1: Tool Decision Engine
-        await test_tool_decision_engine()
-
-        # Test 2: Tool Orchestrator
-        await test_tool_orchestrator()
-
-        # Test 3: Proactive Monitor
-        await test_proactive_monitor()
-
-        # Test 4: Autonomous Actions
-        await test_autonomous_actions()
-
-        # Test 5: Pattern Learner
-        await test_pattern_learner()
-
-        # Test 6: End-to-End
-        await test_end_to_end_autonomy()
-
-        print("\n" + "="*60)
-        print("ALL PHASE 2 TESTS PASSED ✓")
-        print("="*60)
-
-        print("\nPhase 2 Features Validated:")
-        print("  ✓ Autonomous tool decision making")
-        print("  ✓ Tool orchestration and execution")
-        print("  ✓ Proactive monitoring and suggestions")
-        print("  ✓ Safe autonomous actions")
-        print("  ✓ Pattern learning and adaptation")
-        print("  ✓ End-to-end autonomous flow")
-
-        return True
-
-    except Exception as e:
-        print("\n" + "="*60)
-        print(f"TEST FAILED: {e}")
-        print("="*60)
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-if __name__ == "__main__":
-    success = asyncio.run(run_all_tests())
-    sys.exit(0 if success else 1)
@@ -1 +0,0 @@
-# Utilities module
@@ -1,33 +0,0 @@
-import os, json, datetime
-
-# optional daily rotation
-LOG_PATH = os.getenv("REFLECTION_NOTE_PATH") or \
-           f"/app/logs/reflections_{datetime.date.today():%Y%m%d}.log"
-
-def log_reflection(reflection: dict, user_prompt: str, draft: str, final: str, session_id: str | None = None):
-    """Append a reflection entry to the reflections log."""
-    try:
-        # 1️⃣ Make sure log directory exists
-        os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
-
-        # 2️⃣ Ensure session_id is stored
-        reflection["session_id"] = session_id or reflection.get("session_id", "unknown")
-
-        # 3️⃣ Build JSON entry
-        entry = {
-            "timestamp": datetime.datetime.now().isoformat(),
-            "session_id": reflection["session_id"],
-            "prompt": user_prompt,
-            "draft_output": draft[:500],
-            "final_output": final[:500],
-            "reflection": reflection,
-        }
-
-        # 4️⃣ Write it in pretty JSON, comma-delimited for easy reading
-        with open(LOG_PATH, "a", encoding="utf-8") as f:
-            f.write(json.dumps(entry, indent=2, ensure_ascii=False) + ",\n")
-
-        print(f"[Cortex] Logged reflection → {LOG_PATH}")
-
-    except Exception as e:
-        print(f"[Cortex] Failed to log reflection: {e}")
@@ -1,223 +0,0 @@
-"""
-Structured logging utilities for Cortex pipeline debugging.
-
-Provides hierarchical, scannable logs with clear section markers and raw data visibility.
-"""
-
-import json
-import logging
-from typing import Any, Dict, List, Optional
-from datetime import datetime
-from enum import Enum
-
-
-class LogLevel(Enum):
-    """Log detail levels"""
-    MINIMAL = 1    # Only errors and final results
-    SUMMARY = 2    # Stage summaries + errors
-    DETAILED = 3   # Include raw LLM outputs, RAG results
-    VERBOSE = 4    # Everything including intermediate states
-
-
-class PipelineLogger:
-    """
-    Hierarchical logger for cortex pipeline debugging.
-
-    Features:
-    - Clear visual section markers
-    - Collapsible detail sections
-    - Raw data dumps with truncation options
-    - Stage timing
-    - Error highlighting
-    """
-
-    def __init__(self, logger: logging.Logger, level: LogLevel = LogLevel.SUMMARY):
-        self.logger = logger
-        self.level = level
-        self.stage_timings = {}
-        self.current_stage = None
-        self.stage_start_time = None
-        self.pipeline_start_time = None
-
-    def pipeline_start(self, session_id: str, user_prompt: str):
-        """Mark the start of a pipeline run"""
-        self.pipeline_start_time = datetime.now()
-        self.stage_timings = {}
-
-        if self.level.value >= LogLevel.SUMMARY.value:
-            self.logger.info(f"\n{'='*100}")
-            self.logger.info(f"🚀 PIPELINE START | Session: {session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
-            self.logger.info(f"{'='*100}")
-            if self.level.value >= LogLevel.DETAILED.value:
-                self.logger.info(f"📝 User prompt: {user_prompt[:200]}{'...' if len(user_prompt) > 200 else ''}")
-                self.logger.info(f"{'-'*100}\n")
-
-    def stage_start(self, stage_name: str, description: str = ""):
-        """Mark the start of a pipeline stage"""
-        self.current_stage = stage_name
-        self.stage_start_time = datetime.now()
-
-        if self.level.value >= LogLevel.SUMMARY.value:
-            timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
-            desc_suffix = f" - {description}" if description else ""
-            self.logger.info(f"▶️  [{stage_name}]{desc_suffix} | {timestamp}")
-
-    def stage_end(self, result_summary: str = ""):
-        """Mark the end of a pipeline stage"""
-        if self.current_stage and self.stage_start_time:
-            duration_ms = (datetime.now() - self.stage_start_time).total_seconds() * 1000
-            self.stage_timings[self.current_stage] = duration_ms
-
-            if self.level.value >= LogLevel.SUMMARY.value:
-                summary_suffix = f" → {result_summary}" if result_summary else ""
-                self.logger.info(f"✅ [{self.current_stage}] Complete in {duration_ms:.0f}ms{summary_suffix}\n")
-
-        self.current_stage = None
-        self.stage_start_time = None
-
-    def log_llm_call(self, backend: str, prompt: str, response: Any, raw_response: str = None):
-        """
-        Log LLM call details with proper formatting.
-
-        Args:
-            backend: Backend name (PRIMARY, SECONDARY, etc.)
-            prompt: Input prompt to LLM
-            response: Parsed response object
-            raw_response: Raw JSON response string
-        """
-        if self.level.value >= LogLevel.DETAILED.value:
-            self.logger.info(f"  🧠 LLM Call | Backend: {backend}")
-
-            # Show prompt (truncated)
-            if isinstance(prompt, list):
-                prompt_preview = prompt[-1].get('content', '')[:150] if prompt else ''
-            else:
-                prompt_preview = str(prompt)[:150]
-            self.logger.info(f"     Prompt: {prompt_preview}...")
-
-            # Show parsed response
-            if isinstance(response, dict):
-                response_text = (
-                    response.get('reply') or
-                    response.get('message', {}).get('content') or
-                    str(response)
-                )[:200]
-            else:
-                response_text = str(response)[:200]
-
-            self.logger.info(f"     Response: {response_text}...")
-
-            # Show raw response in collapsible block
-            if raw_response and self.level.value >= LogLevel.VERBOSE.value:
-                self.logger.debug(f"     ╭─ RAW RESPONSE ────────────────────────────────────")
-                for line in raw_response.split('\n')[:50]:  # Limit to 50 lines
-                    self.logger.debug(f"     │ {line}")
-                if raw_response.count('\n') > 50:
-                    self.logger.debug(f"     │ ... ({raw_response.count(chr(10)) - 50} more lines)")
-                self.logger.debug(f"     ╰───────────────────────────────────────────────────\n")
-
-    def log_rag_results(self, results: List[Dict[str, Any]]):
-        """Log RAG/NeoMem results in scannable format"""
-        if self.level.value >= LogLevel.SUMMARY.value:
-            self.logger.info(f"  📚 RAG Results: {len(results)} memories retrieved")
-
-            if self.level.value >= LogLevel.DETAILED.value and results:
-                self.logger.info(f"     ╭─ MEMORY SCORES ───────────────────────────────────")
-                for idx, result in enumerate(results[:10], 1):  # Show top 10
-                    score = result.get("score", 0)
-                    data_preview = str(result.get("payload", {}).get("data", ""))[:80]
-                    self.logger.info(f"     │ [{idx}] {score:.3f} | {data_preview}...")
-                if len(results) > 10:
-                    self.logger.info(f"     │ ... and {len(results) - 10} more results")
-                self.logger.info(f"     ╰───────────────────────────────────────────────────")
-
-    def log_context_state(self, context_state: Dict[str, Any]):
-        """Log context state summary"""
-        if self.level.value >= LogLevel.SUMMARY.value:
-            msg_count = context_state.get("message_count", 0)
-            minutes_since = context_state.get("minutes_since_last_msg", 0)
-            rag_count = len(context_state.get("rag", []))
-
-            self.logger.info(f"  📊 Context | Messages: {msg_count} | Last: {minutes_since:.1f}min ago | RAG: {rag_count} results")
-
-            if self.level.value >= LogLevel.DETAILED.value:
-                intake = context_state.get("intake", {})
-                if intake:
-                    self.logger.info(f"     ╭─ INTAKE SUMMARIES ────────────────────────────────")
-                    for level in ["L1", "L5", "L10", "L20", "L30"]:
-                        if level in intake:
-                            summary = intake[level]
-                            if isinstance(summary, dict):
-                                summary = summary.get("summary", str(summary)[:100])
-                            else:
-                                summary = str(summary)[:100]
-                            self.logger.info(f"     │ {level}: {summary}...")
-                    self.logger.info(f"     ╰───────────────────────────────────────────────────")
-
-    def log_error(self, stage: str, error: Exception, critical: bool = False):
-        """Log an error with context"""
-        level_marker = "🔴 CRITICAL" if critical else "⚠️  WARNING"
-        self.logger.error(f"{level_marker} | Stage: {stage} | Error: {type(error).__name__}: {str(error)}")
-
-        if self.level.value >= LogLevel.VERBOSE.value:
-            import traceback
-            self.logger.debug(f"     Traceback:\n{traceback.format_exc()}")
-
-    def log_raw_data(self, label: str, data: Any, max_lines: int = 30):
-        """Log raw data in a collapsible format"""
-        if self.level.value >= LogLevel.VERBOSE.value:
-            self.logger.debug(f"     ╭─ {label.upper()} ──────────────────────────────────")
-
-            if isinstance(data, (dict, list)):
-                json_str = json.dumps(data, indent=2, default=str)
-                lines = json_str.split('\n')
-                for line in lines[:max_lines]:
-                    self.logger.debug(f"     │ {line}")
-                if len(lines) > max_lines:
-                    self.logger.debug(f"     │ ... ({len(lines) - max_lines} more lines)")
-            else:
-                lines = str(data).split('\n')
-                for line in lines[:max_lines]:
-                    self.logger.debug(f"     │ {line}")
-                if len(lines) > max_lines:
-                    self.logger.debug(f"     │ ... ({len(lines) - max_lines} more lines)")
-
-            self.logger.debug(f"     ╰───────────────────────────────────────────────────")
-
-    def pipeline_end(self, session_id: str, final_output_length: int):
-        """Mark the end of pipeline run with summary"""
-        if self.pipeline_start_time:
-            total_duration_ms = (datetime.now() - self.pipeline_start_time).total_seconds() * 1000
-
-            if self.level.value >= LogLevel.SUMMARY.value:
-                self.logger.info(f"\n{'='*100}")
-                self.logger.info(f"✨ PIPELINE COMPLETE | Session: {session_id} | Total: {total_duration_ms:.0f}ms")
-                self.logger.info(f"{'='*100}")
-
-                # Show timing breakdown
-                if self.stage_timings and self.level.value >= LogLevel.DETAILED.value:
-                    self.logger.info("⏱️  Stage Timings:")
-                    for stage, duration in self.stage_timings.items():
-                        pct = (duration / total_duration_ms) * 100 if total_duration_ms > 0 else 0
-                        self.logger.info(f"   {stage:20s}: {duration:6.0f}ms ({pct:5.1f}%)")
-
-                self.logger.info(f"📤 Final output: {final_output_length} characters")
-                self.logger.info(f"{'='*100}\n")
-
-
-def get_log_level_from_env() -> LogLevel:
-    """Parse log level from environment variable"""
-    import os
-    verbose_debug = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
-    detail_level = os.getenv("LOG_DETAIL_LEVEL", "").lower()
-
-    if detail_level == "minimal":
-        return LogLevel.MINIMAL
-    elif detail_level == "summary":
-        return LogLevel.SUMMARY
-    elif detail_level == "detailed":
-        return LogLevel.DETAILED
-    elif detail_level == "verbose" or verbose_debug:
-        return LogLevel.VERBOSE
-    else:
-        return LogLevel.SUMMARY  # Default
@@ -1,26 +0,0 @@
-#!/usr/bin/env python3
-import re
-
-xml = """<tool_call>
-  <name>execute_code</name>
-  <arguments>
-    <language>python</language>
-    <code>print(50 / 2)</code>
-    <reason>To calculate the result of dividing 50 by 2.</reason>
-  </arguments>
-</olith>"""
-
-pattern = r'<tool_call>(.*?)</(?:tool_call|[a-zA-Z]+)>'
-matches = re.findall(pattern, xml, re.DOTALL)
-
-print(f"Pattern: {pattern}")
-print(f"Number of matches: {len(matches)}")
-print("\nMatches:")
-for idx, match in enumerate(matches):
-    print(f"\nMatch {idx + 1}:")
-    print(f"Length: {len(match)} chars")
-    print(f"Content:\n{match[:200]}")
-
-# Now test what gets removed
-clean_content = re.sub(pattern, '', xml, flags=re.DOTALL).strip()
-print(f"\n\nCleaned content:\n{clean_content}")
@@ -1,195 +0,0 @@
-networks:
-  lyra_net:
-    driver: bridge
-
-volumes:
-  postgres_data:
-    driver: local
-  neo4j_data:
-    driver: local
-  code_executions:
-    driver: local
-
-services:
-
-  # # ============================================================
-  # # NeoMem: Postgres
-  # # ============================================================
-  # neomem-postgres:
-  #   image: ankane/pgvector:v0.5.1
-  #   container_name: neomem-postgres
-  #   restart: unless-stopped
-  #   environment:
-  #     POSTGRES_USER: neomem
-  #     POSTGRES_PASSWORD: neomempass
-  #     POSTGRES_DB: neomem
-  #   volumes:
-  #     - ./volumes/postgres_data:/var/lib/postgresql/data
-  #   ports:
-  #     - "5432:5432"
-  #   healthcheck:
-  #     test: ["CMD-SHELL", "pg_isready -U neomem -d neomem || exit 1"]
-  #     interval: 5s
-  #     timeout: 5s
-  #     retries: 10
-  #   networks:
-  #     - lyra_net
-
-  # # ============================================================
-  # # NeoMem: Neo4j Graph
-  # # ============================================================
-  # neomem-neo4j:
-  #   image: neo4j:5
-  #   container_name: neomem-neo4j
-  #   restart: unless-stopped
-  #   environment:
-  #     NEO4J_AUTH: "neo4j/neomemgraph"
-  #     NEO4JLABS_PLUGINS: '["graph-data-science"]'
-  #   volumes:
-  #     - ./volumes/neo4j_data:/data
-  #   ports:
-  #     - "7474:7474"
-  #     - "7687:7687"
-  #   healthcheck:
-  #     test: ["CMD-SHELL", "cypher-shell -u neo4j -p neomemgraph 'RETURN 1' || exit 1"]
-  #     interval: 10s
-  #     timeout: 10s
-  #     retries: 10
-  #   networks:
-  #     - lyra_net
-
-  # ============================================================
-  # NeoMem API
-  # ============================================================
-  # neomem-api:
-  #   build:
-  #     context: ./neomem
-  #   image: lyra-neomem:latest
-  #   container_name: neomem-api
-  #   restart: unless-stopped
-  #   env_file:
-  #     - ./neomem/.env
-  #     - ./.env
-  #   volumes:
-  #     - ./neomem_history:/app/history
-  #   ports:
-  #     - "7077:7077"
-  #   depends_on:
-  #     neomem-postgres:
-  #       condition: service_healthy
-  #     neomem-neo4j:
-  #       condition: service_healthy
-  #   networks:
-  #     - lyra_net
-
-  # ============================================================
-  # Relay  (bridge network lyra_net, published on port 7078)
-  # ============================================================
-  relay:
-    build:
-      context: ./core/relay
-    container_name: relay
-    restart: unless-stopped
-    env_file:
-      - ./.env
-    volumes:
-      - ./core/relay/sessions:/app/sessions
-    ports:
-      - "7078:7078"
-    networks:
-      - lyra_net
-
-  # ============================================================
-  # UI Server
-  # ============================================================
-  lyra-ui:
-    image: nginx:alpine
-    container_name: lyra-ui
-    restart: unless-stopped
-    ports:
-      - "8081:80"
-    volumes:
-      - ./core/ui:/usr/share/nginx/html:ro
-    networks:
-      - lyra_net
-
-
-  # ============================================================
-  # Cortex
-  # ============================================================
-  cortex:
-    build:
-      context: ./cortex
-    container_name: cortex
-    restart: unless-stopped
-    env_file:
-      - ./cortex/.env
-      - ./.env
-    volumes:
-      - ./cortex:/app
-      - /var/run/docker.sock:/var/run/docker.sock:ro
-    ports:
-      - "7081:7081"
-    networks:
-      - lyra_net
-
-  # ============================================================
-  # Code Sandbox (for tool execution)
-  # ============================================================
-  code-sandbox:
-    build:
-      context: ./sandbox
-    container_name: lyra-code-sandbox
-    restart: unless-stopped
-    security_opt:
-      - no-new-privileges:true
-    cap_drop:
-      - ALL
-    cap_add:
-      - CHOWN
-      - SETUID
-      - SETGID
-    network_mode: "none"
-    volumes:
-      - code_executions:/executions
-    mem_limit: 512m
-    cpus: 1.0
-    pids_limit: 100
-    user: sandbox
-    command: tail -f /dev/null
-
-  # ============================================================
-  # Intake
-  # ============================================================
-#  intake:
-#   build:
-#      context: ./intake
-#    container_name: intake
-#    restart: unless-stopped
-#    env_file:
-#      - ./intake/.env
-#      - ./.env
-#    ports:
-#      - "7080:7080"
-#    volumes:
-#      - ./intake:/app
-#      - ./intake-logs:/app/logs
-#    depends_on:
-#      - cortex
-#    networks:
-#      - lyra_net
-
-  # ============================================================
-  # RAG Service
-  # ============================================================
-  # rag:
-  #   build:
-  #     context: ./rag
-  #   container_name: rag
-  #   restart: unless-stopped
-  #   environment:
-  #     NEOMEM_URL: http://neomem-api:7077
-  #   ports:
-  #     - "7090:7090"
-  #   networks:
-  #     - lyra_net  
@@ -1,441 +0,0 @@
-├── CHANGELOG.md
-├── core
-│   ├── env experiments
-│   ├── persona-sidecar
-│   │   ├── Dockerfile
-│   │   ├── package.json
-│   │   ├── persona-server.js
-│   │   └── personas.json
-│   ├── relay
-│   │   ├── Dockerfile
-│   │   ├── lib
-│   │   │   ├── cortex.js
-│   │   │   └── llm.js
-│   │   ├── package.json
-│   │   ├── package-lock.json
-│   │   ├── server.js
-│   │   ├── sessions
-│   │   │   ├── default.jsonl
-│   │   │   ├── sess-6rxu7eia.json
-│   │   │   ├── sess-6rxu7eia.jsonl
-│   │   │   ├── sess-l08ndm60.json
-│   │   │   └── sess-l08ndm60.jsonl
-│   │   └── test-llm.js
-│   ├── relay-backup
-│   └── ui
-│       ├── index.html
-│       ├── manifest.json
-│       └── style.css
-├── cortex
-│   ├── context.py
-│   ├── Dockerfile
-│   ├── ingest
-│   │   ├── ingest_handler.py
-│   │   ├── __init__.py
-│   │   └── intake_client.py
-│   ├── intake
-│   │   ├── __init__.py
-│   │   ├── intake.py
-│   │   └── logs
-│   ├── llm
-│   │   ├── __init__.py
-│   │   └── llm_router.py
-│   ├── logs
-│   │   ├── cortex_verbose_debug.log
-│   │   └── reflections.log
-│   ├── main.py
-│   ├── neomem_client.py
-│   ├── persona
-│   │   ├── identity.py
-│   │   ├── __init__.py
-│   │   └── speak.py
-│   ├── rag.py
-│   ├── reasoning
-│   │   ├── __init__.py
-│   │   ├── reasoning.py
-│   │   ├── refine.py
-│   │   └── reflection.py
-│   ├── requirements.txt
-│   ├── router.py
-│   ├── tests
-│   └── utils
-│       ├── config.py
-│       ├── __init__.py
-│       ├── log_utils.py
-│       └── schema.py
-├── deprecated.env.txt
-├── DEPRECATED_FILES.md
-├── docker-compose.yml
-├── docs
-│   ├── ARCHITECTURE_v0-6-0.md
-│   ├── ENVIRONMENT_VARIABLES.md
-│   ├── lyra_tree.txt
-│   └── PROJECT_SUMMARY.md
-├── intake-logs
-│   └── summaries.log
-├── neomem
-│   ├── _archive
-│   │   └── old_servers
-│   │       ├── main_backup.py
-│   │       └── main_dev.py
-│   ├── docker-compose.yml
-│   ├── Dockerfile
-│   ├── neomem
-│   │   ├── api
-│   │   ├── client
-│   │   │   ├── __init__.py
-│   │   │   ├── main.py
-│   │   │   ├── project.py
-│   │   │   └── utils.py
-│   │   ├── configs
-│   │   │   ├── base.py
-│   │   │   ├── embeddings
-│   │   │   │   ├── base.py
-│   │   │   │   └── __init__.py
-│   │   │   ├── enums.py
-│   │   │   ├── __init__.py
-│   │   │   ├── llms
-│   │   │   │   ├── anthropic.py
-│   │   │   │   ├── aws_bedrock.py
-│   │   │   │   ├── azure.py
-│   │   │   │   ├── base.py
-│   │   │   │   ├── deepseek.py
-│   │   │   │   ├── __init__.py
-│   │   │   │   ├── lmstudio.py
-│   │   │   │   ├── ollama.py
-│   │   │   │   ├── openai.py
-│   │   │   │   └── vllm.py
-│   │   │   ├── prompts.py
-│   │   │   └── vector_stores
-│   │   │       ├── azure_ai_search.py
-│   │   │       ├── azure_mysql.py
-│   │   │       ├── baidu.py
-│   │   │       ├── chroma.py
-│   │   │       ├── databricks.py
-│   │   │       ├── elasticsearch.py
-│   │   │       ├── faiss.py
-│   │   │       ├── __init__.py
-│   │   │       ├── langchain.py
-│   │   │       ├── milvus.py
-│   │   │       ├── mongodb.py
-│   │   │       ├── neptune.py
-│   │   │       ├── opensearch.py
-│   │   │       ├── pgvector.py
-│   │   │       ├── pinecone.py
-│   │   │       ├── qdrant.py
-│   │   │       ├── redis.py
-│   │   │       ├── s3_vectors.py
-│   │   │       ├── supabase.py
-│   │   │       ├── upstash_vector.py
-│   │   │       ├── valkey.py
-│   │   │       ├── vertex_ai_vector_search.py
-│   │   │       └── weaviate.py
-│   │   ├── core
-│   │   ├── embeddings
-│   │   │   ├── aws_bedrock.py
-│   │   │   ├── azure_openai.py
-│   │   │   ├── base.py
-│   │   │   ├── configs.py
-│   │   │   ├── gemini.py
-│   │   │   ├── huggingface.py
-│   │   │   ├── __init__.py
-│   │   │   ├── langchain.py
-│   │   │   ├── lmstudio.py
-│   │   │   ├── mock.py
-│   │   │   ├── ollama.py
-│   │   │   ├── openai.py
-│   │   │   ├── together.py
-│   │   │   └── vertexai.py
-│   │   ├── exceptions.py
-│   │   ├── graphs
-│   │   │   ├── configs.py
-│   │   │   ├── __init__.py
-│   │   │   ├── neptune
-│   │   │   │   ├── base.py
-│   │   │   │   ├── __init__.py
-│   │   │   │   ├── neptunedb.py
-│   │   │   │   └── neptunegraph.py
-│   │   │   ├── tools.py
-│   │   │   └── utils.py
-│   │   ├── __init__.py
-│   │   ├── LICENSE
-│   │   ├── llms
-│   │   │   ├── anthropic.py
-│   │   │   ├── aws_bedrock.py
-│   │   │   ├── azure_openai.py
-│   │   │   ├── azure_openai_structured.py
-│   │   │   ├── base.py
-│   │   │   ├── configs.py
-│   │   │   ├── deepseek.py
-│   │   │   ├── gemini.py
-│   │   │   ├── groq.py
-│   │   │   ├── __init__.py
-│   │   │   ├── langchain.py
-│   │   │   ├── litellm.py
-│   │   │   ├── lmstudio.py
-│   │   │   ├── ollama.py
-│   │   │   ├── openai.py
-│   │   │   ├── openai_structured.py
-│   │   │   ├── sarvam.py
-│   │   │   ├── together.py
-│   │   │   ├── vllm.py
-│   │   │   └── xai.py
-│   │   ├── memory
-│   │   │   ├── base.py
-│   │   │   ├── graph_memory.py
-│   │   │   ├── __init__.py
-│   │   │   ├── kuzu_memory.py
-│   │   │   ├── main.py
-│   │   │   ├── memgraph_memory.py
-│   │   │   ├── setup.py
-│   │   │   ├── storage.py
-│   │   │   ├── telemetry.py
-│   │   │   └── utils.py
-│   │   ├── proxy
-│   │   │   ├── __init__.py
-│   │   │   └── main.py
-│   │   ├── server
-│   │   │   ├── dev.Dockerfile
-│   │   │   ├── docker-compose.yaml
-│   │   │   ├── Dockerfile
-│   │   │   ├── main_old.py
-│   │   │   ├── main.py
-│   │   │   ├── Makefile
-│   │   │   ├── README.md
-│   │   │   └── requirements.txt
-│   │   ├── storage
-│   │   ├── utils
-│   │   │   └── factory.py
-│   │   └── vector_stores
-│   │       ├── azure_ai_search.py
-│   │       ├── azure_mysql.py
-│   │       ├── baidu.py
-│   │       ├── base.py
-│   │       ├── chroma.py
-│   │       ├── configs.py
-│   │       ├── databricks.py
-│   │       ├── elasticsearch.py
-│   │       ├── faiss.py
-│   │       ├── __init__.py
-│   │       ├── langchain.py
-│   │       ├── milvus.py
-│   │       ├── mongodb.py
-│   │       ├── neptune_analytics.py
-│   │       ├── opensearch.py
-│   │       ├── pgvector.py
-│   │       ├── pinecone.py
-│   │       ├── qdrant.py
-│   │       ├── redis.py
-│   │       ├── s3_vectors.py
-│   │       ├── supabase.py
-│   │       ├── upstash_vector.py
-│   │       ├── valkey.py
-│   │       ├── vertex_ai_vector_search.py
-│   │       └── weaviate.py
-│   ├── neomem_history
-│   │   └── history.db
-│   ├── pyproject.toml
-│   ├── README.md
-│   └── requirements.txt
-├── neomem_history
-│   └── history.db
-├── rag
-│   ├── chatlogs
-│   │   └── lyra
-│   │       ├── 0000_Wire_ROCm_to_Cortex.json
-│   │       ├── 0001_Branch___10_22_ct201branch-ssh_tut.json
-│   │       ├── 0002_cortex_LLMs_11-1-25.json
-│   │       ├── 0003_RAG_beta.json
-│   │       ├── 0005_Cortex_v0_4_0_planning.json
-│   │       ├── 0006_Cortex_v0_4_0_Refinement.json
-│   │       ├── 0009_Branch___Cortex_v0_4_0_planning.json
-│   │       ├── 0012_Cortex_4_-_neomem_11-1-25.json
-│   │       ├── 0016_Memory_consolidation_concept.json
-│   │       ├── 0017_Model_inventory_review.json
-│   │       ├── 0018_Branch___Memory_consolidation_concept.json
-│   │       ├── 0022_Branch___Intake_conversation_summaries.json
-│   │       ├── 0026_Intake_conversation_summaries.json
-│   │       ├── 0027_Trilium_AI_LLM_setup.json
-│   │       ├── 0028_LLMs_and_sycophancy_levels.json
-│   │       ├── 0031_UI_improvement_plan.json
-│   │       ├── 0035_10_27-neomem_update.json
-│   │       ├── 0044_Install_llama_cpp_on_ct201.json
-│   │       ├── 0045_AI_task_assistant.json
-│   │       ├── 0047_Project_scope_creation.json
-│   │       ├── 0052_View_docker_container_logs.json
-│   │       ├── 0053_10_21-Proxmox_fan_control.json
-│   │       ├── 0054_10_21-pytorch_branch_Quant_experiments.json
-│   │       ├── 0055_10_22_ct201branch-ssh_tut.json
-│   │       ├── 0060_Lyra_project_folder_issue.json
-│   │       ├── 0062_Build_pytorch_API.json
-│   │       ├── 0063_PokerBrain_dataset_structure.json
-│   │       ├── 0065_Install_PyTorch_setup.json
-│   │       ├── 0066_ROCm_PyTorch_setup_quirks.json
-│   │       ├── 0067_VM_model_setup_steps.json
-│   │       ├── 0070_Proxmox_disk_error_fix.json
-│   │       ├── 0072_Docker_Compose_vs_Portainer.json
-│   │       ├── 0073_Check_system_temps_Proxmox.json
-│   │       ├── 0075_Cortex_gpu_progress.json
-│   │       ├── 0076_Backup_Proxmox_before_upgrade.json
-│   │       ├── 0077_Storage_cleanup_advice.json
-│   │       ├── 0082_Install_ROCm_on_Proxmox.json
-│   │       ├── 0088_Thalamus_program_summary.json
-│   │       ├── 0094_Cortex_blueprint_development.json
-│   │       ├── 0095_mem0_advancments.json
-│   │       ├── 0096_Embedding_provider_swap.json
-│   │       ├── 0097_Update_git_commit_steps.json
-│   │       ├── 0098_AI_software_description.json
-│   │       ├── 0099_Seed_memory_process.json
-│   │       ├── 0100_Set_up_Git_repo.json
-│   │       ├── 0101_Customize_embedder_setup.json
-│   │       ├── 0102_Seeding_Local_Lyra_memory.json
-│   │       ├── 0103_Mem0_seeding_part_3.json
-│   │       ├── 0104_Memory_build_prompt.json
-│   │       ├── 0105_Git_submodule_setup_guide.json
-│   │       ├── 0106_Serve_UI_on_LAN.json
-│   │       ├── 0107_AI_name_suggestion.json
-│   │       ├── 0108_Room_X_planning_update.json
-│   │       ├── 0109_Salience_filtering_design.json
-│   │       ├── 0110_RoomX_Cortex_build.json
-│   │       ├── 0119_Explain_Lyra_cortex_idea.json
-│   │       ├── 0120_Git_submodule_organization.json
-│   │       ├── 0121_Web_UI_fix_guide.json
-│   │       ├── 0122_UI_development_planning.json
-│   │       ├── 0123_NVGRAM_debugging_steps.json
-│   │       ├── 0124_NVGRAM_setup_troubleshooting.json
-│   │       ├── 0125_NVGRAM_development_update.json
-│   │       ├── 0126_RX_-_NeVGRAM_New_Features.json
-│   │       ├── 0127_Error_troubleshooting_steps.json
-│   │       ├── 0135_Proxmox_backup_with_ABB.json
-│   │       ├── 0151_Auto-start_Lyra-Core_VM.json
-│   │       ├── 0156_AI_GPU_benchmarks_comparison.json
-│   │       └── 0251_Lyra_project_handoff.json
-│   ├── chromadb
-│   │   ├── c4f701ee-1978-44a1-9df4-3e865b5d33c1
-│   │   │   ├── data_level0.bin
-│   │   │   ├── header.bin
-│   │   │   ├── index_metadata.pickle
-│   │   │   ├── length.bin
-│   │   │   └── link_lists.bin
-│   │   └── chroma.sqlite3
-│   ├── import.log
-│   ├── lyra-chatlogs
-│   │   ├── 0000_Wire_ROCm_to_Cortex.json
-│   │   ├── 0001_Branch___10_22_ct201branch-ssh_tut.json
-│   │   ├── 0002_cortex_LLMs_11-1-25.json
-│   │   └── 0003_RAG_beta.json
-│   ├── rag_api.py
-│   ├── rag_build.py
-│   ├── rag_chat_import.py
-│   └── rag_query.py
-├── README.md
-└── volumes
-    ├── neo4j_data
-    │   ├── databases
-    │   │   ├── neo4j
-    │   │   │   ├── database_lock
-    │   │   │   ├── id-buffer.tmp.0
-    │   │   │   ├── neostore
-    │   │   │   ├── neostore.counts.db
-    │   │   │   ├── neostore.indexstats.db
-    │   │   │   ├── neostore.labeltokenstore.db
-    │   │   │   ├── neostore.labeltokenstore.db.id
-    │   │   │   ├── neostore.labeltokenstore.db.names
-    │   │   │   ├── neostore.labeltokenstore.db.names.id
-    │   │   │   ├── neostore.nodestore.db
-    │   │   │   ├── neostore.nodestore.db.id
-    │   │   │   ├── neostore.nodestore.db.labels
-    │   │   │   ├── neostore.nodestore.db.labels.id
-    │   │   │   ├── neostore.propertystore.db
-    │   │   │   ├── neostore.propertystore.db.arrays
-    │   │   │   ├── neostore.propertystore.db.arrays.id
-    │   │   │   ├── neostore.propertystore.db.id
-    │   │   │   ├── neostore.propertystore.db.index
-    │   │   │   ├── neostore.propertystore.db.index.id
-    │   │   │   ├── neostore.propertystore.db.index.keys
-    │   │   │   ├── neostore.propertystore.db.index.keys.id
-    │   │   │   ├── neostore.propertystore.db.strings
-    │   │   │   ├── neostore.propertystore.db.strings.id
-    │   │   │   ├── neostore.relationshipgroupstore.db
-    │   │   │   ├── neostore.relationshipgroupstore.db.id
-    │   │   │   ├── neostore.relationshipgroupstore.degrees.db
-    │   │   │   ├── neostore.relationshipstore.db
-    │   │   │   ├── neostore.relationshipstore.db.id
-    │   │   │   ├── neostore.relationshiptypestore.db
-    │   │   │   ├── neostore.relationshiptypestore.db.id
-    │   │   │   ├── neostore.relationshiptypestore.db.names
-    │   │   │   ├── neostore.relationshiptypestore.db.names.id
-    │   │   │   ├── neostore.schemastore.db
-    │   │   │   ├── neostore.schemastore.db.id
-    │   │   │   └── schema
-    │   │   │       └── index
-    │   │   │           └── token-lookup-1.0
-    │   │   │               ├── 1
-    │   │   │               │   └── index-1
-    │   │   │               └── 2
-    │   │   │                   └── index-2
-    │   │   ├── store_lock
-    │   │   └── system
-    │   │       ├── database_lock
-    │   │       ├── id-buffer.tmp.0
-    │   │       ├── neostore
-    │   │       ├── neostore.counts.db
-    │   │       ├── neostore.indexstats.db
-    │   │       ├── neostore.labeltokenstore.db
-    │   │       ├── neostore.labeltokenstore.db.id
-    │   │       ├── neostore.labeltokenstore.db.names
-    │   │       ├── neostore.labeltokenstore.db.names.id
-    │   │       ├── neostore.nodestore.db
-    │   │       ├── neostore.nodestore.db.id
-    │   │       ├── neostore.nodestore.db.labels
-    │   │       ├── neostore.nodestore.db.labels.id
-    │   │       ├── neostore.propertystore.db
-    │   │       ├── neostore.propertystore.db.arrays
-    │   │       ├── neostore.propertystore.db.arrays.id
-    │   │       ├── neostore.propertystore.db.id
-    │   │       ├── neostore.propertystore.db.index
-    │   │       ├── neostore.propertystore.db.index.id
-    │   │       ├── neostore.propertystore.db.index.keys
-    │   │       ├── neostore.propertystore.db.index.keys.id
-    │   │       ├── neostore.propertystore.db.strings
-    │   │       ├── neostore.propertystore.db.strings.id
-    │   │       ├── neostore.relationshipgroupstore.db
-    │   │       ├── neostore.relationshipgroupstore.db.id
-    │   │       ├── neostore.relationshipgroupstore.degrees.db
-    │   │       ├── neostore.relationshipstore.db
-    │   │       ├── neostore.relationshipstore.db.id
-    │   │       ├── neostore.relationshiptypestore.db
-    │   │       ├── neostore.relationshiptypestore.db.id
-    │   │       ├── neostore.relationshiptypestore.db.names
-    │   │       ├── neostore.relationshiptypestore.db.names.id
-    │   │       ├── neostore.schemastore.db
-    │   │       ├── neostore.schemastore.db.id
-    │   │       └── schema
-    │   │           └── index
-    │   │               ├── range-1.0
-    │   │               │   ├── 3
-    │   │               │   │   └── index-3
-    │   │               │   ├── 4
-    │   │               │   │   └── index-4
-    │   │               │   ├── 7
-    │   │               │   │   └── index-7
-    │   │               │   ├── 8
-    │   │               │   │   └── index-8
-    │   │               │   └── 9
-    │   │               │       └── index-9
-    │   │               └── token-lookup-1.0
-    │   │                   ├── 1
-    │   │                   │   └── index-1
-    │   │                   └── 2
-    │   │                       └── index-2
-    │   ├── dbms
-    │   │   └── auth.ini
-    │   ├── server_id
-    │   └── transactions
-    │       ├── neo4j
-    │       │   ├── checkpoint.0
-    │       │   └── neostore.transaction.db.0
-    │       └── system
-    │           ├── checkpoint.0
-    │           └── neostore.transaction.db.0
-    └── postgres_data  [error opening dir]
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
serversdown	6d88505697	chore: add sessions to gitignore	2026-05-29 18:23:29 -04:00
Claude	0ee5a9ce47	feat: SQLite-backed memory with brute-force cosine recall - lyra.memory.remember(session_id, role, content) embeds and stores - lyra.memory.recent(session_id, n) returns the last N from a session - lyra.memory.recall(query, k, session_id=None) returns top-k by cosine similarity across the chosen scope (all sessions by default) - Embeddings live in the exchanges.embedding BLOB column as float32 bytes - Connection reopens automatically if LYRA_DB_PATH changes (test-friendly)	2026-05-16 06:35:52 +00:00
Claude	6a1255dfdb	feat: LLM router with local (Ollama) and cloud (OpenAI) backends - lyra.config.load() reads env into a frozen Config dataclass - lyra.llm.complete(messages, backend) routes to Ollama /api/chat or OpenAI chat completions - lyra.llm.embed(texts) calls OpenAI embeddings - .env.example switched from Anthropic to OpenAI to match available key	2026-05-16 06:10:48 +00:00
Claude	b2523c2561	chore: project scaffold (uv, .env.example, README, lyra package)	2026-05-16 06:01:08 +00:00
Claude	faf4e8a1aa	chore: nuke legacy code, keep design docs for restart Preserved on the archive branch. Keeping only the architecture and design thinking that survives the rewrite: - docs/ARCH_v0-6-1.md (Inner Self / Executive / Chat / Persona model) - docs/ARCHITECTURE_v0-6-0.md (predecessor architecture) - docs/PROJECT_SUMMARY.md (project history and rationale) - docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md (detailed design notes) - docs/ENVIRONMENT_VARIABLES.md (multi-backend env conventions) - docs/LLMS.md - docs/TRILLIUM_API.md (for future tool integration) Removed: all service code (cortex, core/relay, neomem, rag, sandbox, persona-sidecar), docker-compose, migration/logging docs, stale root test scripts, CHANGELOG.	2026-05-16 05:57:07 +00:00
claude	4b951f3be8	Merge pull request #16 from serversdwn/dev update to 0.9.0	2025-12-29 01:59:14 -05:00
claude	6b5580a80e	0.9.0 - Added Trilium ETAPI integration. Lyra can now: Search trilium notes and create new notes. with proper ETAPI auth.	2025-12-29 01:58:20 -05:00
claude	86b37ab874	feat: Implement Trillium notes executor for searching and creating notes via ETAPI - Added `trillium.py` for searching and creating notes with Trillium's ETAPI. - Implemented `search_notes` and `create_note` functions with appropriate error handling and validation. feat: Add web search functionality using DuckDuckGo - Introduced `web_search.py` for performing web searches without API keys. - Implemented `search_web` function with result handling and validation. feat: Create provider-agnostic function caller for iterative tool calling - Developed `function_caller.py` to manage LLM interactions with tools. - Implemented iterative calling logic with error handling and tool execution. feat: Establish a tool registry for managing available tools - Created `registry.py` to define and manage tool availability and execution. - Integrated feature flags for enabling/disabling tools based on environment variables. feat: Implement event streaming for tool calling processes - Added `stream_events.py` to manage Server-Sent Events (SSE) for tool calling. - Enabled real-time updates during tool execution for enhanced user experience. test: Add tests for tool calling system components - Created `test_tools.py` to validate functionality of code execution, web search, and tool registry. - Implemented asynchronous tests to ensure proper execution and result handling. chore: Add Dockerfile for sandbox environment setup - Created `Dockerfile` to set up a Python environment with necessary dependencies for code execution. chore: Add debug regex script for testing XML parsing - Introduced `debug_regex.py` to validate regex patterns against XML tool calls. chore: Add HTML template for displaying thinking stream events - Created `test_thinking_stream.html` for visualizing tool calling events in a user-friendly format. test: Add tests for OllamaAdapter XML parsing - Developed `test_ollama_parser.py` to validate XML parsing with various test cases, including malformed XML.	
2025-12-26 03:49:20 -05:00
claude	8b66cd1e1d	update to 0.7.0 Standard Mode Implementation - Complete documentation of the new simple chatbot mode Backend Selection System - UI settings modal and routing changes Session Management Overhaul - File-based persistence with CRUD API UI Improvements - Settings modal, light/dark mode, modal fixes Context Retention - Integration with Intake for conversation history Architecture & Routing Changes - Updates to Relay, Cortex, Intake, LLM router Fixed Critical Issues - DeepSeek R1, context retention, OpenAI errors, modal formatting, session persistence Technical Improvements - Backward compatibility, code quality, performance Architecture Diagrams - Data flow for Standard Mode, Cortex Mode, and sessions Known Limitations - Standard Mode constraints, session management limits Migration Notes - For users and developers upgrading	2025-12-22 01:41:21 -05:00
claude	7cb7033bb6	docs updated v0.7.0	2025-12-22 01:40:24 -05:00
claude	9226b2480b	sessions improved, v0.7.0	2025-12-21 15:50:52 -05:00
claude	58d0afd1c6	mode selection, settings added to ui	2025-12-21 14:30:32 -05:00
claude	9c03b23a6d	simple context added to standard mode	2025-12-21 13:01:00 -05:00
claude	fdc51e598c	v0.7.0 - Standard non cortex mode enabled	2025-12-20 04:15:22 -05:00
claude	092ac4d181	Cortex debugging logs cleaned up	2025-12-20 02:49:20 -05:00
claude	a4f5308f9b	Merge pull request #9 from serversdwn/dev Update to 0.6.0. Docs updated.	2025-12-19 17:44:11 -05:00
claude	34aff34038	Docs updated v0.6.0	2025-12-19 17:43:22 -05:00
claude	a41e342dbd	cleanup ignore stuff	2025-12-17 02:46:23 -05:00
claude	09c00848b9	Merge branch 'dev' of https://github.com/serversdwn/project-lyra into dev	2025-12-17 01:47:30 -05:00
claude	ec5f17694e	ignore	2025-12-17 01:47:19 -05:00
claude	b74658c000	complete breakdown for AI agents added	2025-12-15 11:49:49 -05:00
claude	0a03546039	neomem disabled	2025-12-15 04:10:03 -05:00
claude	0528d10081	autonomy phase 2.5 - tightening up some stuff in the pipeline	2025-12-15 01:56:57 -05:00
claude	e2e55a0fda	autonomy phase 2	2025-12-14 14:43:08 -05:00
claude	ae41b51888	autonomy build, phase 1	2025-12-14 01:44:05 -05:00
claude	70e57ba5d2	cortex pipeline stablized, inner monologue is now determining user intent and tone	2025-12-13 04:13:12 -05:00
claude	7693bc4080	autonomy scaffold	2025-12-13 02:55:49 -05:00
claude	628edb681a	v0.5.2 update Dev	2025-12-12 08:04:20 +00:00
claude	58d6520056	v0.5.2 - fixed: llm router async, relay-UI mismatch, intake summarization failure, among others. Memory relevance thresh. increased.	2025-12-12 02:58:23 -05:00
claude	77429ca6e0	v0.6.1 - reinstated UI, relay > cortex pipeline working	2025-12-11 16:28:25 -05:00
claude	67b7f9594c	autonomy, initial scaffold	2025-12-11 13:12:44 -05:00
claude	875e660e31	docs updated for v0.5.1	2025-12-11 03:49:23 -05:00
claude	09b6b364e5	v0.5.1-Major cortex rework. clean up done too. Merge from dev v0.5.1-Major cortex rework. clean up done too.	2025-12-11 03:48:29 -05:00
claude	832fea78d0	gitignore updated, to ignore vscode settings	2025-12-11 03:42:30 -05:00
claude	3b5ec9c974	cleaning up deprecated files	2025-12-11 03:40:47 -05:00
claude	3eb19d30f0	cortex rework continued.	2025-12-11 02:50:23 -05:00
claude	8428e5e04e	deprecated old intake folder	2025-12-06 04:38:11 -05:00
claude	04f4ed6b51	intake/relay rewire	2025-12-06 04:32:42 -05:00
claude	03450b5f70	add. cleanup	2025-11-30 03:58:15 -05:00
claude	6312f2ae92	intake internalized by cortex, removed intake route in relay	2025-11-29 19:08:15 -05:00
claude	5db0614cdc	cortex 0.2.... i think?	2025-11-29 05:14:32 -05:00
claude	26f5a6b972	fixed neomem URL request failure, now using correct variable	2025-11-28 19:50:53 -05:00
claude	c3fffcdd80	context added, wired in. first attempt	2025-11-28 19:29:41 -05:00
claude	1dd84613cf	Merge pull request #4 from serversdwn/dev Big clean up to v0.5.0, docs updated, restructured throughout.	2025-11-28 18:14:18 -05:00
claude	211328aba9	docs updated	2025-11-28 18:05:59 -05:00
claude	50f95a1f59	Major rewire, all modules connected. Intake still wonkey	2025-11-28 15:14:47 -05:00
claude	7e34307b31	Cortex rework in progress	2025-11-26 18:01:48 -05:00
claude	ca5f582f9c	Fixin' crap so relay works again. pre llm redo	2025-11-26 14:20:47 -05:00
claude	a5f3e0248a	env cleanup round 2	2025-11-26 03:18:15 -05:00
claude	3b128ac7f6	Merge pull request #3 from serversdwn/dev Dev branch reorganizing.	2025-11-26 02:32:31 -05:00
claude	8128b45fe5	reorganizing and restructuring	2025-11-26 02:28:00 -05:00
claude	6d5d442f96	intital file restructure	2025-11-25 20:50:05 -05:00
claude	e30793661f	Merge branch 'main' of https://github.com/serversdwn/project-lyra	2025-11-17 03:41:51 -05:00
claude	967abce237	WIP local changes	2025-11-17 03:39:56 -05:00
claude	7f5413af80	Add MI50 + vLLM full setup guide	2025-11-17 03:34:23 -05:00
claude	e388aaeddf	Remove rag chatlogs and add ignore rules	2025-11-16 03:20:10 -05:00
claude	20aec1a612	Initial clean commit - unified Lyra stack	2025-11-16 03:17:32 -05:00
				`@@ -1 +0,0 @@`
				`"""Executive planning and decision-making module."""`
				`@@ -1 +0,0 @@`
				`"""Pattern learning and adaptation system."""`
				`@@ -1 +0,0 @@`
				`"""Proactive monitoring and suggestion system."""`
				`@@ -1 +0,0 @@`
				`# Ingest module - handles communication with Intake service`
				`@@ -1 +0,0 @@`
				`# LLM module - provides LLM routing and backend abstraction`
				`@@ -1 +0,0 @@`
				`# Persona module - applies Lyra's personality and speaking style`
				`@@ -1 +0,0 @@`
				`# Reasoning module - multi-stage reasoning pipeline`