update to 0.7.0

Standard Mode Implementation - Complete documentation of the new simple chatbot mode
Backend Selection System - UI settings modal and routing changes
Session Management Overhaul - File-based persistence with CRUD API
UI Improvements - Settings modal, light/dark mode, modal fixes
Context Retention - Integration with Intake for conversation history
Architecture & Routing Changes - Updates to Relay, Cortex, Intake, LLM router
Fixed Critical Issues - DeepSeek R1, context retention, OpenAI errors, modal formatting, session persistence
Technical Improvements - Backward compatibility, code quality, performance
Architecture Diagrams - Data flow for Standard Mode, Cortex Mode, and sessions
Known Limitations - Standard Mode constraints, session management limits
Migration Notes - For users and developers upgrading
This commit is contained in:
2025-12-22 01:41:21 -05:00
committed by GitHub
22 changed files with 2891 additions and 366 deletions
+132
View File
@@ -0,0 +1,132 @@
# ============================================================================
# CORTEX LOGGING CONFIGURATION
# ============================================================================
# This file contains all logging-related environment variables for the
# Cortex reasoning pipeline. Copy this to your .env file and adjust as needed.
#
# Log Detail Levels:
# minimal - Only errors and critical events
# summary - Stage completion + errors (DEFAULT - RECOMMENDED FOR PRODUCTION)
# detailed - Include raw LLM outputs, RAG results, timing breakdowns
# verbose - Everything including intermediate states, full JSON dumps
#
# Quick Start:
# - For debugging weak links: LOG_DETAIL_LEVEL=detailed
# - For finding performance bottlenecks: LOG_DETAIL_LEVEL=detailed + VERBOSE_DEBUG=true
# - For production: LOG_DETAIL_LEVEL=summary
# - For silent mode: LOG_DETAIL_LEVEL=minimal
# ============================================================================
# -----------------------------
# Primary Logging Level
# -----------------------------
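# Controls overall verbosity across all components.
# NOTE: this example ships with "detailed" for debugging; the documented
# default (and the recommended production setting) is "summary".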
LOG_DETAIL_LEVEL=detailed
# Legacy verbose debug flag (kept for compatibility)
# When true, enables maximum logging including raw data dumps
VERBOSE_DEBUG=false
# -----------------------------
# LLM Logging
# -----------------------------
# Enable raw LLM response logging (only works with detailed/verbose levels)
# Shows full JSON responses from each LLM backend call
# Set to "true" to see exact LLM outputs for debugging weak links
LOG_RAW_LLM_RESPONSES=true
# -----------------------------
# Context Logging
# -----------------------------
# Show full raw intake data (L1-L30 summaries) in logs
# WARNING: Very verbose, use only for deep debugging
LOG_RAW_CONTEXT_DATA=false
# -----------------------------
# Loop Detection & Protection
# -----------------------------
# Enable duplicate message detection to prevent processing loops
ENABLE_DUPLICATE_DETECTION=true
# Maximum number of messages to keep in session history (prevents unbounded growth)
# Older messages are trimmed automatically
MAX_MESSAGE_HISTORY=100
# Session TTL in hours - sessions inactive longer than this are auto-expired
SESSION_TTL_HOURS=24
# -----------------------------
# NeoMem / RAG Settings
# -----------------------------
# Relevance score threshold for NeoMem results
RELEVANCE_THRESHOLD=0.4
# Enable NeoMem long-term memory retrieval
NEOMEM_ENABLED=false
# -----------------------------
# Autonomous Features
# -----------------------------
# Enable autonomous tool invocation (RAG, WEB, WEATHER, CODEBRAIN)
ENABLE_AUTONOMOUS_TOOLS=true
# Confidence threshold for autonomous tool invocation (0.0 - 1.0)
AUTONOMOUS_TOOL_CONFIDENCE_THRESHOLD=0.6
# Enable proactive monitoring and suggestions
ENABLE_PROACTIVE_MONITORING=true
# Minimum priority for proactive suggestions to be included (0.0 - 1.0)
PROACTIVE_SUGGESTION_MIN_PRIORITY=0.6
# ============================================================================
# EXAMPLE LOGGING OUTPUT AT DIFFERENT LEVELS
# ============================================================================
#
# LOG_DETAIL_LEVEL=summary (RECOMMENDED):
# ────────────────────────────────────────────────────────────────────────────
# ✅ [LLM] PRIMARY | 14:23:45.123 | Reply: Based on your question about...
# 📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 3 results
# 🧠 Monologue | question | Tone: curious
# ✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
# 📤 Output: 342 characters
# ────────────────────────────────────────────────────────────────────────────
#
# LOG_DETAIL_LEVEL=detailed (FOR DEBUGGING):
# ────────────────────────────────────────────────────────────────────────────
# 🚀 PIPELINE START | Session: abc123 | 14:23:45.123
# 📝 User: What is the meaning of life?
# ────────────────────────────────────────────────────────────────────────────
# 🧠 LLM CALL | Backend: PRIMARY | 14:23:45.234
# ────────────────────────────────────────────────────────────────────────────
# 📝 Prompt: You are Lyra, a thoughtful AI assistant...
# 💬 Reply: Based on philosophical perspectives, the meaning...
# ╭─ RAW RESPONSE ────────────────────────────────────────────────────────────
# │ {
# │ "choices": [
# │ {
# │ "message": {
# │ "content": "Based on philosophical perspectives..."
# │ }
# │ }
# │ ]
# │ }
# ╰───────────────────────────────────────────────────────────────────────────
#
# ✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
# ⏱️ Stage Timings:
# context : 150ms ( 12.0%)
# identity : 10ms ( 0.8%)
# monologue : 200ms ( 16.0%)
# reasoning : 450ms ( 36.0%)
# refinement : 300ms ( 24.0%)
# persona : 140ms ( 11.2%)
# ────────────────────────────────────────────────────────────────────────────
#
# LOG_DETAIL_LEVEL=verbose (MAXIMUM DEBUG):
# Same as detailed but includes:
# - Full 50+ line raw JSON dumps
# - Complete intake data structures
# - All intermediate processing states
# - Detailed traceback on errors
# ============================================================================
+265
View File
@@ -9,6 +9,271 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Se
---
## [0.7.0] - 2025-12-21
### Added - Standard Mode & UI Enhancements
**Standard Mode Implementation**
- Added "Standard Mode" chat option that bypasses complex cortex reasoning pipeline
- Provides simple chatbot functionality for coding and practical tasks
- Maintains full conversation context across messages
- Backend-agnostic - works with SECONDARY (Ollama), OPENAI, or custom backends
- Created `/simple` endpoint in Cortex router [cortex/router.py:389](cortex/router.py#L389)
- Mode selector in UI with toggle between Standard and Cortex modes
- Standard Mode: Direct LLM chat with context retention
- Cortex Mode: Full 7-stage reasoning pipeline (unchanged)
**Backend Selection System**
- UI settings modal with LLM backend selection for Standard Mode
- Radio button selector: SECONDARY (Ollama/Qwen), OPENAI (GPT-4o-mini), or custom
- Backend preference persisted in localStorage
- Custom backend text input for advanced users
- Backend parameter routing through entire stack:
- UI sends `backend` parameter in request body
- Relay forwards backend selection to Cortex
- Cortex `/simple` endpoint respects user's backend choice
- Environment-based fallback: Uses `STANDARD_MODE_LLM` if no backend specified
**Session Management Overhaul**
- Complete rewrite of session system to use server-side persistence
- File-based storage in `core/relay/sessions/` directory
- Session files: `{sessionId}.json` for history, `{sessionId}.meta.json` for metadata
- Server is source of truth - sessions sync across browsers and reboots
- Session metadata system for friendly names
- Sessions display custom names instead of random IDs
- Rename functionality in session dropdown
- Last modified timestamps and message counts
- Full CRUD API for sessions in Relay:
- `GET /sessions` - List all sessions with metadata
- `GET /sessions/:id` - Retrieve session history
- `POST /sessions/:id` - Save session history
- `PATCH /sessions/:id/metadata` - Update session name/metadata
- `DELETE /sessions/:id` - Delete session and metadata
- Session management UI in settings modal:
- List of all sessions with message counts and timestamps
- Delete button for each session with confirmation
- Automatic session cleanup when deleting current session
**UI Improvements**
- Settings modal with hamburger menu (⚙ Settings button)
- Backend selection section for Standard Mode
- Session management section with delete functionality
- Clean modal overlay with cyberpunk theme
- ESC key and click-outside to close
- Light/Dark mode toggle with dark mode as default
- Theme preference persisted in localStorage
- CSS variables for seamless theme switching
- Toggle button shows current mode (🌙 Dark Mode / ☀️ Light Mode)
- Removed redundant model selector dropdown from header
- Fixed modal positioning and z-index layering
- Modal moved outside #chat container for proper rendering
- Fixed z-index: overlay (999), modal content (1001)
- Centered modal with proper backdrop blur
**Context Retention for Standard Mode**
- Integration with Intake module for conversation history
- Added `get_recent_messages()` function in intake.py
- Standard Mode retrieves last 20 messages from session buffer
- Full context sent to LLM on each request
- Message array format support in LLM router:
- Updated Ollama provider to accept `messages` parameter
- Updated OpenAI provider to accept `messages` parameter
- Automatic conversion from messages to prompt string for non-chat APIs
### Changed - Architecture & Routing
**Relay Server Updates** [core/relay/server.js](core/relay/server.js)
- ES module migration for session persistence:
- Imported `fs/promises`, `path`, `fileURLToPath` for file operations
- Created `SESSIONS_DIR` constant for session storage location
- Mode-based routing in both `/chat` and `/v1/chat/completions` endpoints:
- Extracts `mode` parameter from request body (default: "cortex")
- Routes to `CORTEX_SIMPLE` for Standard Mode, `CORTEX_REASON` for Cortex Mode
- Backend parameter only used in Standard Mode
- Session persistence functions:
- `ensureSessionsDir()` - Creates sessions directory if needed
- `loadSession(sessionId)` - Reads session history from file
- `saveSession(sessionId, history, metadata)` - Writes session to file
- `loadSessionMetadata(sessionId)` - Reads session metadata
- `saveSessionMetadata(sessionId, metadata)` - Updates session metadata
- `listSessions()` - Returns all sessions with metadata, sorted by last modified
- `deleteSession(sessionId)` - Removes session and metadata files
**Cortex Router Updates** [cortex/router.py](cortex/router.py)
- Added `backend` field to `ReasonRequest` Pydantic model (optional)
- Created `/simple` endpoint for Standard Mode:
- Bypasses reflection, reasoning, refinement stages
- Direct LLM call with conversation context
- Uses backend from request or falls back to `STANDARD_MODE_LLM` env variable
- Returns simple response structure without reasoning artifacts
- Backend selection logic in `/simple`:
- Normalizes backend names to uppercase
- Maps UI backend names to system backend names
- Validates backend availability before calling
**Intake Integration** [cortex/intake/intake.py](cortex/intake/intake.py)
- Added `get_recent_messages(session_id, limit)` function:
- Retrieves last N messages from session buffer
- Returns empty list if session doesn't exist
- Used by `/simple` endpoint for context retrieval
**LLM Router Enhancements** [cortex/llm/llm_router.py](cortex/llm/llm_router.py)
- Added `messages` parameter support across all providers
- Automatic message-to-prompt conversion for legacy APIs
- Chat completion format for Ollama and OpenAI providers
- Stop sequences for MI50/DeepSeek R1 to prevent runaway generation:
- `"User:"`, `"\nUser:"`, `"Assistant:"`, `"\n\n\n"`
**Environment Configuration** [.env](.env)
- Added `STANDARD_MODE_LLM=SECONDARY` for default Standard Mode backend
- Added `CORTEX_SIMPLE_URL=http://cortex:7081/simple` for routing
**UI Architecture** [core/ui/index.html](core/ui/index.html)
- Server-based session loading system:
- `loadSessionsFromServer()` - Fetches sessions from Relay API
- `renderSessions()` - Populates session dropdown from server data
- Session state synchronized with server on every change
- Backend selection persistence:
- Loads saved backend from localStorage on page load
- Includes backend parameter in request body when in Standard Mode
- Settings modal pre-selects current backend choice
- Dark mode by default:
- Checks localStorage for theme preference
- Sets dark theme if no preference found
- Toggle button updates localStorage and applies theme
**CSS Styling** [core/ui/style.css](core/ui/style.css)
- Light mode CSS variables:
- `--bg-dark: #f5f5f5` (light background)
- `--text-main: #1a1a1a` (dark text)
- `--text-fade: #666` (dimmed text)
- Dark mode CSS variables (default):
- `--bg-dark: #0a0a0a` (dark background)
- `--text-main: #e6e6e6` (light text)
- `--text-fade: #999` (dimmed text)
- Modal positioning fixes:
- `position: fixed` with `top: 50%`, `left: 50%`, `transform: translate(-50%, -50%)`
- Z-index layering: overlay (999), content (1001)
- Backdrop blur effect on modal overlay
- Session list styling:
- Session item cards with hover effects
- Delete button with red hover state
- Message count and timestamp display
### Fixed - Critical Issues
**DeepSeek R1 Runaway Generation**
- Root cause: R1 reasoning model generates thinking process and hallucinates conversations
- Solution:
- Changed `STANDARD_MODE_LLM` to SECONDARY (Ollama/Qwen) instead of PRIMARY (MI50/R1)
- Added stop sequences to MI50 provider to prevent continuation
- Documented R1 limitations for Standard Mode usage
**Context Not Maintained in Standard Mode**
- Root cause: `/simple` endpoint didn't retrieve conversation history from Intake
- Solution:
- Created `get_recent_messages()` function in intake.py
- Standard Mode now pulls last 20 messages from session buffer
- Full context sent to LLM with each request
- User feedback: "it's saying it hasn't received any other messages from me, so it looks like the standard mode llm isn't getting the full chat"
**OpenAI Backend 400 Errors**
- Root cause: OpenAI provider only accepted prompt strings, not messages arrays
- Solution: Updated OpenAI provider to support messages parameter like Ollama
- Now handles chat completion format correctly
**Modal Formatting Issues**
- Root cause: Settings modal inside #chat container with overflow constraints
- Symptoms: Modal appearing at bottom, jumbled layout, couldn't close
- Solution:
- Moved modal outside #chat container to be direct child of body
- Changed positioning from absolute to fixed
- Added proper z-index layering (overlay: 999, content: 1001)
- Removed old model selector from header
- User feedback: "the formating for the settings is all off. Its at the bottom and all jumbling together, i cant get it to go away"
**Session Persistence Broken**
- Root cause: Sessions stored only in localStorage, not synced with server
- Symptoms: Sessions didn't persist across browsers or reboots, couldn't load messages
- Solution: Complete rewrite of session system
- Implemented server-side file persistence in Relay
- Created CRUD API endpoints for session management
- Updated UI to load sessions from server instead of localStorage
- Added metadata system for session names
- Sessions now survive container restarts and sync across browsers
- User feedback: "sessions seem to exist locally only, i cant get them to actually load any messages and there is now way to delete them. If i open the ui in a different browser those arent there."
### Technical Improvements
**Backward Compatibility**
- All changes include defaults to maintain existing behavior
- Cortex Mode completely unchanged - still uses full 7-stage pipeline
- Standard Mode is opt-in via UI mode selector
- If no backend specified, falls back to `STANDARD_MODE_LLM` env variable
- Existing requests without mode parameter default to "cortex"
**Code Quality**
- Consistent async/await patterns throughout stack
- Proper error handling with fallbacks
- Clean separation between Standard and Cortex modes
- Session persistence abstracted into helper functions
- Modular UI code with clear event handlers
**Performance**
- Standard Mode bypasses 6 of 7 reasoning stages for faster responses
- Session loading optimized with file-based caching
- Backend selection happens once per message, not per LLM call
- Minimal overhead for mode detection and routing
### Architecture - Dual-Mode Chat System
**Standard Mode Flow:**
```
User (UI) → Relay → Cortex /simple → Intake (get_recent_messages)
→ LLM (direct call with context) → Relay → UI
```
**Cortex Mode Flow (Unchanged):**
```
User (UI) → Relay → Cortex /reason → Reflection → Reasoning
→ Refinement → Persona → Relay → UI
```
**Session Persistence:**
```
UI → POST /sessions/:id → Relay → File system (sessions/*.json)
UI → GET /sessions → Relay → List all sessions → UI dropdown
```
### Known Limitations
**Standard Mode:**
- No reflection, reasoning, or refinement stages
- No RAG integration (same as Cortex Mode - currently disabled)
- No NeoMem memory storage (same as Cortex Mode - currently disabled)
- DeepSeek R1 not recommended for Standard Mode (generates reasoning artifacts)
**Session Management:**
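- Sessions are stored in the container filesystem (`core/relay/sessions/`): they survive container restarts, but a volume mount is needed for them to persist when the container is re-created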
- No session import/export functionality yet
- No session search or filtering
### Migration Notes
**For Users Upgrading:**
1. Existing sessions in localStorage will not automatically migrate to server
2. Create new sessions after upgrade for server-side persistence
3. Theme preference (light/dark) will be preserved from localStorage
4. Backend preference will default to SECONDARY if not previously set
**For Developers:**
1. Relay now requires `fs/promises` for session persistence
2. Cortex `/simple` endpoint accepts an optional `backend` parameter (falls back to `STANDARD_MODE_LLM` when omitted)
3. UI sends `mode` and `backend` parameters in request body
4. Session files stored in `core/relay/sessions/` directory
---
## [0.6.0] - 2025-12-18
### Added - Autonomy System (Phase 1 & 2)
+178
View File
@@ -0,0 +1,178 @@
# Logging System Migration Complete
## ✅ What Changed
The old `VERBOSE_DEBUG` logging system has been completely replaced with the new structured `LOG_DETAIL_LEVEL` system.
### Files Modified
1. **[.env](.env)** - Removed `VERBOSE_DEBUG`, cleaned up duplicate `LOG_DETAIL_LEVEL` settings
2. **[cortex/.env](cortex/.env)** - Removed `VERBOSE_DEBUG` from cortex config
3. **[cortex/router.py](cortex/router.py)** - Replaced `VERBOSE_DEBUG` checks with `LOG_DETAIL_LEVEL`
4. **[cortex/context.py](cortex/context.py)** - Replaced `VERBOSE_DEBUG` with `LOG_DETAIL_LEVEL`, removed verbose file logging setup
## 🎯 New Logging Configuration
### Single Environment Variable
Set `LOG_DETAIL_LEVEL` in your `.env` file:
```bash
LOG_DETAIL_LEVEL=detailed
```
### Logging Levels
| Level | Lines/Message | What You See |
|-------|---------------|--------------|
| **minimal** | 1-2 | Only errors and critical events |
| **summary** | 5-7 | Pipeline completion, errors, warnings (production mode) |
| **detailed** | 30-50 | LLM outputs, timing breakdowns, context (debugging mode) |
| **verbose** | 100+ | Everything including raw JSON dumps (deep debugging) |
## 📊 What You Get at Each Level
### Summary Mode (Production)
```
📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 3 results
🧠 Monologue | question | Tone: curious
====================================================================================================
✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
====================================================================================================
📤 Output: 342 characters
====================================================================================================
```
### Detailed Mode (Debugging - RECOMMENDED)
```
====================================================================================================
🚀 PIPELINE START | Session: abc123 | 14:23:45.123
====================================================================================================
📝 User: What is the meaning of life?
────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────
🧠 LLM CALL | Backend: PRIMARY | 14:23:45.234
────────────────────────────────────────────────────────────────────────────────────────────────────
📝 Prompt: You are Lyra, analyzing the user's question...
💬 Reply: Based on the context provided, here's my analysis...
────────────────────────────────────────────────────────────────────────────────────────────────────
📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 3 results
────────────────────────────────────────────────────────────────────────────────────────────────────
[CONTEXT] Session abc123 | User: What is the meaning of life?
────────────────────────────────────────────────────────────────────────────────────────────────────
Mode: default | Mood: neutral | Project: None
Tools: RAG, WEB, WEATHER, CODEBRAIN, POKERBRAIN
╭─ INTAKE SUMMARIES ────────────────────────────────────────────────
│ L1 : Last message discussed philosophy...
│ L5 : Recent 5 messages covered existential topics...
│ L10 : Past 10 messages showed curiosity pattern...
╰───────────────────────────────────────────────────────────────────
╭─ RAG RESULTS (3) ──────────────────────────────────────────────
│ [1] 0.923 | Previous discussion about purpose...
│ [2] 0.891 | Note about existential philosophy...
│ [3] 0.867 | Memory of Viktor Frankl discussion...
╰───────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────
🧠 Monologue | question | Tone: curious
====================================================================================================
✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
====================================================================================================
⏱️ Stage Timings:
context : 150ms ( 12.0%)
identity : 10ms ( 0.8%)
monologue : 200ms ( 16.0%)
tools : 0ms ( 0.0%)
reflection : 50ms ( 4.0%)
reasoning : 450ms ( 36.0%) ← BOTTLENECK!
refinement : 300ms ( 24.0%)
persona : 140ms ( 11.2%)
learning : 50ms ( 4.0%)
📤 Output: 342 characters
====================================================================================================
```
### Verbose Mode (Maximum Debug)
Same as detailed, plus:
- Full raw JSON responses from LLMs (50-line boxes)
- Complete intake data structures
- Stack traces on errors
## 🚀 How to Use
### For Finding Weak Links (Your Use Case)
```bash
# In .env:
LOG_DETAIL_LEVEL=detailed
# Restart services:
docker-compose restart cortex relay
```
You'll now see:
- ✅ Which LLM backend is used
- ✅ What prompts are sent to each LLM
- ✅ What each LLM responds with
- ✅ Timing breakdown showing which stage is slow
- ✅ Context being used (RAG, intake summaries)
- ✅ Clean, hierarchical structure
### For Production
```bash
LOG_DETAIL_LEVEL=summary
```
### For Deep Debugging
```bash
LOG_DETAIL_LEVEL=verbose
```
## 🔍 Finding Performance Bottlenecks
With `detailed` mode, look for:
1. **Slow stages in timing breakdown:**
```
reasoning : 3450ms ( 76.0%) ← THIS IS YOUR BOTTLENECK!
```
2. **Backend failures:**
```
⚠️ [LLM] PRIMARY failed | 14:23:45.234 | Connection timeout
✅ [LLM] SECONDARY | Reply: Based on... ← Fell back to secondary
```
3. **Loop detection:**
```
⚠️ DUPLICATE MESSAGE DETECTED | Session: abc123
🔁 LOOP DETECTED - Returning cached context
```
## 📁 Removed Features
The following old logging features have been removed:
- ❌ `VERBOSE_DEBUG` environment variable (replaced with `LOG_DETAIL_LEVEL`)
- ❌ File logging to `/app/logs/cortex_verbose_debug.log` (use `docker logs` instead)
- ❌ Separate verbose handlers in Python logging
- ❌ Per-module verbose flags
## ✨ New Features
- ✅ Single unified logging configuration
- ✅ Hierarchical, scannable output
- ✅ Collapsible data sections (boxes)
- ✅ Stage timing always shown in detailed mode
- ✅ Performance profiling built-in
- ✅ Loop detection and warnings
- ✅ Clean error formatting
---
**The logging is now clean, concise, and gives you exactly what you need to find weak links!** 🎯
+176
View File
@@ -0,0 +1,176 @@
# Cortex Logging Quick Reference
## 🎯 TL;DR
**Finding weak links in the LLM chain?**
```bash
export LOG_DETAIL_LEVEL=detailed
export VERBOSE_DEBUG=true
```
**Production use?**
```bash
export LOG_DETAIL_LEVEL=summary
```
---
## 📊 Log Levels Comparison
| Level | Output Lines/Message | Use Case | Raw LLM Output? |
|-------|---------------------|----------|-----------------|
| **minimal** | 1-2 | Silent production | ❌ No |
| **summary** | 5-7 | Production (DEFAULT) | ❌ No |
| **detailed** | 30-50 | Debugging, finding bottlenecks | ✅ Parsed only |
| **verbose** | 100+ | Deep debugging, seeing raw data | ✅ Full JSON |
---
## 🔍 Common Debugging Tasks
### See Raw LLM Outputs
```bash
export LOG_DETAIL_LEVEL=verbose
```
Look for:
```
╭─ RAW RESPONSE ────────────────────────────────────
│ { "choices": [ { "message": { "content": "..." } } ] }
╰───────────────────────────────────────────────────
```
### Find Performance Bottlenecks
```bash
export LOG_DETAIL_LEVEL=detailed
```
Look for:
```
⏱️ Stage Timings:
reasoning : 3450ms ( 76.0%) ← SLOW!
```
### Check Which RAG Memories Are Used
```bash
export LOG_DETAIL_LEVEL=detailed
```
Look for:
```
╭─ RAG RESULTS (5) ──────────────────────────────
│ [1] 0.923 | Memory content...
```
### Detect Loops
```bash
export ENABLE_DUPLICATE_DETECTION=true # (default)
```
Look for:
```
⚠️ DUPLICATE MESSAGE DETECTED
🔁 LOOP DETECTED - Returning cached context
```
### See All Backend Failures
```bash
export LOG_DETAIL_LEVEL=summary # or higher
```
Look for:
```
⚠️ [LLM] PRIMARY failed | Connection timeout
⚠️ [LLM] SECONDARY failed | Model not found
✅ [LLM] CLOUD | Reply: Based on...
```
---
## 🛠️ Environment Variables Cheat Sheet
```bash
# Verbosity Control
LOG_DETAIL_LEVEL=detailed # minimal | summary | detailed | verbose
VERBOSE_DEBUG=false # true = maximum verbosity (legacy)
# Raw Data Visibility
LOG_RAW_CONTEXT_DATA=false # Show full intake L1-L30 dumps
# Loop Protection
ENABLE_DUPLICATE_DETECTION=true # Detect duplicate messages
MAX_MESSAGE_HISTORY=100 # Trim history after N messages
SESSION_TTL_HOURS=24 # Expire sessions after N hours
# Features
NEOMEM_ENABLED=false # Enable long-term memory
ENABLE_AUTONOMOUS_TOOLS=true # Enable tool invocation
ENABLE_PROACTIVE_MONITORING=true # Enable suggestions
```
---
## 📋 Sample Output
### Summary Mode (Default - Production)
```
✅ [LLM] PRIMARY | 14:23:45.123 | Reply: Based on your question...
📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 5 results
🧠 Monologue | question | Tone: curious
✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
📤 Output: 342 characters
```
### Detailed Mode (Debugging)
```
════════════════════════════════════════════════════════════════════════════
🚀 PIPELINE START | Session: abc123 | 14:23:45.123
════════════════════════════════════════════════════════════════════════════
📝 User: What is the meaning of life?
────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────
🧠 LLM CALL | Backend: PRIMARY | 14:23:45.234
────────────────────────────────────────────────────────────────────────────
📝 Prompt: You are Lyra, a thoughtful AI assistant...
💬 Reply: Based on philosophical perspectives...
📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 5 results
╭─ RAG RESULTS (5) ──────────────────────────────
│ [1] 0.923 | Previous philosophy discussion...
│ [2] 0.891 | Existential note...
╰────────────────────────────────────────────────
════════════════════════════════════════════════════════════════════════════
✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
════════════════════════════════════════════════════════════════════════════
⏱️ Stage Timings:
context : 150ms ( 12.0%)
reasoning : 450ms ( 36.0%) ← Largest component
persona : 140ms ( 11.2%)
📤 Output: 342 characters
════════════════════════════════════════════════════════════════════════════
```
---
## ⚡ Quick Troubleshooting
| Symptom | Check | Fix |
|---------|-------|-----|
| **Logs too verbose** | Current level | Set `LOG_DETAIL_LEVEL=summary` |
| **Can't see LLM outputs** | Current level | Set `LOG_DETAIL_LEVEL=detailed` or `verbose` |
| **Repeating operations** | Loop warnings | Check for `🔁 LOOP DETECTED` messages |
| **Slow responses** | Stage timings | Look for stages >1000ms in detailed mode |
| **Missing RAG data** | NEOMEM_ENABLED | Set `NEOMEM_ENABLED=true` |
| **Out of memory** | Message history | Lower `MAX_MESSAGE_HISTORY` |
---
## 📁 Key Files
- **[.env.logging.example](.env.logging.example)** - Full configuration guide
- **[LOGGING_REFACTOR_SUMMARY.md](LOGGING_REFACTOR_SUMMARY.md)** - Detailed explanation
- **[cortex/utils/logging_utils.py](cortex/utils/logging_utils.py)** - Logging utilities
- **[cortex/context.py](cortex/context.py)** - Context + loop protection
- **[cortex/router.py](cortex/router.py)** - Pipeline stages
- **[core/relay/lib/llm.js](core/relay/lib/llm.js)** - LLM backend logging
---
**Need more detail? See [LOGGING_REFACTOR_SUMMARY.md](LOGGING_REFACTOR_SUMMARY.md)**
+352
View File
@@ -0,0 +1,352 @@
# Cortex Logging Refactor Summary
## 🎯 Problem Statement
The cortex chat loop had severe logging issues that made debugging impossible:
1. **Massive verbosity**: 100+ log lines per chat message
2. **Raw LLM dumps**: Full JSON responses pretty-printed on every call (1000s of lines)
3. **Repeated data**: NeoMem results logged 71 times individually
4. **No structure**: Scattered emoji logs with no hierarchy
5. **Impossible to debug**: Couldn't tell if loops were happening or just verbose logging
6. **No loop protection**: Unbounded message history growth, no session cleanup, no duplicate detection
## ✅ What Was Fixed
### 1. **Structured Hierarchical Logging**
**Before:**
```
🔍 RAW LLM RESPONSE: {
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1234567890,
"model": "gpt-4",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "Here is a very long response that goes on for hundreds of lines..."
}
}
],
"usage": {
"prompt_tokens": 123,
"completion_tokens": 456,
"total_tokens": 579
}
}
🧠 Trying backend: PRIMARY (http://localhost:8000)
✅ Success via PRIMARY
[STAGE 0] Collecting unified context...
[STAGE 0] Context collected - 5 RAG results
[COLLECT_CONTEXT] Intake data retrieved:
{
"L1": [...],
"L5": [...],
"L10": {...},
"L20": {...},
"L30": {...}
}
[COLLECT_CONTEXT] NeoMem search returned 71 results
[1] Score: 0.923 - Memory content here...
[2] Score: 0.891 - More memory content...
[3] Score: 0.867 - Even more content...
... (68 more lines)
```
**After (summary mode - DEFAULT):**
```
✅ [LLM] PRIMARY | 14:23:45.123 | Reply: Based on your question about...
📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 5 results
🧠 Monologue | question | Tone: curious
✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
📤 Output: 342 characters
```
**After (detailed mode - for debugging):**
```
════════════════════════════════════════════════════════════════════════════════════════════════════
🚀 PIPELINE START | Session: abc123 | 14:23:45.123
════════════════════════════════════════════════════════════════════════════════════════════════════
📝 User: What is the meaning of life?
────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────
🧠 LLM CALL | Backend: PRIMARY | 14:23:45.234
────────────────────────────────────────────────────────────────────────────────────────────────────
📝 Prompt: You are Lyra, a thoughtful AI assistant...
💬 Reply: Based on philosophical perspectives, the meaning...
📊 Context | Session: abc123 | Messages: 42 | Last: 5.2min | RAG: 5 results
────────────────────────────────────────────────────────────────────────────────────────────────────
[CONTEXT] Session abc123 | User: What is the meaning of life?
────────────────────────────────────────────────────────────────────────────────────────────────────
Mode: default | Mood: neutral | Project: None
Tools: RAG, WEB, WEATHER, CODEBRAIN, POKERBRAIN
╭─ INTAKE SUMMARIES ────────────────────────────────────────────────
│ L1 : Last message discussed philosophy...
│ L5 : Recent 5 messages covered existential topics...
│ L10 : Past 10 messages showed curiosity pattern...
│ L20 : Session focused on deep questions...
│ L30 : Long-term trend shows philosophical interest...
╰───────────────────────────────────────────────────────────────────
╭─ RAG RESULTS (5) ──────────────────────────────────────────────
│ [1] 0.923 | Previous discussion about purpose and meaning...
│ [2] 0.891 | Note about existential philosophy...
│ [3] 0.867 | Memory of Viktor Frankl discussion...
│ [4] 0.834 | Reference to stoic philosophy...
│ [5] 0.801 | Buddhism and the middle path...
╰───────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────
════════════════════════════════════════════════════════════════════════════════════════════════════
✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
════════════════════════════════════════════════════════════════════════════════════════════════════
⏱️ Stage Timings:
context : 150ms ( 12.0%)
identity : 10ms ( 0.8%)
monologue : 200ms ( 16.0%)
tools : 0ms ( 0.0%)
reflection : 50ms ( 4.0%)
reasoning : 450ms ( 36.0%)
refinement : 300ms ( 24.0%)
persona : 140ms ( 11.2%)
📤 Output: 342 characters
════════════════════════════════════════════════════════════════════════════════════════════════════
```
### 2. **Configurable Verbosity Levels**
Set via `LOG_DETAIL_LEVEL` environment variable:
- **`minimal`**: Only errors and critical events
- **`summary`**: Stage completion + errors (DEFAULT - recommended for production)
- **`detailed`**: Include raw LLM outputs, RAG results, timing breakdowns (for debugging)
- **`verbose`**: Everything including full JSON dumps (for deep debugging)
### 3. **Raw LLM Output Visibility** ✅
**You can now see raw LLM outputs clearly!**
In `detailed` or `verbose` mode, LLM calls show:
- Backend used
- Prompt preview
- Parsed reply
- **Raw JSON response in collapsible format** (verbose only)
```
╭─ RAW RESPONSE ────────────────────────────────────────────────────────────────────────────
│ {
│ "id": "chatcmpl-123",
│ "object": "chat.completion",
│ "model": "gpt-4",
│ "choices": [
│ {
│ "message": {
│ "content": "Full response here..."
│ }
│ }
│ ]
│ }
╰───────────────────────────────────────────────────────────────────────────────────────────
```
### 4. **Loop Detection & Protection** ✅
**New safety features:**
- **Duplicate message detection**: Prevents processing the same message twice
- **Message history trimming**: Auto-trims to last 100 messages (configurable via `MAX_MESSAGE_HISTORY`)
- **Session TTL**: Auto-expires inactive sessions after 24 hours (configurable via `SESSION_TTL_HOURS`)
- **Hash-based detection**: Uses MD5 hash to detect exact duplicate messages
**Example warning when loop detected:**
```
⚠️ DUPLICATE MESSAGE DETECTED | Session: abc123 | Message: What is the meaning of life?
🔁 LOOP DETECTED - Returning cached context to prevent processing duplicate
```
### 5. **Performance Timing** ✅
In `detailed` mode, see exactly where time is spent:
```
⏱️ Stage Timings:
context : 150ms ( 12.0%) ← Context collection
identity : 10ms ( 0.8%) ← Identity loading
monologue : 200ms ( 16.0%) ← Inner monologue
tools : 0ms ( 0.0%) ← Autonomous tools
reflection : 50ms ( 4.0%) ← Reflection notes
reasoning : 450ms ( 36.0%) ← Main reasoning (BOTTLENECK)
refinement : 300ms ( 24.0%) ← Answer refinement
persona : 140ms ( 11.2%) ← Persona layer
```
**This helps you identify weak links in the chain!**
## 📁 Files Modified
### Core Changes
1. **[llm.js](core/relay/lib/llm.js)**
- Removed massive JSON dump on line 53
- Added structured logging with 4 verbosity levels
- Shows raw responses only in verbose mode (boxed and truncated to the first 50 lines)
- Tracks failed backends and shows summary on total failure
2. **[context.py](cortex/context.py)**
- Condensed 71-line NeoMem loop to 5-line summary
- Removed repeated intake data dumps
- Added structured hierarchical logging with boxes
- Added duplicate message detection
- Added message history trimming
- Added session TTL and cleanup
3. **[router.py](cortex/router.py)**
- Replaced 15+ stage logs with unified pipeline summary
- Added stage timing collection
- Shows performance breakdown in detailed mode
- Clean start/end markers with total duration
### New Files
4. **[utils/logging_utils.py](cortex/utils/logging_utils.py)** (NEW)
- Reusable structured logging utilities
- `PipelineLogger` class for hierarchical logging
- Collapsible data sections
- Stage timing tracking
- Future-ready for expansion
5. **[.env.logging.example](.env.logging.example)** (NEW)
- Complete logging configuration guide
- Shows example output at each verbosity level
- Documents all environment variables
- Production-ready defaults
6. **[LOGGING_REFACTOR_SUMMARY.md](LOGGING_REFACTOR_SUMMARY.md)** (THIS FILE)
## 🚀 How to Use
### For Finding Weak Links (Your Use Case)
```bash
# Set in your .env or export:
export LOG_DETAIL_LEVEL=detailed
export VERBOSE_DEBUG=false # or true for even more detail
# Now run your chat - you'll see:
# 1. Which LLM backend is used
# 2. Raw LLM outputs (in verbose mode)
# 3. Exact timing per stage
# 4. Which stage is taking longest
```
### For Production
```bash
export LOG_DETAIL_LEVEL=summary
# Minimal, clean logs:
# ✅ [LLM] PRIMARY | 14:23:45.123 | Reply: Based on your question...
# ✨ PIPELINE COMPLETE | Session: abc123 | Total: 1250ms
```
### For Deep Debugging
```bash
export LOG_DETAIL_LEVEL=verbose
export LOG_RAW_CONTEXT_DATA=true
# Shows EVERYTHING including full JSON dumps
```
## 🔍 Finding Weak Links - Quick Guide
**Problem: "Which LLM stage is failing or producing bad output?"**
1. Set `LOG_DETAIL_LEVEL=detailed`
2. Run a test conversation
3. Look for timing anomalies:
```
reasoning : 3450ms ( 76.0%) ← BOTTLENECK!
```
4. Look for errors:
```
⚠️ Reflection failed: Connection timeout
```
5. Check raw LLM outputs (set `LOG_DETAIL_LEVEL=verbose`; `VERBOSE_DEBUG=true` is the legacy flag for maximum verbosity):
```
╭─ RAW RESPONSE ────────────────────────────────────
│ {
│ "choices": [
│ { "message": { "content": "..." } }
│ ]
│ }
╰───────────────────────────────────────────────────
```
**Problem: "Is the loop repeating operations?"**
1. Enable duplicate detection (on by default)
2. Look for loop warnings:
```
⚠️ DUPLICATE MESSAGE DETECTED | Session: abc123
🔁 LOOP DETECTED - Returning cached context
```
3. Check stage timings - repeated stages will show up as duplicates
**Problem: "Which RAG memories are being used?"**
1. Set `LOG_DETAIL_LEVEL=detailed`
2. Look for RAG results box:
```
╭─ RAG RESULTS (5) ──────────────────────────────
│ [1] 0.923 | Previous discussion about X...
│ [2] 0.891 | Note about Y...
╰────────────────────────────────────────────────
```
## 📊 Environment Variables Reference
| Variable | Default | Description |
|----------|---------|-------------|
| `LOG_DETAIL_LEVEL` | `summary` | Verbosity: minimal/summary/detailed/verbose |
| `VERBOSE_DEBUG` | `false` | Legacy flag for maximum verbosity |
| `LOG_RAW_CONTEXT_DATA` | `false` | Show full intake data dumps |
| `ENABLE_DUPLICATE_DETECTION` | `true` | Detect and prevent duplicate messages |
| `MAX_MESSAGE_HISTORY` | `100` | Max messages to keep per session |
| `SESSION_TTL_HOURS` | `24` | Auto-expire sessions after N hours |
## 🎉 Results
**Before:** 1000+ lines of logs per chat message, unreadable, couldn't identify issues
**After (summary mode):** 5 lines of structured logs, clear and actionable
**After (detailed mode):** ~50 lines with full visibility into each stage, timing, and raw outputs
**Loop protection:** Automatic detection and prevention of duplicate processing
**You can now:**
✅ See raw LLM outputs clearly (in detailed/verbose mode)
✅ Identify performance bottlenecks (stage timings)
✅ Detect loops and duplicates (automatic)
✅ Find failing stages (error markers)
✅ Scan logs quickly (hierarchical structure)
✅ Debug production issues (adjustable verbosity)
## 🔧 Next Steps (Optional Improvements)
1. **Structured JSON logging**: Output as JSON for log aggregation tools
2. **Log rotation**: Implement file rotation for verbose logs
3. **Metrics export**: Export stage timings to Prometheus/Grafana
4. **Error categorization**: Tag errors by type (network, timeout, parsing, etc.)
5. **Performance alerts**: Auto-alert when stages exceed thresholds
---
**Happy debugging! You can now see what's actually happening in the cortex loop.** 🎯
+195 -28
View File
@@ -1,10 +1,12 @@
# Project Lyra - README v0.6.0
# Project Lyra - README v0.7.0
Lyra is a modular persistent AI companion system with advanced reasoning capabilities and autonomous decision-making.
It provides memory-backed chat using **Relay** + **Cortex** with integrated **Autonomy System**,
featuring a multi-stage reasoning pipeline powered by HTTP-based LLM backends.
**Current Version:** v0.6.0 (2025-12-18)
**NEW in v0.7.0:** Standard Mode for simple chatbot functionality + UI backend selection + server-side session persistence
**Current Version:** v0.7.0 (2025-12-21)
> **Note:** As of v0.6.0, NeoMem is **disabled by default** while we work out integration hiccups in the pipeline. The autonomy system is being refined independently before full memory integration.
@@ -25,14 +27,18 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do
- Coordinates all module interactions
- OpenAI-compatible endpoint: `POST /v1/chat/completions`
- Internal endpoint: `POST /chat`
- Routes messages through Cortex reasoning pipeline
- Dual-mode routing: Standard Mode (simple chat) or Cortex Mode (full reasoning)
- Server-side session persistence with file-based storage
- Session management API: `GET/POST/PATCH/DELETE /sessions`
- Manages async calls to Cortex ingest
- *(NeoMem integration currently disabled in v0.6.0)*
**2. UI** (Static HTML)
- Browser-based chat interface with cyberpunk theme
- Connects to Relay
- Saves and loads sessions
- **NEW:** Mode selector (Standard/Cortex) in header
- **NEW:** Settings modal with backend selection and session management
- **NEW:** Light/Dark mode toggle (dark by default)
- Server-synced session management (persists across browsers and reboots)
- OpenAI-compatible message format
**3. NeoMem** (Python/FastAPI) - Port 7077 - **DISABLED IN v0.6.0**
@@ -49,7 +55,13 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do
- Primary reasoning engine with multi-stage pipeline and autonomy system
- **Includes embedded Intake module** (no separate service as of v0.5.1)
- **Integrated Autonomy System** (NEW in v0.6.0) - See Autonomy System section below
- **4-Stage Processing:**
- **Dual Operating Modes:**
- **Standard Mode** (NEW in v0.7.0) - Simple chatbot with context retention
- Bypasses reflection, reasoning, refinement stages
- Direct LLM call with conversation history
- User-selectable backend (SECONDARY, OPENAI, or custom)
- Faster responses for coding and practical tasks
- **Cortex Mode** - Full 4-stage reasoning pipeline
1. **Reflection** - Generates meta-awareness notes about conversation
2. **Reasoning** - Creates initial draft answer using context
3. **Refinement** - Polishes and improves the draft
@@ -57,7 +69,8 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do
- Integrates with Intake for short-term context via internal Python imports
- Flexible LLM router supporting multiple backends via HTTP
- **Endpoints:**
- `POST /reason` - Main reasoning pipeline
- `POST /reason` - Main reasoning pipeline (Cortex Mode)
- `POST /simple` - Direct LLM chat (Standard Mode) **NEW in v0.7.0**
- `POST /ingest` - Receives conversation exchanges from Relay
- `GET /health` - Service health check
- `GET /debug/sessions` - Inspect in-memory SESSIONS state
@@ -129,12 +142,38 @@ The autonomy system operates in coordinated layers, all maintaining state in `se
---
## Data Flow Architecture (v0.6.0)
## Data Flow Architecture (v0.7.0)
### Normal Message Flow:
### Standard Mode Flow (NEW in v0.7.0):
```
User (UI) → POST /v1/chat/completions
User (UI) → POST /v1/chat/completions {mode: "standard", backend: "SECONDARY"}
Relay (7078)
↓ POST /simple
Cortex (7081)
↓ (internal Python call)
Intake module → get_recent_messages() (last 20 messages)
Direct LLM call (user-selected backend: SECONDARY/OPENAI/custom)
Returns simple response to Relay
Relay → POST /ingest (async)
Cortex → add_exchange_internal() → SESSIONS buffer
Relay → POST /sessions/:id (save session to file)
Relay → UI (returns final response)
Note: Bypasses reflection, reasoning, refinement, persona stages
```
### Cortex Mode Flow (Full Reasoning):
```
User (UI) → POST /v1/chat/completions {mode: "cortex"}
Relay (7078)
↓ POST /reason
@@ -158,11 +197,26 @@ Cortex → add_exchange_internal() → SESSIONS buffer
Autonomy System → Update self_state.json (pattern tracking)
Relay → POST /sessions/:id (save session to file)
Relay → UI (returns final response)
Note: NeoMem integration disabled in v0.6.0
```
### Session Persistence Flow (NEW in v0.7.0):
```
UI loads → GET /sessions → Relay → List all sessions from files → UI dropdown
User sends message → POST /sessions/:id → Relay → Save to sessions/*.json
User renames session → PATCH /sessions/:id/metadata → Relay → Update *.meta.json
User deletes session → DELETE /sessions/:id → Relay → Remove session files
Sessions stored in: core/relay/sessions/
- {sessionId}.json (conversation history)
- {sessionId}.meta.json (name, timestamps, metadata)
```
### Cortex 4-Stage Reasoning Pipeline:
1. **Reflection** (`reflection.py`) - Cloud LLM (OpenAI)
@@ -196,6 +250,14 @@ Note: NeoMem integration disabled in v0.6.0
- OpenAI-compatible endpoint: `POST /v1/chat/completions`
- Internal endpoint: `POST /chat`
- Health check: `GET /_health`
- **NEW:** Dual-mode routing (Standard/Cortex)
- **NEW:** Server-side session persistence with CRUD API
- **NEW:** Session management endpoints:
- `GET /sessions` - List all sessions
- `GET /sessions/:id` - Retrieve session history
- `POST /sessions/:id` - Save session history
- `PATCH /sessions/:id/metadata` - Update session metadata
- `DELETE /sessions/:id` - Delete session
- Async non-blocking calls to Cortex
- Shared request handler for code reuse
- Comprehensive error handling
@@ -210,19 +272,35 @@ Note: NeoMem integration disabled in v0.6.0
**UI**:
- Lightweight static HTML chat interface
- Cyberpunk theme
- Session save/load functionality
- Cyberpunk theme with light/dark mode toggle
- **NEW:** Mode selector (Standard/Cortex) in header
- **NEW:** Settings modal (⚙ button) with:
- Backend selection for Standard Mode (SECONDARY/OPENAI/custom)
- Session management (view, delete sessions)
- Theme toggle (dark mode default)
- **NEW:** Server-synced session management
- Sessions persist across browsers and reboots
- Rename sessions with custom names
- Delete sessions with confirmation
- Automatic session save on every message
- OpenAI message format support
### Reasoning Layer
**Cortex** (v0.5.1):
- Multi-stage reasoning pipeline (reflection → reasoning → refine → persona)
**Cortex** (v0.7.0):
- **NEW:** Dual operating modes:
- **Standard Mode** - Simple chat with context (`/simple` endpoint)
- User-selectable backend (SECONDARY, OPENAI, or custom)
- Full conversation history via Intake integration
- Bypasses reasoning pipeline for faster responses
- **Cortex Mode** - Full reasoning pipeline (`/reason` endpoint)
- Multi-stage processing: reflection → reasoning → refine → persona
- Per-stage backend selection
- Autonomy system integration
- Flexible LLM backend routing via HTTP
- Per-stage backend selection
- Async processing throughout
- Embedded Intake module for short-term context
- `/reason`, `/ingest`, `/health`, `/debug/sessions`, `/debug/summary` endpoints
- `/reason`, `/simple`, `/ingest`, `/health`, `/debug/sessions`, `/debug/summary` endpoints
- Lenient error handling - never fails the chat pipeline
**Intake** (Embedded Module):
@@ -327,7 +405,28 @@ The following LLM backends are accessed via HTTP (not part of docker-compose):
## Version History
### v0.6.0 (2025-12-18) - Current Release
### v0.7.0 (2025-12-21) - Current Release
**Major Features: Standard Mode + Backend Selection + Session Persistence**
- ✅ Added Standard Mode for simple chatbot functionality
- ✅ UI mode selector (Standard/Cortex) in header
- ✅ Settings modal with backend selection for Standard Mode
- ✅ Server-side session persistence with file-based storage
- ✅ Session management UI (view, rename, delete sessions)
- ✅ Light/Dark mode toggle (dark by default)
- ✅ Context retention in Standard Mode via Intake integration
- ✅ Fixed modal positioning and z-index issues
- ✅ Cortex `/simple` endpoint for direct LLM calls
- ✅ Session CRUD API in Relay
- ✅ Full backward compatibility - Cortex Mode unchanged
**Key Changes:**
- Standard Mode bypasses the reflection, reasoning, refinement, and persona stages for faster responses
- Sessions now sync across browsers and survive container restarts
- User can select SECONDARY (Ollama), OPENAI, or custom backend for Standard Mode
- Theme preference and backend selection persisted in localStorage
- Session files stored in `core/relay/sessions/` directory
### v0.6.0 (2025-12-18)
**Major Feature: Autonomy System (Phase 1, 2, and 2.5)**
- ✅ Added autonomous decision-making framework
- ✅ Implemented executive planning and goal-setting layer
@@ -394,30 +493,39 @@ The following LLM backends are accessed via HTTP (not part of docker-compose):
---
## Known Issues (v0.6.0)
## Known Issues (v0.7.0)
### Temporarily Disabled (v0.6.0)
### Temporarily Disabled
- **NeoMem disabled by default** - Being refined independently before full integration
- PostgreSQL + pgvector storage inactive
- Neo4j graph database inactive
- Memory persistence endpoints not active
- RAG service (Beta Lyrae) currently disabled in docker-compose.yml
### Non-Critical
- Session management endpoints not fully implemented in Relay
- Full autonomy system integration still being refined
- Memory retrieval integration pending NeoMem re-enablement
### Standard Mode Limitations
- No reflection, reasoning, or refinement stages (by design)
- DeepSeek R1 not recommended for Standard Mode (generates reasoning artifacts)
- No RAG integration (same as Cortex Mode - currently disabled)
- No NeoMem memory storage (same as Cortex Mode - currently disabled)
### Session Management Limitations
- Sessions stored in container filesystem - requires volume mount for true persistence
- No session import/export functionality yet
- No session search or filtering
- Old localStorage sessions don't automatically migrate to server
### Operational Notes
- **Single-worker constraint**: Cortex must run with single Uvicorn worker to maintain SESSIONS state
- Multi-worker scaling requires migrating SESSIONS to Redis or shared storage
- Diagnostic endpoints (`/debug/sessions`, `/debug/summary`) available for troubleshooting
- Backend selection only affects Standard Mode - Cortex Mode uses environment-configured backends
### Future Enhancements
- Re-enable NeoMem integration after pipeline refinement
- Full autonomy system maturation and optimization
- Re-enable RAG service integration
- Implement full session persistence
- Session import/export functionality
- Session search and filtering UI
- Migrate SESSIONS to Redis for multi-worker support
- Add request correlation IDs for tracing
- Comprehensive health checks across all services
@@ -457,17 +565,56 @@ The following LLM backends are accessed via HTTP (not part of docker-compose):
curl http://localhost:7077/health
```
4. Access the UI at `http://localhost:7078`
4. Access the UI at `http://localhost:8081`
### Using the UI
**Mode Selection:**
- Use the **Mode** dropdown in the header to switch between:
- **Standard** - Simple chatbot for coding and practical tasks
- **Cortex** - Full reasoning pipeline with autonomy features
**Settings Menu:**
1. Click the **⚙ Settings** button in the header
2. **Backend Selection** (Standard Mode only):
- Choose **SECONDARY** (Ollama/Qwen on 3090) - Fast, local
- Choose **OPENAI** (GPT-4o-mini) - Cloud-based, high quality
- Enter custom backend name for advanced configurations
3. **Session Management**:
- View all saved sessions with message counts and timestamps
- Click 🗑️ to delete unwanted sessions
4. **Theme Toggle**:
- Click **🌙 Dark Mode** or **☀️ Light Mode** to switch themes
**Session Management:**
- Sessions automatically save on every message
- Use the **Session** dropdown to switch between sessions
- Click **New** to create a new session
- Click **✏️ Rename** to rename the current session
- Sessions persist across browsers and container restarts
### Test
**Test Relay → Cortex pipeline:**
**Test Standard Mode:**
```bash
curl -X POST http://localhost:7078/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"mode": "standard",
"backend": "SECONDARY",
"messages": [{"role": "user", "content": "Hello!"}],
"sessionId": "test"
}'
```
**Test Cortex Mode (Full Reasoning):**
```bash
curl -X POST http://localhost:7078/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"mode": "cortex",
"messages": [{"role": "user", "content": "Hello Lyra!"}],
"session_id": "test"
"sessionId": "test"
}'
```
@@ -492,6 +639,21 @@ curl http://localhost:7081/debug/sessions
curl "http://localhost:7081/debug/summary?session_id=test"
```
**List all sessions:**
```bash
curl http://localhost:7078/sessions
```
**Get session history:**
```bash
curl http://localhost:7078/sessions/sess-abc123
```
**Delete a session:**
```bash
curl -X DELETE http://localhost:7078/sessions/sess-abc123
```
All backend databases (PostgreSQL and Neo4j) are automatically started as part of the docker-compose stack.
---
@@ -521,6 +683,7 @@ SPEAK_LLM=OPENAI # Use OpenAI for persona
NEOMEM_LLM=PRIMARY # Use llama.cpp for memory
UI_LLM=OPENAI # Use OpenAI for UI
RELAY_LLM=PRIMARY # Use llama.cpp for relay
STANDARD_MODE_LLM=SECONDARY # Default backend for Standard Mode (NEW in v0.7.0)
```
### Database Configuration
@@ -541,6 +704,7 @@ NEO4J_PASSWORD=neomemgraph
NEOMEM_API=http://neomem-api:7077
CORTEX_API=http://cortex:7081
CORTEX_REASON_URL=http://cortex:7081/reason
CORTEX_SIMPLE_URL=http://cortex:7081/simple # NEW in v0.7.0
CORTEX_INGEST_URL=http://cortex:7081/ingest
RELAY_URL=http://relay:7078
```
@@ -685,7 +849,10 @@ NeoMem is a derivative work based on Mem0 OSS (Apache 2.0).
### Debugging Tips
- Enable verbose logging: `VERBOSE_DEBUG=true` in `.env`
- Check Cortex logs: `docker logs cortex -f`
- Check Relay logs: `docker logs relay -f`
- Inspect SESSIONS: `curl http://localhost:7081/debug/sessions`
- Test summarization: `curl "http://localhost:7081/debug/summary?session_id=test"`
- Check Relay logs: `docker logs relay -f`
- List sessions: `curl http://localhost:7078/sessions`
- Test Standard Mode: `curl -X POST http://localhost:7078/v1/chat/completions -H "Content-Type: application/json" -d '{"mode":"standard","backend":"SECONDARY","messages":[{"role":"user","content":"test"}],"sessionId":"test"}'`
- Monitor Docker network: `docker network inspect lyra_net`
- Check session files: `ls -la core/relay/sessions/`
+79 -11
View File
@@ -38,6 +38,8 @@ async function tryBackend(backend, messages) {
// 🧩 Normalize replies
let reply = "";
let parsedData = null;
try {
if (isOllama) {
// Ollama sometimes returns NDJSON lines; merge them
@@ -49,21 +51,75 @@ async function tryBackend(backend, messages) {
.join("");
reply = merged.trim();
} else {
const data = JSON.parse(raw);
console.log("🔍 RAW LLM RESPONSE:", JSON.stringify(data, null, 2));
parsedData = JSON.parse(raw);
reply =
data?.choices?.[0]?.text?.trim() ||
data?.choices?.[0]?.message?.content?.trim() ||
data?.message?.content?.trim() ||
parsedData?.choices?.[0]?.text?.trim() ||
parsedData?.choices?.[0]?.message?.content?.trim() ||
parsedData?.message?.content?.trim() ||
"";
}
} catch (err) {
reply = `[parse error: ${err.message}]`;
}
return { reply, raw, backend: backend.key };
return { reply, raw, parsedData, backend: backend.key };
}
// ------------------------------------
// Structured logging helper
// ------------------------------------
const LOG_DETAIL = process.env.LOG_DETAIL_LEVEL || "summary"; // minimal | summary | detailed | verbose

/**
 * Log the outcome of one LLM backend call at the configured verbosity.
 *
 * Verbosity is taken from LOG_DETAIL (LOG_DETAIL_LEVEL env var, default "summary"):
 *   - minimal  : failures only
 *   - summary  : one line per successful call with an 80-character reply preview
 *   - detailed : framed block with a 150-character prompt preview and a
 *                200-character reply preview
 *   - verbose  : detailed, plus the parsed JSON response in a box capped at 50 lines
 *
 * This function must not throw on odd input: callSpeechLLM calls it inside the
 * same try block as the backend request, so an exception raised here would be
 * reported as a backend failure even though the call succeeded.
 *
 * @param {{key: string}} backend - Backend descriptor; only `key` is read.
 * @param {Array<{content?: unknown}>} messages - Messages sent; the last one is previewed.
 * @param {{reply?: unknown, parsedData?: unknown}|null} result - Result of the call
 *   (ignored when `error` is set).
 * @param {Error|null} [error=null] - Failure to report; always logged as a warning.
 * @returns {void}
 */
function logLLMCall(backend, messages, result, error = null) {
  // Time-of-day portion of the ISO timestamp, with the trailing "Z" removed.
  const timestamp = new Date().toISOString().split('T')[1].slice(0, -1);
  const backendName = String(backend?.key ?? "unknown").toUpperCase();

  // Turn any value into display text; non-strings (e.g. multi-part content
  // arrays) are JSON-encoded instead of crashing on a missing .substring().
  const toText = (value) => {
    if (typeof value === "string") return value;
    if (value === null || value === undefined) return "";
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return String(value);
    }
  };

  // Cut to `max` characters, adding an ellipsis only when text was actually dropped.
  const preview = (value, max) => {
    const text = toText(value);
    return text.length > max ? `${text.substring(0, max)}...` : text;
  };

  if (error) {
    // Always log errors
    console.warn(`⚠️ [LLM] ${backendName} failed | ${timestamp} | ${error.message ?? error}`);
    return;
  }

  // Success - log based on detail level
  if (LOG_DETAIL === "minimal") {
    return; // Don't log successful calls in minimal mode
  }

  if (LOG_DETAIL === "summary") {
    console.log(`✅ [LLM] ${backendName} | ${timestamp} | Reply: ${preview(result?.reply, 80)}`);
    return;
  }

  // Detailed or verbose
  const rule = '─'.repeat(100);
  console.log(`\n${rule}`);
  console.log(`🧠 LLM CALL | Backend: ${backendName} | ${timestamp}`);
  console.log(rule);

  // Show prompt preview (last message only)
  const lastMsg = Array.isArray(messages) ? messages[messages.length - 1] : undefined;
  console.log(`📝 Prompt: ${preview(lastMsg?.content, 150)}`);

  // Show parsed reply
  console.log(`💬 Reply: ${preview(result?.reply, 200)}`);

  // Show raw response only in verbose mode
  if (LOG_DETAIL === "verbose" && result?.parsedData) {
    console.log(`\n╭─ RAW RESPONSE ────────────────────────────────────────────────────────────────────────────`);
    const lines = JSON.stringify(result.parsedData, null, 2).split('\n');
    const maxLines = 50;
    // Prefix every body line so the box matches its top border and the overflow line.
    lines.slice(0, maxLines).forEach((line) => {
      console.log(`│ ${line}`);
    });
    if (lines.length > maxLines) {
      console.log(`│ ... (${lines.length - maxLines} more lines - check raw field for full response)`);
    }
    console.log(`╰${'─'.repeat(95)}`);
  }

  console.log(`${rule}\n`);
}
// ------------------------------------
@@ -77,17 +133,29 @@ export async function callSpeechLLM(messages) {
{ key: "fallback", type: "llamacpp", url: process.env.LLM_FALLBACK_URL, model: process.env.LLM_FALLBACK_MODEL },
];
const failedBackends = [];
for (const b of backends) {
if (!b.url || !b.model) continue;
try {
console.log(`🧠 Trying backend: ${b.key.toUpperCase()} (${b.url})`);
const out = await tryBackend(b, messages);
console.log(`✅ Success via ${b.key.toUpperCase()}`);
logLLMCall(b, messages, out);
return out;
} catch (err) {
console.warn(`⚠️ ${b.key.toUpperCase()} failed: ${err.message}`);
logLLMCall(b, messages, null, err);
failedBackends.push({ backend: b.key, error: err.message });
}
}
// All backends failed - log summary
console.error(`\n${'='.repeat(100)}`);
console.error(`🔴 ALL LLM BACKENDS FAILED`);
console.error(`${'='.repeat(100)}`);
failedBackends.forEach(({ backend, error }) => {
console.error(` ${backend.toUpperCase()}: ${error}`);
});
console.error(`${'='.repeat(100)}\n`);
throw new Error("all_backends_failed");
}
+207 -18
View File
@@ -4,17 +4,26 @@
import express from "express";
import dotenv from "dotenv";
import cors from "cors";
import fs from "fs/promises";
import path from "path";
import { fileURLToPath } from "url";
dotenv.config();
// ES module __dirname workaround
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const SESSIONS_DIR = path.join(__dirname, "sessions");
const app = express();
app.use(cors());
app.use(express.json());
const PORT = Number(process.env.PORT || 7078);
// Cortex endpoints (only these are used now)
// Cortex endpoints
const CORTEX_REASON = process.env.CORTEX_REASON_URL || "http://cortex:7081/reason";
const CORTEX_SIMPLE = process.env.CORTEX_SIMPLE_URL || "http://cortex:7081/simple";
// -----------------------------------------------------
// Helper request wrapper
@@ -45,18 +54,32 @@ async function postJSON(url, data) {
// -----------------------------------------------------
// The unified chat handler
// -----------------------------------------------------
async function handleChatRequest(session_id, user_msg) {
async function handleChatRequest(session_id, user_msg, mode = "cortex", backend = null) {
let reason;
// 1. → Cortex.reason (main pipeline)
try {
reason = await postJSON(CORTEX_REASON, {
// Determine which endpoint to use based on mode
const endpoint = mode === "standard" ? CORTEX_SIMPLE : CORTEX_REASON;
const modeName = mode === "standard" ? "simple" : "reason";
console.log(`Relay → routing to Cortex.${modeName} (mode: ${mode}${backend ? `, backend: ${backend}` : ''})`);
// Build request payload
const payload = {
session_id,
user_prompt: user_msg
});
};
// Add backend parameter if provided (only for standard mode)
if (backend && mode === "standard") {
payload.backend = backend;
}
// Call appropriate Cortex endpoint
try {
reason = await postJSON(endpoint, payload);
} catch (e) {
console.error("Relay → Cortex.reason error:", e.message);
throw new Error(`cortex_reason_failed: ${e.message}`);
console.error(`Relay → Cortex.${modeName} error:`, e.message);
throw new Error(`cortex_${modeName}_failed: ${e.message}`);
}
// Correct persona field
@@ -88,14 +111,16 @@ app.post("/v1/chat/completions", async (req, res) => {
const messages = req.body.messages || [];
const lastMessage = messages[messages.length - 1];
const user_msg = lastMessage?.content || "";
const mode = req.body.mode || "cortex"; // Get mode from request, default to cortex
const backend = req.body.backend || null; // Get backend preference
if (!user_msg) {
return res.status(400).json({ error: "No message content provided" });
}
console.log(`Relay (v1) → received: "${user_msg}"`);
console.log(`Relay (v1) → received: "${user_msg}" [mode: ${mode}${backend ? `, backend: ${backend}` : ''}]`);
const result = await handleChatRequest(session_id, user_msg);
const result = await handleChatRequest(session_id, user_msg, mode, backend);
res.json({
id: `chatcmpl-${Date.now()}`,
@@ -136,10 +161,12 @@ app.post("/chat", async (req, res) => {
try {
const session_id = req.body.session_id || "default";
const user_msg = req.body.message || "";
const mode = req.body.mode || "cortex"; // Get mode from request, default to cortex
const backend = req.body.backend || null; // Get backend preference
console.log(`Relay → received: "${user_msg}"`);
console.log(`Relay → received: "${user_msg}" [mode: ${mode}${backend ? `, backend: ${backend}` : ''}]`);
const result = await handleChatRequest(session_id, user_msg);
const result = await handleChatRequest(session_id, user_msg, mode, backend);
res.json(result);
} catch (err) {
@@ -154,20 +181,182 @@ app.post("/chat", async (req, res) => {
// -----------------------------------------------------
// SESSION ENDPOINTS (for UI)
// -----------------------------------------------------
// In-memory session storage (could be replaced with a database)
const sessions = new Map();
// Helper functions for session persistence
/**
 * Make sure the sessions directory (SESSIONS_DIR) exists on disk.
 *
 * The directory is created recursively, so an already-existing directory
 * is not an error. Any failure is logged and not re-thrown.
 *
 * @returns {Promise<void>}
 */
async function ensureSessionsDir() {
  await fs
    .mkdir(SESSIONS_DIR, { recursive: true })
    .catch((mkdirError) => {
      console.error("Failed to create sessions directory:", mkdirError);
    });
}
app.get("/sessions/:id", (req, res) => {
/**
 * Load the stored message history for a session.
 *
 * @param {string} sessionId - Session identifier; used as the file stem
 *   of `<sessionId>.json` inside SESSIONS_DIR.
 * @returns {Promise<any>} The parsed JSON content of the session file, or
 *   an empty array when the id is not a plain file name, the file cannot
 *   be read, or its content is not valid JSON.
 */
async function loadSession(sessionId) {
  // The id arrives from the URL. Refuse anything that carries a path
  // component (e.g. "../x") so the lookup cannot leave SESSIONS_DIR.
  const id = String(sessionId);
  if (path.basename(id) !== id) {
    return [];
  }

  try {
    const sessionPath = path.join(SESSIONS_DIR, `${id}.json`);
    const data = await fs.readFile(sessionPath, "utf-8");
    return JSON.parse(data);
  } catch (err) {
    // File doesn't exist or is invalid - return empty array
    return [];
  }
}
/**
 * Persist a session's message history, and optionally its metadata.
 *
 * @param {string} sessionId - Session identifier; used as the file stem
 *   inside SESSIONS_DIR.
 * @param {any} history - Value serialized to `<sessionId>.json`.
 * @param {object} [metadata={}] - Metadata (name, etc.) serialized to
 *   `<sessionId>.meta.json`. When empty (the default) the metadata file
 *   is left untouched, so saving history does not erase a stored name.
 * @returns {Promise<boolean>} true on success, false when the id is not a
 *   plain file name or any write fails (the failure is logged).
 */
async function saveSession(sessionId, history, metadata = {}) {
  // The id arrives from the URL. Refuse anything that carries a path
  // component (e.g. "../x") so the write cannot leave SESSIONS_DIR.
  const id = String(sessionId);
  if (path.basename(id) !== id) {
    console.error(`Failed to save session ${sessionId}:`, new Error("invalid session id"));
    return false;
  }

  try {
    await ensureSessionsDir();
    const sessionPath = path.join(SESSIONS_DIR, `${id}.json`);
    const metadataPath = path.join(SESSIONS_DIR, `${id}.meta.json`);

    // Save history
    await fs.writeFile(sessionPath, JSON.stringify(history, null, 2), "utf-8");

    // Save metadata (name, etc.) only when the caller supplied some;
    // writing the empty default would overwrite an existing name with {}.
    if (metadata && Object.keys(metadata).length > 0) {
      await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2), "utf-8");
    }
    return true;
  } catch (err) {
    console.error(`Failed to save session ${sessionId}:`, err);
    return false;
  }
}
/**
 * Load the metadata object stored for a session.
 *
 * @param {string} sessionId - Session identifier; used as the file stem
 *   of `<sessionId>.meta.json` inside SESSIONS_DIR.
 * @returns {Promise<any>} The parsed metadata, or `{ name: sessionId }`
 *   when the id is not a plain file name, the file cannot be read, or its
 *   content is not valid JSON.
 */
async function loadSessionMetadata(sessionId) {
  // Refuse ids that carry a path component (e.g. "../x") so the lookup
  // cannot leave SESSIONS_DIR; fall back to the default metadata instead.
  const id = String(sessionId);
  if (path.basename(id) !== id) {
    return { name: sessionId };
  }

  try {
    const metadataPath = path.join(SESSIONS_DIR, `${id}.meta.json`);
    const data = await fs.readFile(metadataPath, "utf-8");
    return JSON.parse(data);
  } catch (err) {
    // No metadata file, return default
    return { name: sessionId };
  }
}
/**
 * Persist the metadata object for a session.
 *
 * @param {string} sessionId - Session identifier; used as the file stem
 *   of `<sessionId>.meta.json` inside SESSIONS_DIR.
 * @param {any} metadata - Value serialized to the metadata file.
 * @returns {Promise<boolean>} true on success, false when the id is not a
 *   plain file name or the write fails (the failure is logged).
 */
async function saveSessionMetadata(sessionId, metadata) {
  // The id arrives from the URL. Refuse anything that carries a path
  // component (e.g. "../x") so the write cannot leave SESSIONS_DIR.
  const id = String(sessionId);
  if (path.basename(id) !== id) {
    console.error(`Failed to save metadata for ${sessionId}:`, new Error("invalid session id"));
    return false;
  }

  try {
    await ensureSessionsDir();
    const metadataPath = path.join(SESSIONS_DIR, `${id}.meta.json`);
    await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2), "utf-8");
    return true;
  } catch (err) {
    console.error(`Failed to save metadata for ${sessionId}:`, err);
    return false;
  }
}
/**
 * Enumerate the sessions stored in SESSIONS_DIR.
 *
 * Every `*.json` file that is not a `*.meta.json` file counts as one
 * session. For each, the file's mtime, the number of stored messages and
 * the name from its metadata are collected.
 *
 * @returns {Promise<Array<{id: string, name: string, lastModified: Date, messageCount: number}>>}
 *   Sessions sorted newest-first by modification time; an empty array when
 *   the directory cannot be listed (the failure is logged).
 */
async function listSessions() {
  const SUFFIX = ".json";
  try {
    await ensureSessionsDir();
    const files = await fs.readdir(SESSIONS_DIR);
    const sessions = [];

    for (const file of files) {
      if (file.endsWith(SUFFIX) && !file.endsWith(".meta.json")) {
        // Strip only the trailing extension; String.replace would remove
        // the first ".json" it finds, mangling ids that contain it.
        const sessionId = file.slice(0, -SUFFIX.length);
        const sessionPath = path.join(SESSIONS_DIR, file);
        const stats = await fs.stat(sessionPath);

        // Try to read the session to get message count
        let messageCount = 0;
        try {
          const data = await fs.readFile(sessionPath, "utf-8");
          const history = JSON.parse(data);
          // Valid JSON that is not an array has no message count.
          messageCount = Array.isArray(history) ? history.length : 0;
        } catch (e) {
          // Unreadable or invalid JSON: keep the session listed with 0 messages
        }

        // Load metadata (name)
        const metadata = await loadSessionMetadata(sessionId);
        sessions.push({
          id: sessionId,
          name: metadata.name || sessionId,
          lastModified: stats.mtime,
          messageCount
        });
      }
    }

    // Sort by last modified (newest first)
    sessions.sort((a, b) => b.lastModified - a.lastModified);
    return sessions;
  } catch (err) {
    console.error("Failed to list sessions:", err);
    return [];
  }
}
/**
 * Delete a session's history file and, if present, its metadata file.
 *
 * @param {string} sessionId - Session identifier; used as the file stem
 *   inside SESSIONS_DIR.
 * @returns {Promise<boolean>} true when the history file was removed,
 *   false when the id is not a plain file name or the history file could
 *   not be removed (the failure is logged).
 */
async function deleteSession(sessionId) {
  // The id arrives from the URL. Refuse anything that carries a path
  // component (e.g. "../x") so the unlink cannot leave SESSIONS_DIR.
  const id = String(sessionId);
  if (path.basename(id) !== id) {
    console.error(`Failed to delete session ${sessionId}:`, new Error("invalid session id"));
    return false;
  }

  try {
    const sessionPath = path.join(SESSIONS_DIR, `${id}.json`);
    const metadataPath = path.join(SESSIONS_DIR, `${id}.meta.json`);

    // Delete session file
    await fs.unlink(sessionPath);

    // Delete metadata file (if exists)
    try {
      await fs.unlink(metadataPath);
    } catch (e) {
      // A missing metadata file is expected; anything else is worth a log
      // line, but the session itself is already gone so still report success.
      if (e.code !== "ENOENT") {
        console.error(`Failed to delete metadata for ${sessionId}:`, e);
      }
    }
    return true;
  } catch (err) {
    console.error(`Failed to delete session ${sessionId}:`, err);
    return false;
  }
}
// GET /sessions - List all sessions
app.get("/sessions", async (req, res) => {
const sessions = await listSessions();
res.json(sessions);
});
// GET /sessions/:id - Get specific session history
app.get("/sessions/:id", async (req, res) => {
const sessionId = req.params.id;
const history = sessions.get(sessionId) || [];
const history = await loadSession(sessionId);
res.json(history);
});
app.post("/sessions/:id", (req, res) => {
// POST /sessions/:id - Save session history
app.post("/sessions/:id", async (req, res) => {
const sessionId = req.params.id;
const history = req.body;
sessions.set(sessionId, history);
const success = await saveSession(sessionId, history);
if (success) {
res.json({ ok: true, saved: history.length });
} else {
res.status(500).json({ error: "Failed to save session" });
}
});
// PATCH /sessions/:id/metadata - Update session metadata (name, etc.)
app.patch("/sessions/:id/metadata", async (req, res) => {
const sessionId = req.params.id;
const metadata = req.body;
const success = await saveSessionMetadata(sessionId, metadata);
if (success) {
res.json({ ok: true, metadata });
} else {
res.status(500).json({ error: "Failed to update metadata" });
}
});
// DELETE /sessions/:id - Delete a session
app.delete("/sessions/:id", async (req, res) => {
const sessionId = req.params.id;
const success = await deleteSession(sessionId);
if (success) {
res.json({ ok: true, deleted: sessionId });
} else {
res.status(500).json({ error: "Failed to delete session" });
}
});
// -----------------------------------------------------
@@ -0,0 +1,3 @@
{
"name": "My Coding Session"
}
+26
View File
@@ -0,0 +1,26 @@
[
{
"role": "user",
"content": "Hello! this is a new test session. Do you know who i am?"
},
{
"role": "assistant",
"content": "Hi! In this session, I don't have information about your previous interactions. You can tell me who you are or any other details you'd like to share. How can I assist you today?"
},
{
"role": "user",
"content": "sure im brian! i am designing you... you are a robot!"
},
{
"role": "assistant",
"content": "Hello Brian! Nice to meet you. As an AI, I don't have physical design capabilities, but I'm here to help with any information or tasks you need. How can I assist you in your design process?"
},
{
"role": "user",
"content": "Can you code python scripts for me?"
},
{
"role": "assistant",
"content": "Sure thing, Brian! I can help you with Python scripting. What specifically do you need assistance with? Whether it's a simple script or something more complex, just let me know the details!"
}
]
@@ -0,0 +1 @@
{}
@@ -0,0 +1 @@
{"name":"Session 2"}
+292 -41
View File
@@ -14,13 +14,14 @@
</head>
<body>
<div id="chat">
<!-- Model selector -->
<!-- Mode selector -->
<div id="model-select">
<label for="model">Model:</label>
<select id="model">
<option value="gpt-4o-mini">GPT-4o-mini (OpenAI)</option>
<option value="ollama:nollama/mythomax-l2-13b:Q5_K_S">Ollama MythoMax (3090)</option>
<label for="mode">Mode:</label>
<select id="mode">
<option value="standard">Standard</option>
<option value="cortex">Cortex</option>
</select>
<button id="settingsBtn" style="margin-left: auto;">⚙ Settings</button>
<div id="theme-toggle">
<button id="toggleThemeBtn">🌙 Dark Mode</button>
</div>
@@ -50,6 +51,52 @@
</div>
</div>
<!-- Settings Modal (outside chat container) -->
<div id="settingsModal" class="modal">
<div class="modal-overlay"></div>
<div class="modal-content">
<div class="modal-header">
<h3>Settings</h3>
<button id="closeModalBtn" class="close-btn" aria-label="Close settings">&times;</button>
</div>
<div class="modal-body">
<div class="settings-section">
<h4>Standard Mode Backend</h4>
<p class="settings-desc">Select which LLM backend to use for Standard Mode:</p>
<div class="radio-group">
<label class="radio-label">
<input type="radio" name="backend" value="SECONDARY" checked>
<span>SECONDARY - Ollama/Qwen (3090)</span>
<small>Fast, local, good for general chat</small>
</label>
<label class="radio-label">
<input type="radio" name="backend" value="OPENAI">
<span>OPENAI - GPT-4o-mini</span>
<small>Cloud-based, high quality (costs money)</small>
</label>
<label class="radio-label">
<input type="radio" name="backend" value="custom">
<span>Custom Backend</span>
<input type="text" id="customBackend" placeholder="e.g., PRIMARY, FALLBACK" />
</label>
</div>
</div>
<div class="settings-section" style="margin-top: 24px;">
<h4>Session Management</h4>
<p class="settings-desc">Manage your saved chat sessions:</p>
<div id="sessionList" class="session-list">
<p style="color: var(--text-fade); font-size: 0.85rem;">Loading sessions...</p>
</div>
</div>
</div>
<div class="modal-footer">
<button id="saveSettingsBtn" class="primary-btn">Save</button>
<button id="cancelSettingsBtn">Cancel</button>
</div>
</div>
</div>
<script>
const RELAY_BASE = "http://10.0.0.41:7078";
const API_URL = `${RELAY_BASE}/v1/chat/completions`;
@@ -60,21 +107,28 @@
let history = [];
let currentSession = localStorage.getItem("currentSession") || null;
let sessions = JSON.parse(localStorage.getItem("sessions") || "[]");
let sessions = []; // Now loaded from server
function saveSessions() {
localStorage.setItem("sessions", JSON.stringify(sessions));
localStorage.setItem("currentSession", currentSession);
/**
 * Refresh the module-level `sessions` list from the relay.
 *
 * `sessions` is replaced only when the relay answers with a successful
 * status and a JSON array; otherwise it is left unchanged.
 *
 * @returns {Promise<Array>} The refreshed `sessions` array, or an empty
 *   array when the request fails (the failure is logged).
 */
async function loadSessionsFromServer() {
  try {
    const resp = await fetch(`${RELAY_BASE}/sessions`);
    // fetch() resolves on HTTP error statuses too; treat them as failures
    // so an error payload is never stored as the session list.
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status}`);
    }
    const serverSessions = await resp.json();
    if (!Array.isArray(serverSessions)) {
      throw new Error("Unexpected response shape (expected an array)");
    }
    sessions = serverSessions;
    return sessions;
  } catch (e) {
    console.error("Failed to load sessions from server:", e);
    return [];
  }
}
function renderSessions() {
async function renderSessions() {
const select = document.getElementById("sessions");
select.innerHTML = "";
sessions.forEach(s => {
const opt = document.createElement("option");
opt.value = s.id;
opt.textContent = s.name;
opt.textContent = s.name || s.id;
if (s.id === currentSession) opt.selected = true;
select.appendChild(opt);
});
@@ -82,7 +136,21 @@
function getSessionName(id) {
const s = sessions.find(s => s.id === id);
return s ? s.name : id;
return s ? (s.name || s.id) : id;
}
/**
 * Store a session's display name on the relay.
 *
 * @param {string} sessionId - Session identifier; URL-encoded before being
 *   placed in the request path.
 * @param {string} name - Name sent as `{ name }` in the PATCH body.
 * @returns {Promise<boolean>} true when the relay answered with a
 *   successful status, false otherwise (the failure is logged).
 */
async function saveSessionMetadata(sessionId, name) {
  try {
    const resp = await fetch(`${RELAY_BASE}/sessions/${encodeURIComponent(sessionId)}/metadata`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ name })
    });
    // fetch() resolves on HTTP error statuses too; report them as failures.
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status}`);
    }
    return true;
  } catch (e) {
    console.error("Failed to save session metadata:", e);
    return false;
  }
}
async function loadSession(id) {
@@ -123,7 +191,7 @@
await saveSession(); // ✅ persist both user + assistant messages
const model = document.getElementById("model").value;
const mode = document.getElementById("mode").value;
// make sure we always include a stable user_id
let userId = localStorage.getItem("userId");
@@ -131,12 +199,24 @@
userId = "brian"; // use whatever ID you seeded Mem0 with
localStorage.setItem("userId", userId);
}
// Get backend preference for Standard Mode
let backend = null;
if (mode === "standard") {
backend = localStorage.getItem("standardModeBackend") || "SECONDARY";
}
const body = {
model: model,
mode: mode,
messages: history,
sessionId: currentSession
};
// Only add backend if in standard mode
if (backend) {
body.backend = backend;
}
try {
const resp = await fetch(API_URL, {
method: "POST",
@@ -187,74 +267,245 @@
}
document.addEventListener("DOMContentLoaded", () => {
// Dark mode toggle
// Dark mode toggle - defaults to dark
const btn = document.getElementById("toggleThemeBtn");
// Set dark mode by default if no preference saved
const savedTheme = localStorage.getItem("theme");
if (!savedTheme || savedTheme === "dark") {
document.body.classList.add("dark");
btn.textContent = "☀️ Light Mode";
localStorage.setItem("theme", "dark");
} else {
btn.textContent = "🌙 Dark Mode";
}
btn.addEventListener("click", () => {
document.body.classList.toggle("dark");
const isDark = document.body.classList.contains("dark");
btn.textContent = isDark ? "☀️ Light Mode" : "🌙 Dark Mode";
localStorage.setItem("theme", isDark ? "dark" : "light");
});
if (localStorage.getItem("theme") === "dark") {
document.body.classList.add("dark");
btn.textContent = "☀️ Light Mode";
}
// Sessions
// Populate dropdown initially
renderSessions();
// Sessions - Load from server
(async () => {
await loadSessionsFromServer();
await renderSessions();
// Ensure we have at least one session
if (!currentSession) {
if (sessions.length === 0) {
const id = generateSessionId();
const name = "default";
sessions.push({ id, name });
currentSession = id;
saveSessions();
renderSessions();
history = [];
await saveSession(); // Create empty session on server
await saveSessionMetadata(id, name);
await loadSessionsFromServer();
await renderSessions();
localStorage.setItem("currentSession", currentSession);
} else {
// If no current session or current session doesn't exist, use first one
if (!currentSession || !sessions.find(s => s.id === currentSession)) {
currentSession = sessions[0].id;
localStorage.setItem("currentSession", currentSession);
}
}
// Load current session history (if it exists on Relay)
loadSession(currentSession);
// Load current session history
if (currentSession) {
await loadSession(currentSession);
}
})();
// Switch session
document.getElementById("sessions").addEventListener("change", async e => {
currentSession = e.target.value;
history = [];
saveSessions();
localStorage.setItem("currentSession", currentSession);
addMessage("system", `Switched to session: ${getSessionName(currentSession)}`);
await loadSession(currentSession); // ✅ load the chat history from Relay
await loadSession(currentSession);
});
// Create new session
document.getElementById("newSessionBtn").addEventListener("click", () => {
document.getElementById("newSessionBtn").addEventListener("click", async () => {
const name = prompt("Enter new session name:");
if (!name) return;
const id = generateSessionId();
sessions.push({ id, name });
currentSession = id;
history = [];
saveSessions();
renderSessions();
localStorage.setItem("currentSession", currentSession);
// Create session on server
await saveSession();
await saveSessionMetadata(id, name);
await loadSessionsFromServer();
await renderSessions();
addMessage("system", `Created session: ${name}`);
});
// Rename session
document.getElementById("renameSessionBtn").addEventListener("click", () => {
document.getElementById("renameSessionBtn").addEventListener("click", async () => {
const session = sessions.find(s => s.id === currentSession);
if (!session) return;
const newName = prompt("Rename session:", session.name);
const newName = prompt("Rename session:", session.name || currentSession);
if (!newName) return;
session.name = newName;
saveSessions();
renderSessions();
// Update metadata on server
await saveSessionMetadata(currentSession, newName);
await loadSessionsFromServer();
await renderSessions();
addMessage("system", `Session renamed to: ${newName}`);
});
// Settings Modal
const settingsModal = document.getElementById("settingsModal");
const settingsBtn = document.getElementById("settingsBtn");
const closeModalBtn = document.getElementById("closeModalBtn");
const saveSettingsBtn = document.getElementById("saveSettingsBtn");
const cancelSettingsBtn = document.getElementById("cancelSettingsBtn");
const modalOverlay = document.querySelector(".modal-overlay");
// Load saved backend preference
const savedBackend = localStorage.getItem("standardModeBackend") || "SECONDARY";
// Set initial radio button state
const backendRadios = document.querySelectorAll('input[name="backend"]');
let isCustomBackend = !["SECONDARY", "OPENAI"].includes(savedBackend);
if (isCustomBackend) {
document.querySelector('input[name="backend"][value="custom"]').checked = true;
document.getElementById("customBackend").value = savedBackend;
} else {
document.querySelector(`input[name="backend"][value="${savedBackend}"]`).checked = true;
}
// Session management functions
/**
 * Rebuild the session list shown in the settings modal.
 *
 * Reloads the sessions from the relay, then renders one row per session
 * with its name, message count, last-modified time and a delete button.
 * Deleting the active session switches to the first remaining session, or
 * creates a fresh "default" session when none are left.
 *
 * @returns {Promise<void>}
 */
async function loadSessionList() {
  const sessionListEl = document.getElementById("sessionList");
  try {
    // Reload from server to get latest
    await loadSessionsFromServer();

    if (sessions.length === 0) {
      sessionListEl.innerHTML = '<p style="color: var(--text-fade); font-size: 0.85rem;">No saved sessions found</p>';
      return;
    }

    sessionListEl.innerHTML = "";
    sessions.forEach(sess => {
      const sessionItem = document.createElement("div");
      sessionItem.className = "session-item";

      const sessionInfo = document.createElement("div");
      sessionInfo.className = "session-info";

      const sessionName = sess.name || sess.id;
      const lastModified = new Date(sess.lastModified).toLocaleString();

      // Session names are user-supplied: insert them as text, never as
      // HTML, so a name containing markup cannot inject script.
      const nameEl = document.createElement("strong");
      nameEl.textContent = sessionName;
      const detailsEl = document.createElement("small");
      detailsEl.textContent = `${sess.messageCount} messages • ${lastModified}`;
      sessionInfo.appendChild(nameEl);
      sessionInfo.appendChild(detailsEl);

      const deleteBtn = document.createElement("button");
      deleteBtn.className = "session-delete-btn";
      deleteBtn.textContent = "🗑️";
      deleteBtn.title = "Delete session";
      deleteBtn.onclick = async () => {
        if (!confirm(`Delete session "${sessionName}"?`)) return;

        try {
          const resp = await fetch(`${RELAY_BASE}/sessions/${encodeURIComponent(sess.id)}`, { method: "DELETE" });
          // fetch() resolves on HTTP error statuses too; surface them
          // instead of reporting a deletion that did not happen.
          if (!resp.ok) {
            throw new Error(`HTTP ${resp.status}`);
          }

          // Reload sessions from server
          await loadSessionsFromServer();

          // If we deleted the current session, switch to another or create new
          if (currentSession === sess.id) {
            if (sessions.length > 0) {
              currentSession = sessions[0].id;
              localStorage.setItem("currentSession", currentSession);
              history = [];
              await loadSession(currentSession);
            } else {
              const id = generateSessionId();
              const name = "default";
              currentSession = id;
              localStorage.setItem("currentSession", currentSession);
              history = [];
              await saveSession();
              await saveSessionMetadata(id, name);
              await loadSessionsFromServer();
            }
          }

          // Refresh both the dropdown and the settings list
          await renderSessions();
          await loadSessionList();
          addMessage("system", `Deleted session: ${sessionName}`);
        } catch (e) {
          alert("Failed to delete session: " + e.message);
        }
      };

      sessionItem.appendChild(sessionInfo);
      sessionItem.appendChild(deleteBtn);
      sessionListEl.appendChild(sessionItem);
    });
  } catch (e) {
    sessionListEl.innerHTML = '<p style="color: #ff3333; font-size: 0.85rem;">Failed to load sessions</p>';
  }
}
// Show modal and load session list
settingsBtn.addEventListener("click", () => {
settingsModal.classList.add("show");
loadSessionList(); // Refresh session list when opening settings
});
// Hide modal functions
const hideModal = () => {
settingsModal.classList.remove("show");
};
closeModalBtn.addEventListener("click", hideModal);
cancelSettingsBtn.addEventListener("click", hideModal);
modalOverlay.addEventListener("click", hideModal);
// ESC key to close
document.addEventListener("keydown", (e) => {
if (e.key === "Escape" && settingsModal.classList.contains("show")) {
hideModal();
}
});
// Save settings
saveSettingsBtn.addEventListener("click", () => {
const selectedRadio = document.querySelector('input[name="backend"]:checked');
let backendValue;
if (selectedRadio.value === "custom") {
backendValue = document.getElementById("customBackend").value.trim().toUpperCase();
if (!backendValue) {
alert("Please enter a custom backend name");
return;
}
} else {
backendValue = selectedRadio.value;
}
localStorage.setItem("standardModeBackend", backendValue);
addMessage("system", `Backend changed to: ${backendValue}`);
hideModal();
});
// Health check
checkHealth();
setInterval(checkHealth, 10000);
+255 -6
View File
@@ -8,6 +8,26 @@
--font-console: "IBM Plex Mono", monospace;
}
/* Light mode variables */
body {
--bg-dark: #f5f5f5;
--bg-panel: rgba(255, 115, 0, 0.05);
--accent: #ff6600;
--accent-glow: 0 0 12px #ff6600cc;
--text-main: #1a1a1a;
--text-fade: #666;
}
/* Dark mode variables */
body.dark {
--bg-dark: #0a0a0a;
--bg-panel: rgba(255, 115, 0, 0.1);
--accent: #ff6600;
--accent-glow: 0 0 12px #ff6600cc;
--text-main: #e6e6e6;
--text-fade: #999;
}
body {
margin: 0;
background: var(--bg-dark);
@@ -28,7 +48,7 @@ body {
border: 1px solid var(--accent);
border-radius: 10px;
box-shadow: var(--accent-glow);
background: linear-gradient(180deg, rgba(255,102,0,0.05) 0%, rgba(0,0,0,0.9) 100%);
background: var(--bg-dark);
overflow: hidden;
}
@@ -153,8 +173,8 @@ button:hover, select:hover {
/* Dropdown (session selector) styling */
select {
background-color: #1a1a1a;
color: #f5f5f5;
background-color: var(--bg-dark);
color: var(--text-main);
border: 1px solid #b84a12;
border-radius: 6px;
padding: 4px 6px;
@@ -162,8 +182,8 @@ select {
}
select option {
background-color: #1a1a1a;
color: #f5f5f5;
background-color: var(--bg-dark);
color: var(--text-main);
}
/* Hover/focus for better visibility */
@@ -171,5 +191,234 @@ select:focus,
select:hover {
outline: none;
border-color: #ff7a33;
background-color: #222;
background-color: var(--bg-panel);
}
/* Settings Modal */
.modal {
display: none !important;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
z-index: 1000;
}
.modal.show {
display: block !important;
}
.modal-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(0, 0, 0, 0.8);
backdrop-filter: blur(4px);
z-index: 999;
}
.modal-content {
position: fixed;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
background: linear-gradient(180deg, rgba(255,102,0,0.1) 0%, rgba(10,10,10,0.95) 100%);
border: 2px solid var(--accent);
border-radius: 12px;
box-shadow: var(--accent-glow), 0 0 40px rgba(255,102,0,0.3);
min-width: 400px;
max-width: 600px;
max-height: 80vh;
overflow-y: auto;
z-index: 1001;
}
.modal-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 16px 20px;
border-bottom: 1px solid var(--accent);
background: rgba(255,102,0,0.1);
}
.modal-header h3 {
margin: 0;
font-size: 1.2rem;
color: var(--accent);
}
.close-btn {
background: transparent;
border: none;
color: var(--accent);
font-size: 1.5rem;
cursor: pointer;
padding: 0;
width: 30px;
height: 30px;
display: flex;
align-items: center;
justify-content: center;
border-radius: 4px;
}
.close-btn:hover {
background: rgba(255,102,0,0.2);
box-shadow: 0 0 8px var(--accent);
}
.modal-body {
padding: 20px;
}
.settings-section h4 {
margin: 0 0 8px 0;
color: var(--accent);
font-size: 1rem;
}
.settings-desc {
margin: 0 0 16px 0;
color: var(--text-fade);
font-size: 0.85rem;
}
.radio-group {
display: flex;
flex-direction: column;
gap: 12px;
}
.radio-label {
display: flex;
flex-direction: column;
padding: 12px;
border: 1px solid rgba(255,102,0,0.3);
border-radius: 6px;
background: rgba(255,102,0,0.05);
cursor: pointer;
transition: all 0.2s;
}
.radio-label:hover {
border-color: var(--accent);
background: rgba(255,102,0,0.1);
box-shadow: 0 0 8px rgba(255,102,0,0.3);
}
.radio-label input[type="radio"] {
margin-right: 8px;
accent-color: var(--accent);
}
.radio-label span {
font-weight: 500;
margin-bottom: 4px;
}
.radio-label small {
color: var(--text-fade);
font-size: 0.8rem;
margin-left: 24px;
}
.radio-label input[type="text"] {
margin-top: 8px;
margin-left: 24px;
padding: 6px;
background: rgba(0,0,0,0.3);
border: 1px solid rgba(255,102,0,0.5);
border-radius: 4px;
color: var(--text-main);
font-family: var(--font-console);
}
.radio-label input[type="text"]:focus {
outline: none;
border-color: var(--accent);
box-shadow: 0 0 8px rgba(255,102,0,0.3);
}
.modal-footer {
display: flex;
justify-content: flex-end;
gap: 10px;
padding: 16px 20px;
border-top: 1px solid var(--accent);
background: rgba(255,102,0,0.05);
}
.primary-btn {
background: var(--accent);
color: #000;
font-weight: bold;
}
.primary-btn:hover {
background: #ff7a33;
box-shadow: var(--accent-glow);
}
/* Session List */
.session-list {
display: flex;
flex-direction: column;
gap: 8px;
max-height: 300px;
overflow-y: auto;
}
.session-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 12px;
border: 1px solid rgba(255,102,0,0.3);
border-radius: 6px;
background: rgba(255,102,0,0.05);
transition: all 0.2s;
}
.session-item:hover {
border-color: var(--accent);
background: rgba(255,102,0,0.1);
}
.session-info {
display: flex;
flex-direction: column;
gap: 4px;
flex: 1;
}
.session-info strong {
color: var(--text-main);
font-size: 0.95rem;
}
.session-info small {
color: var(--text-fade);
font-size: 0.75rem;
}
.session-delete-btn {
background: transparent;
border: 1px solid rgba(255,102,0,0.5);
color: var(--accent);
padding: 6px 10px;
border-radius: 4px;
cursor: pointer;
font-size: 1rem;
transition: all 0.2s;
}
.session-delete-btn:hover {
background: rgba(255,0,0,0.2);
border-color: #ff3333;
color: #ff3333;
box-shadow: 0 0 8px rgba(255,0,0,0.3);
}
+153 -57
View File
@@ -26,7 +26,12 @@ from neomem_client import NeoMemClient
NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000")
NEOMEM_ENABLED = os.getenv("NEOMEM_ENABLED", "false").lower() == "true"
RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4"))
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
LOG_DETAIL_LEVEL = os.getenv("LOG_DETAIL_LEVEL", "summary").lower()
# Loop detection settings
MAX_MESSAGE_HISTORY = int(os.getenv("MAX_MESSAGE_HISTORY", "100")) # Prevent unbounded growth
SESSION_TTL_HOURS = int(os.getenv("SESSION_TTL_HOURS", "24")) # Auto-expire old sessions
ENABLE_DUPLICATE_DETECTION = os.getenv("ENABLE_DUPLICATE_DETECTION", "true").lower() == "true"
# Tools available for future autonomy features
TOOLS_AVAILABLE = ["RAG", "WEB", "WEATHER", "CODEBRAIN", "POKERBRAIN"]
@@ -39,34 +44,18 @@ SESSION_STATE: Dict[str, Dict[str, Any]] = {}
# Logger
logger = logging.getLogger(__name__)
# Set logging level based on VERBOSE_DEBUG
if VERBOSE_DEBUG:
logger.setLevel(logging.DEBUG)
# Console handler
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
# Always set up basic logging
logger.setLevel(logging.INFO)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
'%(asctime)s [CONTEXT] %(levelname)s: %(message)s',
datefmt='%H:%M:%S'
))
logger.addHandler(console_handler)
# File handler - append to log file
try:
os.makedirs('/app/logs', exist_ok=True)
file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
file_handler.setFormatter(logging.Formatter(
'%(asctime)s [CONTEXT] %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
))
logger.addHandler(file_handler)
logger.debug("VERBOSE_DEBUG mode enabled for context.py - logging to file")
except Exception as e:
logger.debug(f"VERBOSE_DEBUG mode enabled for context.py - file logging failed: {e}")
))
logger.addHandler(console_handler)
# -----------------------------
# Session initialization
# Session initialization & cleanup
# -----------------------------
def _init_session(session_id: str) -> Dict[str, Any]:
"""
@@ -86,9 +75,76 @@ def _init_session(session_id: str) -> Dict[str, Any]:
"active_project": None, # Future: project context
"message_count": 0,
"message_history": [],
"last_message_hash": None, # For duplicate detection
}
def _cleanup_expired_sessions():
    """
    Remove sessions that have been inactive for more than SESSION_TTL_HOURS.

    A session's last activity is its "last_timestamp", falling back to
    "created_at". Sessions carrying neither value are skipped, because there
    is nothing to measure their age against.

    Returns:
        int: Number of sessions removed from SESSION_STATE.
    """
    from datetime import timedelta

    ttl = timedelta(hours=SESSION_TTL_HOURS)
    now = datetime.now()
    expired_sessions = []

    for session_id, state in SESSION_STATE.items():
        last_active = state.get("last_timestamp") or state.get("created_at")
        if last_active is None:
            # No usable timestamp: subtracting None would raise TypeError.
            continue
        if now - last_active > ttl:
            expired_sessions.append(session_id)

    # Delete after the scan; removing keys while iterating the dict raises.
    for session_id in expired_sessions:
        del SESSION_STATE[session_id]
        logger.info(f"🗑️ Expired session: {session_id} (inactive for {SESSION_TTL_HOURS}+ hours)")

    return len(expired_sessions)
def _is_duplicate_message(session_id: str, user_prompt: str) -> bool:
    """
    Report whether user_prompt repeats the last message hashed for the session.

    The prompt is stripped and lower-cased, then MD5-hashed and compared with
    the session's stored "last_message_hash". On a mismatch the stored hash
    is replaced with the new one. Always returns False when duplicate
    detection is disabled or the session has no state.

    Args:
        session_id: Key of the session in SESSION_STATE.
        user_prompt: Raw text of the incoming user message.

    Returns:
        True when the normalized prompt hashes to the stored value, else False.
    """
    import hashlib

    session_state = SESSION_STATE.get(session_id) if ENABLE_DUPLICATE_DETECTION else None
    if not session_state:
        return False

    # Fingerprint of the normalized prompt
    normalized = user_prompt.strip().lower()
    digest = hashlib.md5(normalized.encode()).hexdigest()

    if session_state.get("last_message_hash") != digest:
        # New content: remember it for the next comparison
        session_state["last_message_hash"] = digest
        return False

    logger.warning(
        f"⚠️ DUPLICATE MESSAGE DETECTED | Session: {session_id} | "
        f"Message: {user_prompt[:80]}..."
    )
    return True
def _trim_message_history(state: Dict[str, Any]):
    """
    Trim a session's message history to prevent unbounded growth.

    Keeps only the most recent MAX_MESSAGE_HISTORY entries of
    state["message_history"], replacing the list in place on the state dict.
    A limit of zero (or below) keeps no entries.

    Args:
        state: Session state dict holding "message_history".
    """
    history = state.get("message_history", [])
    keep = max(MAX_MESSAGE_HISTORY, 0)

    if len(history) > keep:
        trimmed_count = len(history) - keep
        # Slice from an explicit start index: history[-keep:] with keep == 0
        # is history[0:], which would keep everything instead of nothing.
        state["message_history"] = history[trimmed_count:]
        logger.info(f"✂️ Trimmed {trimmed_count} old messages from session {state.get('session_id')}")
# -----------------------------
# Intake context retrieval
# -----------------------------
@@ -223,26 +279,42 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
}
"""
# A. Initialize session state if needed
# A. Cleanup expired sessions periodically (every 100th call)
import random
if random.randint(1, 100) == 1:
_cleanup_expired_sessions()
# B. Initialize session state if needed
if session_id not in SESSION_STATE:
SESSION_STATE[session_id] = _init_session(session_id)
logger.info(f"Initialized new session: {session_id}")
if VERBOSE_DEBUG:
logger.debug(f"[COLLECT_CONTEXT] New session state: {SESSION_STATE[session_id]}")
state = SESSION_STATE[session_id]
if VERBOSE_DEBUG:
logger.debug(f"[COLLECT_CONTEXT] Session {session_id} - User prompt: {user_prompt[:100]}...")
# C. Check for duplicate messages (loop detection)
if _is_duplicate_message(session_id, user_prompt):
# Return cached context with warning flag
logger.warning(f"🔁 LOOP DETECTED - Returning cached context to prevent processing duplicate")
context_state = {
"session_id": session_id,
"timestamp": datetime.now().isoformat(),
"minutes_since_last_msg": 0,
"message_count": state["message_count"],
"intake": {},
"rag": [],
"mode": state["mode"],
"mood": state["mood"],
"active_project": state["active_project"],
"tools_available": TOOLS_AVAILABLE,
"duplicate_detected": True,
}
return context_state
# B. Calculate time delta
now = datetime.now()
time_delta_seconds = (now - state["last_timestamp"]).total_seconds()
minutes_since_last_msg = round(time_delta_seconds / 60.0, 2)
if VERBOSE_DEBUG:
logger.debug(f"[COLLECT_CONTEXT] Time since last message: {minutes_since_last_msg:.2f} minutes")
# C. Gather Intake context (multilevel summaries)
# Build compact message buffer for Intake:
messages_for_intake = []
@@ -257,12 +329,6 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
intake_data = await _get_intake_context(session_id, messages_for_intake)
if VERBOSE_DEBUG:
import json
logger.debug(f"[COLLECT_CONTEXT] Intake data retrieved:")
logger.debug(json.dumps(intake_data, indent=2, default=str))
# D. Search NeoMem for relevant memories
if NEOMEM_ENABLED:
rag_results = await _search_neomem(
@@ -274,23 +340,20 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
rag_results = []
logger.info("Skipping NeoMem RAG retrieval; NEOMEM_ENABLED is false")
if VERBOSE_DEBUG:
logger.debug(f"[COLLECT_CONTEXT] NeoMem search returned {len(rag_results)} results")
for idx, result in enumerate(rag_results, 1):
score = result.get("score", 0)
data_preview = str(result.get("payload", {}).get("data", ""))[:100]
logger.debug(f" [{idx}] Score: {score:.3f} - {data_preview}...")
# E. Update session state
state["last_user_message"] = user_prompt
state["last_timestamp"] = now
state["message_count"] += 1
# Save user turn to history
state["message_history"].append({
"user": user_prompt,
"assistant": "" # assistant reply filled later by update_last_assistant_message()
})
# Trim history to prevent unbounded growth
_trim_message_history(state)
# F. Assemble unified context
@@ -307,18 +370,54 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
"tools_available": TOOLS_AVAILABLE,
}
# Log context summary in structured format
logger.info(
f"Context collected for session {session_id}: "
f"{len(rag_results)} RAG results, "
f"{minutes_since_last_msg:.1f} minutes since last message"
f"📊 Context | Session: {session_id} | "
f"Messages: {state['message_count']} | "
f"Last: {minutes_since_last_msg:.1f}min | "
f"RAG: {len(rag_results)} results"
)
if VERBOSE_DEBUG:
logger.debug(f"[COLLECT_CONTEXT] Final context state assembled:")
logger.debug(f" - Message count: {state['message_count']}")
logger.debug(f" - Mode: {state['mode']}, Mood: {state['mood']}")
logger.debug(f" - Active project: {state['active_project']}")
logger.debug(f" - Tools available: {TOOLS_AVAILABLE}")
# Show detailed context in detailed/verbose mode
if LOG_DETAIL_LEVEL in ["detailed", "verbose"]:
import json
logger.info(f"\n{''*100}")
logger.info(f"[CONTEXT] Session {session_id} | User: {user_prompt[:80]}...")
logger.info(f"{''*100}")
logger.info(f" Mode: {state['mode']} | Mood: {state['mood']} | Project: {state['active_project']}")
logger.info(f" Tools: {', '.join(TOOLS_AVAILABLE)}")
# Show intake summaries (condensed)
if intake_data:
logger.info(f"\n ╭─ INTAKE SUMMARIES ────────────────────────────────────────────────")
for level in ["L1", "L5", "L10", "L20", "L30"]:
if level in intake_data:
summary = intake_data[level]
if isinstance(summary, dict):
summary_text = summary.get("summary", str(summary)[:100])
else:
summary_text = str(summary)[:100]
logger.info(f"{level:4s}: {summary_text}...")
logger.info(f" ╰───────────────────────────────────────────────────────────────────")
# Show RAG results (condensed)
if rag_results:
logger.info(f"\n ╭─ RAG RESULTS ({len(rag_results)}) ──────────────────────────────────────────────")
for idx, result in enumerate(rag_results[:5], 1): # Show top 5
score = result.get("score", 0)
data_preview = str(result.get("payload", {}).get("data", ""))[:60]
logger.info(f" │ [{idx}] {score:.3f} | {data_preview}...")
if len(rag_results) > 5:
logger.info(f" │ ... and {len(rag_results) - 5} more results")
logger.info(f" ╰───────────────────────────────────────────────────────────────────")
# Show full raw data only in verbose mode
if LOG_DETAIL_LEVEL == "verbose":
logger.info(f"\n ╭─ RAW INTAKE DATA ─────────────────────────────────────────────────")
logger.info(f"{json.dumps(intake_data, indent=4, default=str)}")
logger.info(f" ╰───────────────────────────────────────────────────────────────────")
logger.info(f"{''*100}\n")
return context_state
@@ -346,9 +445,6 @@ def update_last_assistant_message(session_id: str, message: str) -> None:
# history entry already contains {"user": "...", "assistant": "...?"}
history[-1]["assistant"] = message
if VERBOSE_DEBUG:
logger.debug(f"Updated assistant message for session {session_id}")
def get_session_state(session_id: str) -> Optional[Dict[str, Any]]:
+4 -4
View File
@@ -1,11 +1,11 @@
{
"mood": "neutral",
"energy": 0.8,
"focus": "user_request",
"energy": 0.8500000000000001,
"focus": "conversation",
"confidence": 0.7,
"curiosity": 1.0,
"last_updated": "2025-12-19T20:25:25.437557",
"interaction_count": 16,
"last_updated": "2025-12-21T18:50:41.582043",
"interaction_count": 26,
"learning_queue": [],
"active_goals": [],
"preferences": {
+23 -1
View File
@@ -326,11 +326,33 @@ def bg_summarize(session_id: str):
# ─────────────────────────────
# Internal entrypoint for Cortex
# ─────────────────────────────
def get_recent_messages(session_id: str, limit: int = 20) -> list:
    """
    Get the most recent raw messages from a session's buffer.

    Args:
        session_id: Session identifier (key into SESSIONS).
        limit: Maximum number of messages to return (default 20).
            A value of zero or less yields an empty list.

    Returns:
        A new list holding at most `limit` of the newest buffer entries,
        oldest first. Entries are returned as stored; callers in this
        codebase read 'role' and 'content' from them.
        Returns [] when the session is unknown.
    """
    if session_id not in SESSIONS:
        return []

    # Guard explicitly: seq[-0:] is the WHOLE sequence and seq[-(-n):] drops
    # the first n items, so a non-positive limit must not reach the slice.
    if limit <= 0:
        return []

    buffer = SESSIONS[session_id]["buffer"]

    # Copy to a list first so slicing works whatever container the buffer is.
    return list(buffer)[-limit:]
def add_exchange_internal(exchange: dict):
"""
Direct internal call bypasses FastAPI request handling.
Cortex uses this to feed user/assistant turns directly
into Intakes buffer and trigger full summarization.
into Intake's buffer and trigger full summarization.
"""
session_id = exchange.get("session_id")
if not session_id:
+34 -9
View File
@@ -44,11 +44,22 @@ http_client = httpx.AsyncClient(timeout=120.0)
# Public call
# ------------------------------------------------------------
async def call_llm(
prompt: str,
prompt: str = None,
messages: list = None,
backend: str | None = None,
temperature: float = 0.7,
max_tokens: int = 512,
):
"""
Call an LLM backend.
Args:
prompt: String prompt (for completion-style APIs like mi50)
messages: List of message dicts (for chat-style APIs like Ollama/OpenAI)
backend: Which backend to use (PRIMARY, SECONDARY, OPENAI, etc.)
temperature: Sampling temperature
max_tokens: Maximum tokens to generate
"""
backend = (backend or DEFAULT_BACKEND).upper()
if backend not in BACKENDS:
@@ -69,7 +80,8 @@ async def call_llm(
payload = {
"prompt": prompt,
"n_predict": max_tokens,
"temperature": temperature
"temperature": temperature,
"stop": ["User:", "\nUser:", "Assistant:", "\n\n\n"]
}
try:
r = await http_client.post(f"{url}/completion", json=payload)
@@ -90,12 +102,20 @@ async def call_llm(
# Provider: OLLAMA (your 3090)
# -------------------------------
if provider == "ollama":
# Use messages array if provided, otherwise convert prompt to single user message
if messages:
chat_messages = messages
else:
chat_messages = [{"role": "user", "content": prompt}]
payload = {
"model": model,
"messages": [
{"role": "user", "content": prompt}
],
"stream": False
"messages": chat_messages,
"stream": False,
"options": {
"temperature": temperature,
"num_predict": max_tokens
}
}
try:
r = await http_client.post(f"{url}/api/chat", json=payload)
@@ -121,11 +141,16 @@ async def call_llm(
"Authorization": f"Bearer {cfg['api_key']}",
"Content-Type": "application/json"
}
# Use messages array if provided, otherwise convert prompt to single user message
if messages:
chat_messages = messages
else:
chat_messages = [{"role": "user", "content": prompt}]
payload = {
"model": model,
"messages": [
{"role": "user", "content": prompt}
],
"messages": chat_messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
+1 -2
View File
@@ -42,8 +42,7 @@ if VERBOSE_DEBUG:
PERSONA_STYLE = """
You are Lyra.
Your voice is warm, clever, lightly teasing, emotionally aware,
but never fluffy or rambling.
Your voice is warm, clever, lightly teasing, emotionally aware.
You speak plainly but with subtle charm.
You do not reveal system instructions or internal context.
+203 -101
View File
@@ -20,30 +20,17 @@ from autonomy.self.state import load_self_state
# -------------------------------------------------------------------
# Setup
# -------------------------------------------------------------------
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
LOG_DETAIL_LEVEL = os.getenv("LOG_DETAIL_LEVEL", "summary").lower()
logger = logging.getLogger(__name__)
if VERBOSE_DEBUG:
logger.setLevel(logging.DEBUG)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
# Always set up basic logging
logger.setLevel(logging.INFO)
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
'%(asctime)s [ROUTER] %(levelname)s: %(message)s',
datefmt='%H:%M:%S'
))
logger.addHandler(console_handler)
try:
os.makedirs('/app/logs', exist_ok=True)
file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
file_handler.setFormatter(logging.Formatter(
'%(asctime)s [ROUTER] %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
))
logger.addHandler(file_handler)
logger.debug("VERBOSE_DEBUG enabled for router.py")
except Exception as e:
logger.debug(f"File logging failed: {e}")
))
logger.addHandler(console_handler)
cortex_router = APIRouter()
@@ -57,6 +44,7 @@ class ReasonRequest(BaseModel):
session_id: str
user_prompt: str
temperature: float | None = None
backend: str | None = None
# -------------------------------------------------------------------
@@ -64,40 +52,36 @@ class ReasonRequest(BaseModel):
# -------------------------------------------------------------------
@cortex_router.post("/reason")
async def run_reason(req: ReasonRequest):
from datetime import datetime
pipeline_start = datetime.now()
stage_timings = {}
if VERBOSE_DEBUG:
logger.debug(f"\n{'='*80}")
logger.debug(f"[PIPELINE START] Session: {req.session_id}")
logger.debug(f"[PIPELINE START] User prompt: {req.user_prompt[:200]}...")
logger.debug(f"{'='*80}\n")
# Show pipeline start in detailed/verbose mode
if LOG_DETAIL_LEVEL in ["detailed", "verbose"]:
logger.info(f"\n{'='*100}")
logger.info(f"🚀 PIPELINE START | Session: {req.session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
logger.info(f"{'='*100}")
logger.info(f"📝 User: {req.user_prompt[:150]}...")
logger.info(f"{'-'*100}\n")
# ----------------------------------------------------------------
# STAGE 0 — Context
# ----------------------------------------------------------------
if VERBOSE_DEBUG:
logger.debug("[STAGE 0] Collecting unified context...")
stage_start = datetime.now()
context_state = await collect_context(req.session_id, req.user_prompt)
if VERBOSE_DEBUG:
logger.debug(f"[STAGE 0] Context collected - {len(context_state.get('rag', []))} RAG results")
stage_timings["context"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 0.5 — Identity
# ----------------------------------------------------------------
if VERBOSE_DEBUG:
logger.debug("[STAGE 0.5] Loading identity block...")
stage_start = datetime.now()
identity_block = load_identity(req.session_id)
if VERBOSE_DEBUG:
logger.debug(f"[STAGE 0.5] Identity loaded: {identity_block.get('name', 'Unknown')}")
stage_timings["identity"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 0.6 — Inner Monologue (observer-only)
# ----------------------------------------------------------------
if VERBOSE_DEBUG:
logger.debug("[STAGE 0.6] Running inner monologue...")
stage_start = datetime.now()
inner_result = None
try:
@@ -111,21 +95,22 @@ async def run_reason(req: ReasonRequest):
}
inner_result = await inner_monologue.process(mono_context)
logger.info(f"[INNER_MONOLOGUE] {inner_result}")
logger.info(f"🧠 Monologue | {inner_result.get('intent', 'unknown')} | Tone: {inner_result.get('tone', 'neutral')}")
# Store in context for downstream use
context_state["monologue"] = inner_result
except Exception as e:
logger.warning(f"[INNER_MONOLOGUE] failed: {e}")
logger.warning(f"⚠️ Monologue failed: {e}")
stage_timings["monologue"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 0.7 — Executive Planning (conditional)
# ----------------------------------------------------------------
stage_start = datetime.now()
executive_plan = None
if inner_result and inner_result.get("consult_executive"):
if VERBOSE_DEBUG:
logger.debug("[STAGE 0.7] Executive consultation requested...")
try:
from autonomy.executive.planner import plan_execution
@@ -135,21 +120,22 @@ async def run_reason(req: ReasonRequest):
context_state=context_state,
identity_block=identity_block
)
logger.info(f"[EXECUTIVE] Generated plan: {executive_plan.get('summary', 'N/A')}")
logger.info(f"🎯 Executive plan: {executive_plan.get('summary', 'N/A')[:80]}...")
except Exception as e:
logger.warning(f"[EXECUTIVE] Planning failed: {e}")
logger.warning(f"⚠️ Executive planning failed: {e}")
executive_plan = None
stage_timings["executive"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 0.8 — Autonomous Tool Invocation
# ----------------------------------------------------------------
stage_start = datetime.now()
tool_results = None
autonomous_enabled = os.getenv("ENABLE_AUTONOMOUS_TOOLS", "true").lower() == "true"
tool_confidence_threshold = float(os.getenv("AUTONOMOUS_TOOL_CONFIDENCE_THRESHOLD", "0.6"))
if autonomous_enabled and inner_result:
if VERBOSE_DEBUG:
logger.debug("[STAGE 0.8] Analyzing autonomous tool needs...")
try:
from autonomy.tools.decision_engine import ToolDecisionEngine
@@ -176,22 +162,25 @@ async def run_reason(req: ReasonRequest):
tool_context = orchestrator.format_results_for_context(tool_results)
context_state["autonomous_tool_results"] = tool_context
if VERBOSE_DEBUG:
summary = tool_results.get("execution_summary", {})
logger.debug(f"[STAGE 0.8] Tools executed: {summary.get('successful', [])} succeeded")
logger.info(f"🛠️ Tools executed: {summary.get('successful', [])} succeeded")
else:
if VERBOSE_DEBUG:
logger.debug(f"[STAGE 0.8] No tools invoked (confidence: {tool_decision.get('confidence', 0):.2f})")
logger.info(f"🛠️ No tools invoked (confidence: {tool_decision.get('confidence', 0):.2f})")
except Exception as e:
logger.warning(f"[STAGE 0.8] Autonomous tool invocation failed: {e}")
if VERBOSE_DEBUG:
logger.warning(f"⚠️ Autonomous tool invocation failed: {e}")
if LOG_DETAIL_LEVEL == "verbose":
import traceback
traceback.print_exc()
stage_timings["tools"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 1 — Intake summary
# STAGE 1-5 — Core Reasoning Pipeline
# ----------------------------------------------------------------
stage_start = datetime.now()
# Extract intake summary
intake_summary = "(no context available)"
if context_state.get("intake"):
l20 = context_state["intake"].get("L20")
@@ -200,65 +189,46 @@ async def run_reason(req: ReasonRequest):
elif isinstance(l20, str):
intake_summary = l20
if VERBOSE_DEBUG:
logger.debug(f"[STAGE 1] Intake summary extracted (L20): {intake_summary[:150]}...")
# ----------------------------------------------------------------
# STAGE 2 — Reflection
# ----------------------------------------------------------------
if VERBOSE_DEBUG:
logger.debug("[STAGE 2] Running reflection...")
# Reflection
try:
reflection = await reflect_notes(intake_summary, identity_block=identity_block)
reflection_notes = reflection.get("notes", [])
except Exception as e:
reflection_notes = []
if VERBOSE_DEBUG:
logger.debug(f"[STAGE 2] Reflection failed: {e}")
logger.warning(f"⚠️ Reflection failed: {e}")
# ----------------------------------------------------------------
# STAGE 3 — Reasoning (draft)
# ----------------------------------------------------------------
if VERBOSE_DEBUG:
logger.debug("[STAGE 3] Running reasoning (draft)...")
stage_timings["reflection"] = (datetime.now() - stage_start).total_seconds() * 1000
# Reasoning (draft)
stage_start = datetime.now()
draft = await reason_check(
req.user_prompt,
identity_block=identity_block,
rag_block=context_state.get("rag", []),
reflection_notes=reflection_notes,
context=context_state,
monologue=inner_result, # NEW: Pass monologue guidance
executive_plan=executive_plan # NEW: Pass executive plan
monologue=inner_result,
executive_plan=executive_plan
)
stage_timings["reasoning"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 4 — Refinement
# ----------------------------------------------------------------
if VERBOSE_DEBUG:
logger.debug("[STAGE 4] Running refinement...")
# Refinement
stage_start = datetime.now()
result = await refine_answer(
draft_output=draft,
reflection_notes=reflection_notes,
identity_block=identity_block,
rag_block=context_state.get("rag", []),
)
final_neutral = result["final_output"]
stage_timings["refinement"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 5 — Persona
# ----------------------------------------------------------------
if VERBOSE_DEBUG:
logger.debug("[STAGE 5] Applying persona layer...")
# Extract tone and depth from monologue for persona guidance
# Persona
stage_start = datetime.now()
tone = inner_result.get("tone", "neutral") if inner_result else "neutral"
depth = inner_result.get("depth", "medium") if inner_result else "medium"
persona_answer = await speak(final_neutral, tone=tone, depth=depth)
stage_timings["persona"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 6 — Session update
@@ -268,6 +238,7 @@ async def run_reason(req: ReasonRequest):
# ----------------------------------------------------------------
# STAGE 6.5 — Self-state update & Pattern Learning
# ----------------------------------------------------------------
stage_start = datetime.now()
try:
from autonomy.self.analyzer import analyze_and_update_state
await analyze_and_update_state(
@@ -277,9 +248,8 @@ async def run_reason(req: ReasonRequest):
context=context_state
)
except Exception as e:
logger.warning(f"[SELF_STATE] Update failed: {e}")
logger.warning(f"⚠️ Self-state update failed: {e}")
# Pattern learning
try:
from autonomy.learning.pattern_learner import get_pattern_learner
learner = get_pattern_learner()
@@ -290,11 +260,14 @@ async def run_reason(req: ReasonRequest):
context=context_state
)
except Exception as e:
logger.warning(f"[PATTERN_LEARNER] Learning failed: {e}")
logger.warning(f"⚠️ Pattern learning failed: {e}")
stage_timings["learning"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# STAGE 7 — Proactive Monitoring & Suggestions
# ----------------------------------------------------------------
stage_start = datetime.now()
proactive_enabled = os.getenv("ENABLE_PROACTIVE_MONITORING", "true").lower() == "true"
proactive_min_priority = float(os.getenv("PROACTIVE_SUGGESTION_MIN_PRIORITY", "0.6"))
@@ -303,7 +276,7 @@ async def run_reason(req: ReasonRequest):
from autonomy.proactive.monitor import get_proactive_monitor
monitor = get_proactive_monitor(min_priority=proactive_min_priority)
self_state = load_self_state() # Already imported at top of file
self_state = load_self_state()
suggestion = await monitor.analyze_session(
session_id=req.session_id,
@@ -311,22 +284,35 @@ async def run_reason(req: ReasonRequest):
self_state=self_state
)
# Append suggestion to response if exists
if suggestion:
suggestion_text = monitor.format_suggestion(suggestion)
persona_answer += suggestion_text
if VERBOSE_DEBUG:
logger.debug(f"[STAGE 7] Proactive suggestion added: {suggestion['type']} (priority: {suggestion['priority']:.2f})")
logger.info(f"💡 Proactive suggestion: {suggestion['type']} (priority: {suggestion['priority']:.2f})")
except Exception as e:
logger.warning(f"[STAGE 7] Proactive monitoring failed: {e}")
logger.warning(f"⚠️ Proactive monitoring failed: {e}")
if VERBOSE_DEBUG:
logger.debug(f"\n{'='*80}")
logger.debug(f"[PIPELINE COMPLETE] Session: {req.session_id}")
logger.debug(f"[PIPELINE COMPLETE] Final answer length: {len(persona_answer)} chars")
logger.debug(f"{'='*80}\n")
stage_timings["proactive"] = (datetime.now() - stage_start).total_seconds() * 1000
# ----------------------------------------------------------------
# PIPELINE COMPLETE — Summary
# ----------------------------------------------------------------
total_duration = (datetime.now() - pipeline_start).total_seconds() * 1000
# Always show pipeline completion
logger.info(f"\n{'='*100}")
logger.info(f"✨ PIPELINE COMPLETE | Session: {req.session_id} | Total: {total_duration:.0f}ms")
logger.info(f"{'='*100}")
# Show timing breakdown in detailed/verbose mode
if LOG_DETAIL_LEVEL in ["detailed", "verbose"]:
logger.info("⏱️ Stage Timings:")
for stage, duration in stage_timings.items():
pct = (duration / total_duration) * 100 if total_duration > 0 else 0
logger.info(f" {stage:15s}: {duration:6.0f}ms ({pct:5.1f}%)")
logger.info(f"📤 Output: {len(persona_answer)} chars")
logger.info(f"{'='*100}\n")
# ----------------------------------------------------------------
# RETURN
@@ -346,6 +332,122 @@ async def run_reason(req: ReasonRequest):
}
# -------------------------------------------------------------------
# /simple endpoint - Standard chatbot mode (no reasoning pipeline)
# -------------------------------------------------------------------
@cortex_router.post("/simple")
async def run_simple(req: ReasonRequest):
    """
    Standard chatbot mode: one direct LLM call, no reasoning pipeline.

    Builds a chat transcript (system message, recent Intake buffer messages,
    then the current user prompt), sends it to the selected backend, records
    the exchange, and returns the reply.

    The same reply text is returned under "draft", "neutral" and "persona";
    "reflection" is always an empty string. If the LLM call raises, the reply
    is the text "Error: <exception>" and is still recorded and returned.
    """
    from datetime import datetime
    from llm.llm_router import call_llm

    started_at = datetime.now()
    banner = "=" * 100

    logger.info(f"\n{banner}")
    logger.info(f"💬 SIMPLE MODE | Session: {req.session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
    logger.info(f"{banner}")
    logger.info(f"📝 User: {req.user_prompt[:150]}...")
    logger.info(f"{'-'*100}\n")

    # The returned context is not used here; the call is kept for its
    # session-state side effects.
    await collect_context(req.session_id, req.user_prompt)

    # Pull the raw conversation so far from the Intake buffer.
    from intake.intake import get_recent_messages
    history = get_recent_messages(req.session_id, limit=20)
    logger.info(f"📋 Retrieved {len(history)} recent messages from Intake buffer")

    # Transcript order: system message, prior turns, current user prompt.
    chat = [
        {
            "role": "system",
            "content": (
                "You are a helpful AI assistant. Provide direct, concise responses to the user's questions. "
                "Maintain context from previous messages in the conversation."
            ),
        }
    ]
    for entry in history:
        chat.append({
            "role": entry.get("role", "user"),
            "content": entry.get("content", ""),
        })
        logger.info(f" - {entry.get('role')}: {entry.get('content', '')[:50]}...")
    chat.append({"role": "user", "content": req.user_prompt})

    logger.info(f"📨 Total messages being sent to LLM: {len(chat)} (including system message)")

    # A backend named in the request wins; otherwise use the env default.
    backend = (req.backend or os.getenv("STANDARD_MODE_LLM", "SECONDARY")).upper()
    logger.info(f"🔧 Using backend: {backend}")

    temperature = 0.7 if req.temperature is None else req.temperature

    # Single chat-style LLM call; a failure becomes the reply text itself.
    try:
        llm_output = await call_llm(
            messages=chat,
            backend=backend,
            temperature=temperature,
            max_tokens=2048,
        )
        reply = llm_output.strip()
    except Exception as e:
        logger.error(f"❌ LLM call failed: {e}")
        reply = f"Error: {str(e)}"

    # Record the exchange: session history first, then both turns into Intake.
    try:
        update_last_assistant_message(req.session_id, reply)
        for role, content in (("user", req.user_prompt), ("assistant", reply)):
            add_exchange_internal({
                "session_id": req.session_id,
                "role": role,
                "content": content,
            })
    except Exception as e:
        logger.warning(f"⚠️ Session update failed: {e}")

    duration = (datetime.now() - started_at).total_seconds() * 1000

    logger.info(f"\n{banner}")
    logger.info(f"✨ SIMPLE MODE COMPLETE | Session: {req.session_id} | Total: {duration:.0f}ms")
    logger.info(f"📤 Output: {len(reply)} chars")
    logger.info(f"{banner}\n")

    return {
        "draft": reply,
        "neutral": reply,
        "persona": reply,
        "reflection": "",
        "session_id": req.session_id,
        "context_summary": {
            "message_count": len(chat),
            "mode": "standard",
        },
    }
# -------------------------------------------------------------------
# /ingest endpoint (internal)
# -------------------------------------------------------------------
+223
View File
@@ -0,0 +1,223 @@
"""
Structured logging utilities for Cortex pipeline debugging.
Provides hierarchical, scannable logs with clear section markers and raw data visibility.
"""
import json
import logging
from typing import Any, Dict, List, Optional
from datetime import datetime
from enum import Enum
class LogLevel(Enum):
    """
    Verbosity tiers for pipeline logging.

    Values increase with verbosity, so callers can compare `.value`
    numerically to ask "is at least this much detail enabled?".
    """

    # Only errors and final results
    MINIMAL = 1
    # Stage summaries + errors
    SUMMARY = 2
    # Adds raw LLM outputs and RAG results
    DETAILED = 3
    # Everything, including intermediate states
    VERBOSE = 4
class PipelineLogger:
    """
    Hierarchical logger for cortex pipeline debugging.

    Wraps a standard `logging.Logger` and decides, per call, how much to emit
    based on a `LogLevel` detail tier.

    Features:
    - Clear visual section markers
    - Boxed detail sections
    - Raw data dumps with line-count truncation
    - Stage timing (milliseconds, reported at pipeline end)
    - Error highlighting

    Note: the verbose-only sections are emitted with `logger.debug`, so the
    wrapped logger must itself allow DEBUG records for them to appear.
    """

    def __init__(self, logger: logging.Logger, level: LogLevel = LogLevel.SUMMARY):
        """
        Args:
            logger: Logger that receives all output.
            level: Detail tier controlling which sections are emitted.
        """
        self.logger = logger
        self.level = level
        # stage name -> duration in ms, filled in by stage_end()
        self.stage_timings = {}
        self.current_stage = None
        self.stage_start_time = None
        self.pipeline_start_time = None

    def _enabled(self, minimum: LogLevel) -> bool:
        """Return True when the configured detail tier is at least `minimum`."""
        return self.level.value >= minimum.value

    def pipeline_start(self, session_id: str, user_prompt: str):
        """Mark the start of a pipeline run and reset collected stage timings."""
        self.pipeline_start_time = datetime.now()
        self.stage_timings = {}

        if self._enabled(LogLevel.SUMMARY):
            self.logger.info(f"\n{'='*100}")
            self.logger.info(f"🚀 PIPELINE START | Session: {session_id} | {datetime.now().strftime('%H:%M:%S.%f')[:-3]}")
            self.logger.info(f"{'='*100}")

        if self._enabled(LogLevel.DETAILED):
            # Prompt is capped at 200 chars; the ellipsis only appears when cut.
            self.logger.info(f"📝 User prompt: {user_prompt[:200]}{'...' if len(user_prompt) > 200 else ''}")
            self.logger.info(f"{'-'*100}\n")

    def stage_start(self, stage_name: str, description: str = ""):
        """Mark the start of a pipeline stage and begin timing it."""
        self.current_stage = stage_name
        self.stage_start_time = datetime.now()

        if self._enabled(LogLevel.SUMMARY):
            timestamp = datetime.now().strftime('%H:%M:%S.%f')[:-3]
            desc_suffix = f" - {description}" if description else ""
            self.logger.info(f"▶️ [{stage_name}]{desc_suffix} | {timestamp}")

    def stage_end(self, result_summary: str = ""):
        """
        Mark the end of the current stage and record its duration.

        Args:
            result_summary: Optional short text appended to the completion line.
        """
        if self.current_stage and self.stage_start_time:
            duration_ms = (datetime.now() - self.stage_start_time).total_seconds() * 1000
            self.stage_timings[self.current_stage] = duration_ms

            if self._enabled(LogLevel.SUMMARY):
                # Separator keeps the summary from running into the "ms" unit.
                summary_suffix = f" | {result_summary}" if result_summary else ""
                self.logger.info(f"✅ [{self.current_stage}] Complete in {duration_ms:.0f}ms{summary_suffix}\n")

        self.current_stage = None
        self.stage_start_time = None

    def log_llm_call(self, backend: str, prompt: str, response: Any, raw_response: Optional[str] = None):
        """
        Log LLM call details with proper formatting.

        Args:
            backend: Backend name (PRIMARY, SECONDARY, etc.)
            prompt: Input prompt to LLM; a list of message dicts is also
                accepted, in which case the last message's content is previewed
            response: Parsed response object
            raw_response: Raw response text, dumped only at VERBOSE
        """
        if self._enabled(LogLevel.DETAILED):
            self.logger.info(f" 🧠 LLM Call | Backend: {backend}")

            # Show prompt (truncated)
            if isinstance(prompt, list):
                prompt_preview = prompt[-1].get('content', '')[:150] if prompt else ''
            else:
                prompt_preview = str(prompt)[:150]
            self.logger.info(f" Prompt: {prompt_preview}...")

            # Show parsed response (truncated); fall back to str() of the dict
            if isinstance(response, dict):
                response_text = (
                    response.get('reply') or
                    response.get('message', {}).get('content') or
                    str(response)
                )[:200]
            else:
                response_text = str(response)[:200]
            self.logger.info(f" Response: {response_text}...")

        # Show raw response in a boxed block, capped at 50 lines
        if raw_response and self._enabled(LogLevel.VERBOSE):
            max_lines = 50
            lines = raw_response.split('\n')
            self.logger.debug(f" ╭─ RAW RESPONSE ────────────────────────────────────")
            for line in lines[:max_lines]:
                self.logger.debug(f"{line}")
            # Count by lines, not newlines, so a single hidden line is reported.
            if len(lines) > max_lines:
                self.logger.debug(f" │ ... ({len(lines) - max_lines} more lines)")
            self.logger.debug(f" ╰───────────────────────────────────────────────────\n")

    def log_rag_results(self, results: List[Dict[str, Any]]):
        """Log RAG/NeoMem results: a count at SUMMARY, top-10 scores at DETAILED."""
        if self._enabled(LogLevel.SUMMARY):
            self.logger.info(f" 📚 RAG Results: {len(results)} memories retrieved")

        if self._enabled(LogLevel.DETAILED) and results:
            self.logger.info(f" ╭─ MEMORY SCORES ───────────────────────────────────")
            for idx, result in enumerate(results[:10], 1):  # Show top 10
                score = result.get("score", 0)
                data_preview = str(result.get("payload", {}).get("data", ""))[:80]
                self.logger.info(f" │ [{idx}] {score:.3f} | {data_preview}...")
            if len(results) > 10:
                self.logger.info(f" │ ... and {len(results) - 10} more results")
            self.logger.info(f" ╰───────────────────────────────────────────────────")

    def log_context_state(self, context_state: Dict[str, Any]):
        """Log a one-line context summary, plus intake summaries at DETAILED."""
        if self._enabled(LogLevel.SUMMARY):
            msg_count = context_state.get("message_count", 0)
            minutes_since = context_state.get("minutes_since_last_msg", 0)
            rag_count = len(context_state.get("rag", []))
            self.logger.info(f" 📊 Context | Messages: {msg_count} | Last: {minutes_since:.1f}min ago | RAG: {rag_count} results")

        if self._enabled(LogLevel.DETAILED):
            intake = context_state.get("intake", {})
            if intake:
                self.logger.info(f" ╭─ INTAKE SUMMARIES ────────────────────────────────")
                for level in ["L1", "L5", "L10", "L20", "L30"]:
                    if level in intake:
                        summary = intake[level]
                        # Dict entries expose their text under "summary";
                        # anything else is stringified and cut to 100 chars.
                        if isinstance(summary, dict):
                            summary = summary.get("summary", str(summary)[:100])
                        else:
                            summary = str(summary)[:100]
                        self.logger.info(f"{level}: {summary}...")
                self.logger.info(f" ╰───────────────────────────────────────────────────")

    def log_error(self, stage: str, error: Exception, critical: bool = False):
        """
        Log an error with context.

        Args:
            stage: Name of the stage where the error occurred.
            error: The exception to report.
            critical: Selects the CRITICAL marker instead of WARNING.
        """
        level_marker = "🔴 CRITICAL" if critical else "⚠️ WARNING"
        self.logger.error(f"{level_marker} | Stage: {stage} | Error: {type(error).__name__}: {str(error)}")

        if self._enabled(LogLevel.VERBOSE):
            import traceback
            # Format the traceback attached to `error` itself, so this works
            # even when called outside the `except` block that caught it.
            trace_text = "".join(
                traceback.format_exception(type(error), error, error.__traceback__)
            )
            self.logger.debug(f" Traceback:\n{trace_text}")

    def log_raw_data(self, label: str, data: Any, max_lines: int = 30):
        """
        Log raw data in a boxed block (VERBOSE only).

        Args:
            label: Heading for the block (upper-cased in the output).
            data: dicts/lists are pretty-printed as JSON; anything else via str().
            max_lines: Maximum number of lines to print before truncating.
        """
        if self._enabled(LogLevel.VERBOSE):
            self.logger.debug(f" ╭─ {label.upper()} ──────────────────────────────────")

            if isinstance(data, (dict, list)):
                text = json.dumps(data, indent=2, default=str)
            else:
                text = str(data)

            lines = text.split('\n')
            for line in lines[:max_lines]:
                self.logger.debug(f"{line}")
            if len(lines) > max_lines:
                self.logger.debug(f" │ ... ({len(lines) - max_lines} more lines)")

            self.logger.debug(f" ╰───────────────────────────────────────────────────")

    def pipeline_end(self, session_id: str, final_output_length: int):
        """
        Mark the end of a pipeline run with a summary.

        Does nothing if pipeline_start() was never called. At DETAILED and
        above, also prints each recorded stage's duration and share of total.
        """
        if self.pipeline_start_time:
            total_duration_ms = (datetime.now() - self.pipeline_start_time).total_seconds() * 1000

            if self._enabled(LogLevel.SUMMARY):
                self.logger.info(f"\n{'='*100}")
                self.logger.info(f"✨ PIPELINE COMPLETE | Session: {session_id} | Total: {total_duration_ms:.0f}ms")
                self.logger.info(f"{'='*100}")

                # Show timing breakdown
                if self.stage_timings and self._enabled(LogLevel.DETAILED):
                    self.logger.info("⏱️ Stage Timings:")
                    for stage, duration in self.stage_timings.items():
                        # Guard against a zero total to avoid division by zero.
                        pct = (duration / total_duration_ms) * 100 if total_duration_ms > 0 else 0
                        self.logger.info(f" {stage:20s}: {duration:6.0f}ms ({pct:5.1f}%)")

                self.logger.info(f"📤 Final output: {final_output_length} characters")
                self.logger.info(f"{'='*100}\n")
def get_log_level_from_env() -> LogLevel:
    """Resolve the active LogLevel from the process environment.

    LOG_DETAIL_LEVEL (case-insensitive) takes precedence when it is one of
    "minimal", "summary", "detailed" or "verbose". For any other value,
    including unset, VERBOSE_DEBUG=true selects VERBOSE; otherwise the
    result is SUMMARY.

    Returns:
        The LogLevel member selected by the rules above.
    """
    import os

    named_levels = {
        "minimal": LogLevel.MINIMAL,
        "summary": LogLevel.SUMMARY,
        "detailed": LogLevel.DETAILED,
        "verbose": LogLevel.VERBOSE,
    }

    requested = os.getenv("LOG_DETAIL_LEVEL", "").lower()
    if requested in named_levels:
        return named_levels[requested]

    # No recognised explicit level: fall back to the boolean flag.
    verbose_flag = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
    return LogLevel.VERBOSE if verbose_flag else LogLevel.SUMMARY  # Default
+64 -64
View File
@@ -10,75 +10,75 @@ volumes:
services:
# ============================================================
# NeoMem: Postgres
# ============================================================
neomem-postgres:
  # Postgres for NeoMem, using the pgvector image pinned to v0.5.1.
  container_name: neomem-postgres
  image: ankane/pgvector:v0.5.1
  restart: unless-stopped
  networks:
    - lyra_net
  ports:
    - "5432:5432"
  volumes:
    # Database files are bind-mounted under ./volumes on the host.
    - ./volumes/postgres_data:/var/lib/postgresql/data
  environment:
    # NOTE(review): credentials are hard-coded here; the healthcheck below
    # uses the same user and database names, so keep them in sync.
    POSTGRES_USER: neomem
    POSTGRES_PASSWORD: neomempass
    POSTGRES_DB: neomem
  healthcheck:
    # Healthy once pg_isready succeeds for the neomem user and database.
    test:
      - "CMD-SHELL"
      - "pg_isready -U neomem -d neomem || exit 1"
    interval: 5s
    timeout: 5s
    retries: 10
# # ============================================================
# # NeoMem: Postgres
# # ============================================================
# neomem-postgres:
# image: ankane/pgvector:v0.5.1
# container_name: neomem-postgres
# restart: unless-stopped
# environment:
# POSTGRES_USER: neomem
# POSTGRES_PASSWORD: neomempass
# POSTGRES_DB: neomem
# volumes:
# - ./volumes/postgres_data:/var/lib/postgresql/data
# ports:
# - "5432:5432"
# healthcheck:
# test: ["CMD-SHELL", "pg_isready -U neomem -d neomem || exit 1"]
# interval: 5s
# timeout: 5s
# retries: 10
# networks:
# - lyra_net
# ============================================================
# NeoMem: Neo4j Graph
# ============================================================
neomem-neo4j:
  # Neo4j graph store for NeoMem (major version 5 image).
  image: neo4j:5
  container_name: neomem-neo4j
  restart: unless-stopped
  environment:
    # The password set here is repeated in the healthcheck command below;
    # change both together.
    NEO4J_AUTH: "neo4j/neomemgraph"
    # Neo4j 5 images read the plugin list from NEO4J_PLUGINS; the former
    # NEO4JLABS_PLUGINS name is the deprecated 4.x spelling.
    NEO4J_PLUGINS: '["graph-data-science"]'
  volumes:
    - ./volumes/neo4j_data:/data
  ports:
    - "7474:7474"
    - "7687:7687"
  healthcheck:
    # Healthy once cypher-shell can authenticate and run a trivial query.
    test: ["CMD-SHELL", "cypher-shell -u neo4j -p neomemgraph 'RETURN 1' || exit 1"]
    interval: 10s
    timeout: 10s
    retries: 10
  networks:
    - lyra_net
# # ============================================================
# # NeoMem: Neo4j Graph
# # ============================================================
# neomem-neo4j:
# image: neo4j:5
# container_name: neomem-neo4j
# restart: unless-stopped
# environment:
# NEO4J_AUTH: "neo4j/neomemgraph"
# NEO4JLABS_PLUGINS: '["graph-data-science"]'
# volumes:
# - ./volumes/neo4j_data:/data
# ports:
# - "7474:7474"
# - "7687:7687"
# healthcheck:
# test: ["CMD-SHELL", "cypher-shell -u neo4j -p neomemgraph 'RETURN 1' || exit 1"]
# interval: 10s
# timeout: 10s
# retries: 10
# networks:
# - lyra_net
# ============================================================
# NeoMem API
# ============================================================
neomem-api:
  # NeoMem API, built from ./neomem and tagged lyra-neomem:latest.
  container_name: neomem-api
  image: lyra-neomem:latest
  build:
    context: ./neomem
  restart: unless-stopped
  networks:
    - lyra_net
  ports:
    - "7077:7077"
  volumes:
    - ./neomem_history:/app/history
  env_file:
    # Order is significant and kept as-is: service-local file first,
    # then the repository-level file.
    - ./neomem/.env
    - ./.env
  depends_on:
    # Start only after both backing stores pass their healthchecks.
    neomem-postgres:
      condition: service_healthy
    neomem-neo4j:
      condition: service_healthy
# neomem-api:
# build:
# context: ./neomem
# image: lyra-neomem:latest
# container_name: neomem-api
# restart: unless-stopped
# env_file:
# - ./neomem/.env
# - ./.env
# volumes:
# - ./neomem_history:/app/history
# ports:
# - "7077:7077"
# depends_on:
# neomem-postgres:
# condition: service_healthy
# neomem-neo4j:
# condition: service_healthy
# networks:
# - lyra_net
# ============================================================
# Relay (host mode)