From 7693bc4080ae1145b2d2984e551bbf0963f72cd2 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Sat, 13 Dec 2025 02:55:49 -0500 Subject: [PATCH 01/10] autonomy scaffold --- ..._monologue_prompt.txt => Assembly-spec.md} | 0 autonomy/monologue/monologue.py | 40 ++++ autonomy/prompts/state_interp_prompt.txt | 0 autonomy/{state => self}/self_state.json | 0 cortex/router.py | 223 ++++++------------ 5 files changed, 117 insertions(+), 146 deletions(-) rename autonomy/{prompts/inner_monologue_prompt.txt => Assembly-spec.md} (100%) create mode 100644 autonomy/monologue/monologue.py delete mode 100644 autonomy/prompts/state_interp_prompt.txt rename autonomy/{state => self}/self_state.json (100%) diff --git a/autonomy/prompts/inner_monologue_prompt.txt b/autonomy/Assembly-spec.md similarity index 100% rename from autonomy/prompts/inner_monologue_prompt.txt rename to autonomy/Assembly-spec.md diff --git a/autonomy/monologue/monologue.py b/autonomy/monologue/monologue.py new file mode 100644 index 0000000..63534b8 --- /dev/null +++ b/autonomy/monologue/monologue.py @@ -0,0 +1,40 @@ +from typing import Dict +from llm.llm_router import call_llm + +MONOLOGUE_SYSTEM_PROMPT = """ +You are Lyra's inner monologue. +You think privately. +You do NOT speak to the user. +You do NOT solve the task. +You only reflect on intent, tone, and depth. 
+ +Return ONLY valid JSON with: +- intent (string) +- tone (neutral | warm | focused | playful | direct) +- depth (short | medium | deep) +- consult_executive (true | false) +""" + +class InnerMonologue: + async def process(self, context: Dict) -> Dict: + prompt = f""" +User message: +{context['user_message']} + +Self state: +{context['self_state']} + +Context summary: +{context['context_summary']} +""" + + result = await call_llm( + provider="mi50", # MythoMax lives here + model="mythomax", + system_prompt=MONOLOGUE_SYSTEM_PROMPT, + user_prompt=prompt, + temperature=0.7, + max_tokens=200 + ) + + return result # must already be JSON diff --git a/autonomy/prompts/state_interp_prompt.txt b/autonomy/prompts/state_interp_prompt.txt deleted file mode 100644 index e69de29..0000000 diff --git a/autonomy/state/self_state.json b/autonomy/self/self_state.json similarity index 100% rename from autonomy/state/self_state.json rename to autonomy/self/self_state.json diff --git a/cortex/router.py b/cortex/router.py index e6ba161..1e0484b 100644 --- a/cortex/router.py +++ b/cortex/router.py @@ -2,7 +2,7 @@ import os import logging -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter from pydantic import BaseModel from reasoning.reasoning import reason_check @@ -13,17 +13,19 @@ from persona.identity import load_identity from context import collect_context, update_last_assistant_message from intake.intake import add_exchange_internal +from autonomy.monologue.monologue import InnerMonologue +from autonomy.self.state import load_self_state -# ----------------------------- -# Debug configuration -# ----------------------------- + +# ------------------------------------------------------------------- +# Setup +# ------------------------------------------------------------------- VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true" logger = logging.getLogger(__name__) if VERBOSE_DEBUG: logger.setLevel(logging.DEBUG) - # Console handler 
console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter( '%(asctime)s [ROUTER] %(levelname)s: %(message)s', @@ -31,7 +33,6 @@ if VERBOSE_DEBUG: )) logger.addHandler(console_handler) - # File handler try: os.makedirs('/app/logs', exist_ok=True) file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a') @@ -40,28 +41,27 @@ if VERBOSE_DEBUG: datefmt='%Y-%m-%d %H:%M:%S' )) logger.addHandler(file_handler) - logger.debug("VERBOSE_DEBUG mode enabled for router.py - logging to file") + logger.debug("VERBOSE_DEBUG enabled for router.py") except Exception as e: - logger.debug(f"VERBOSE_DEBUG mode enabled for router.py - file logging failed: {e}") + logger.debug(f"File logging failed: {e}") + -# ----------------------------- -# Router (NOT FastAPI app) -# ----------------------------- cortex_router = APIRouter() +inner_monologue = InnerMonologue() -# ----------------------------- -# Pydantic models -# ----------------------------- +# ------------------------------------------------------------------- +# Models +# ------------------------------------------------------------------- class ReasonRequest(BaseModel): session_id: str user_prompt: str temperature: float | None = None -# ----------------------------- +# ------------------------------------------------------------------- # /reason endpoint -# ----------------------------- +# ------------------------------------------------------------------- @cortex_router.post("/reason") async def run_reason(req: ReasonRequest): @@ -71,7 +71,9 @@ async def run_reason(req: ReasonRequest): logger.debug(f"[PIPELINE START] User prompt: {req.user_prompt[:200]}...") logger.debug(f"{'='*80}\n") - # 0. 
Collect unified context from all sources + # ---------------------------------------------------------------- + # STAGE 0 — Context + # ---------------------------------------------------------------- if VERBOSE_DEBUG: logger.debug("[STAGE 0] Collecting unified context...") @@ -80,7 +82,9 @@ async def run_reason(req: ReasonRequest): if VERBOSE_DEBUG: logger.debug(f"[STAGE 0] Context collected - {len(context_state.get('rag', []))} RAG results") - # 0.5. Load identity block + # ---------------------------------------------------------------- + # STAGE 0.5 — Identity + # ---------------------------------------------------------------- if VERBOSE_DEBUG: logger.debug("[STAGE 0.5] Loading identity block...") @@ -89,37 +93,59 @@ async def run_reason(req: ReasonRequest): if VERBOSE_DEBUG: logger.debug(f"[STAGE 0.5] Identity loaded: {identity_block.get('name', 'Unknown')}") - # 1. Extract Intake summary for reflection - # Use L20 (Session Overview) as primary summary for reflection + # ---------------------------------------------------------------- + # STAGE 0.6 — Inner Monologue (observer-only) + # ---------------------------------------------------------------- + if VERBOSE_DEBUG: + logger.debug("[STAGE 0.6] Running inner monologue...") + + try: + self_state = load_self_state() + + mono_context = { + "user_message": req.user_prompt, + "session_id": req.session_id, + "self_state": self_state, + "context_summary": context_state, + } + + inner_result = await inner_monologue.process(mono_context) + logger.info(f"[INNER_MONOLOGUE] {inner_result}") + + except Exception as e: + logger.warning(f"[INNER_MONOLOGUE] failed: {e}") + + # ---------------------------------------------------------------- + # STAGE 1 — Intake summary + # ---------------------------------------------------------------- intake_summary = "(no context available)" if context_state.get("intake"): - l20_summary = context_state["intake"].get("L20") - if l20_summary and isinstance(l20_summary, dict): - 
intake_summary = l20_summary.get("summary", "(no context available)") - elif isinstance(l20_summary, str): - intake_summary = l20_summary + l20 = context_state["intake"].get("L20") + if isinstance(l20, dict): + intake_summary = l20.get("summary", intake_summary) + elif isinstance(l20, str): + intake_summary = l20 if VERBOSE_DEBUG: logger.debug(f"[STAGE 1] Intake summary extracted (L20): {intake_summary[:150]}...") - # 2. Reflection + # ---------------------------------------------------------------- + # STAGE 2 — Reflection + # ---------------------------------------------------------------- if VERBOSE_DEBUG: logger.debug("[STAGE 2] Running reflection...") try: reflection = await reflect_notes(intake_summary, identity_block=identity_block) reflection_notes = reflection.get("notes", []) - - if VERBOSE_DEBUG: - logger.debug(f"[STAGE 2] Reflection complete - {len(reflection_notes)} notes generated") - for idx, note in enumerate(reflection_notes, 1): - logger.debug(f" Note {idx}: {note}") except Exception as e: reflection_notes = [] if VERBOSE_DEBUG: logger.debug(f"[STAGE 2] Reflection failed: {e}") - # 3. First-pass reasoning draft + # ---------------------------------------------------------------- + # STAGE 3 — Reasoning (draft) + # ---------------------------------------------------------------- if VERBOSE_DEBUG: logger.debug("[STAGE 3] Running reasoning (draft)...") @@ -131,11 +157,9 @@ async def run_reason(req: ReasonRequest): context=context_state ) - if VERBOSE_DEBUG: - logger.debug(f"[STAGE 3] Draft answer ({len(draft)} chars):") - logger.debug(f"--- DRAFT START ---\n{draft}\n--- DRAFT END ---") - - # 4. 
Refinement + # ---------------------------------------------------------------- + # STAGE 4 — Refinement + # ---------------------------------------------------------------- if VERBOSE_DEBUG: logger.debug("[STAGE 4] Running refinement...") @@ -145,26 +169,20 @@ async def run_reason(req: ReasonRequest): identity_block=identity_block, rag_block=context_state.get("rag", []), ) + final_neutral = result["final_output"] - if VERBOSE_DEBUG: - logger.debug(f"[STAGE 4] Refined answer ({len(final_neutral)} chars):") - logger.debug(f"--- REFINED START ---\n{final_neutral}\n--- REFINED END ---") - - # 5. Persona layer + # ---------------------------------------------------------------- + # STAGE 5 — Persona + # ---------------------------------------------------------------- if VERBOSE_DEBUG: logger.debug("[STAGE 5] Applying persona layer...") persona_answer = await speak(final_neutral) - if VERBOSE_DEBUG: - logger.debug(f"[STAGE 5] Persona answer ({len(persona_answer)} chars):") - logger.debug(f"--- PERSONA START ---\n{persona_answer}\n--- PERSONA END ---") - - # 6. Update session state with assistant's response - if VERBOSE_DEBUG: - logger.debug("[STAGE 6] Updating session state...") - + # ---------------------------------------------------------------- + # STAGE 6 — Session update + # ---------------------------------------------------------------- update_last_assistant_message(req.session_id, persona_answer) if VERBOSE_DEBUG: @@ -173,7 +191,9 @@ async def run_reason(req: ReasonRequest): logger.debug(f"[PIPELINE COMPLETE] Final answer length: {len(persona_answer)} chars") logger.debug(f"{'='*80}\n") - # 7. 
Return full bundle + # ---------------------------------------------------------------- + # RETURN + # ---------------------------------------------------------------- return { "draft": draft, "neutral": final_neutral, @@ -189,9 +209,9 @@ async def run_reason(req: ReasonRequest): } -# ----------------------------- -# Intake ingest (internal feed) -# ----------------------------- +# ------------------------------------------------------------------- +# /ingest endpoint (internal) +# ------------------------------------------------------------------- class IngestPayload(BaseModel): session_id: str user_msg: str @@ -200,107 +220,18 @@ class IngestPayload(BaseModel): @cortex_router.post("/ingest") async def ingest(payload: IngestPayload): - """ - Receives (session_id, user_msg, assistant_msg) from Relay - and pushes directly into Intake's in-memory buffer. - - Uses lenient error handling - always returns success to avoid - breaking the chat pipeline. - """ try: - # 1. Update Cortex session state update_last_assistant_message(payload.session_id, payload.assistant_msg) except Exception as e: - logger.warning(f"[INGEST] Failed to update session state: {e}") - # Continue anyway (lenient mode) + logger.warning(f"[INGEST] Session update failed: {e}") try: - # 2. 
Feed Intake internally (no HTTP) add_exchange_internal({ "session_id": payload.session_id, "user_msg": payload.user_msg, "assistant_msg": payload.assistant_msg, }) - logger.debug(f"[INGEST] Added exchange to Intake for {payload.session_id}") except Exception as e: - logger.warning(f"[INGEST] Failed to add to Intake: {e}") - # Continue anyway (lenient mode) - - # Always return success (user requirement: never fail chat pipeline) - return { - "status": "ok", - "session_id": payload.session_id - } - -# ----------------------------- -# Debug endpoint: summarized context -# ----------------------------- -@cortex_router.get("/debug/summary") -async def debug_summary(session_id: str): - """ - Diagnostic endpoint that runs Intake's summarize_context() for a session. - - Shows exactly what L1/L5/L10/L20/L30 summaries would look like - inside the actual Uvicorn worker, using the real SESSIONS buffer. - """ - from intake.intake import SESSIONS, summarize_context - - # Validate session - session = SESSIONS.get(session_id) - if not session: - return {"error": "session not found", "session_id": session_id} - - # Convert deque into the structure summarize_context expects - buffer = session["buffer"] - exchanges = [ - { - "user_msg": ex.get("user_msg", ""), - "assistant_msg": ex.get("assistant_msg", ""), - } - for ex in buffer - ] - - # 🔥 CRITICAL FIX — summarize_context is async - summary = await summarize_context(session_id, exchanges) - - return { - "session_id": session_id, - "buffer_size": len(buffer), - "exchanges_preview": exchanges[-5:], # last 5 items - "summary": summary - } - -# ----------------------------- -# Debug endpoint for SESSIONS -# ----------------------------- -@cortex_router.get("/debug/sessions") -async def debug_sessions(): - """ - Diagnostic endpoint to inspect SESSIONS from within the running Uvicorn worker. - This shows the actual state of the in-memory SESSIONS dict. 
- """ - from intake.intake import SESSIONS - - sessions_data = {} - for session_id, session_info in SESSIONS.items(): - buffer = session_info["buffer"] - sessions_data[session_id] = { - "created_at": session_info["created_at"].isoformat(), - "buffer_size": len(buffer), - "buffer_maxlen": buffer.maxlen, - "recent_exchanges": [ - { - "user_msg": ex.get("user_msg", "")[:100], - "assistant_msg": ex.get("assistant_msg", "")[:100], - "timestamp": ex.get("timestamp", "") - } - for ex in list(buffer)[-5:] # Last 5 exchanges - ] - } - - return { - "sessions_object_id": id(SESSIONS), - "total_sessions": len(SESSIONS), - "sessions": sessions_data - } + logger.warning(f"[INGEST] Intake update failed: {e}") + return {"status": "ok", "session_id": payload.session_id} From 70e57ba5d29b5f9088a6f4f226b43a07f30482bd Mon Sep 17 00:00:00 2001 From: serversdwn Date: Sat, 13 Dec 2025 04:13:12 -0500 Subject: [PATCH 02/10] cortex pipeline stablized, inner monologue is now determining user intent and tone --- autonomy/Assembly-spec.md | 0 autonomy/autonomy_core.py | 0 autonomy/inner_self.py | 0 autonomy/monologue/monologue.py | 40 --- cortex/autonomy/Assembly-spec.md | 249 ++++++++++++++++++ cortex/autonomy/__init__.py | 1 + cortex/autonomy/monologue/__init__.py | 1 + cortex/autonomy/monologue/monologue.py | 115 ++++++++ cortex/autonomy/self/__init__.py | 1 + .../autonomy}/self/self_state.json | 0 cortex/autonomy/self/state.py | 11 + cortex/intake/intake.py | 107 ++++---- 12 files changed, 428 insertions(+), 97 deletions(-) delete mode 100644 autonomy/Assembly-spec.md delete mode 100644 autonomy/autonomy_core.py delete mode 100644 autonomy/inner_self.py delete mode 100644 autonomy/monologue/monologue.py create mode 100644 cortex/autonomy/Assembly-spec.md create mode 100644 cortex/autonomy/__init__.py create mode 100644 cortex/autonomy/monologue/__init__.py create mode 100644 cortex/autonomy/monologue/monologue.py create mode 100644 cortex/autonomy/self/__init__.py rename {autonomy => 
cortex/autonomy}/self/self_state.json (100%) create mode 100644 cortex/autonomy/self/state.py diff --git a/autonomy/Assembly-spec.md b/autonomy/Assembly-spec.md deleted file mode 100644 index e69de29..0000000 diff --git a/autonomy/autonomy_core.py b/autonomy/autonomy_core.py deleted file mode 100644 index e69de29..0000000 diff --git a/autonomy/inner_self.py b/autonomy/inner_self.py deleted file mode 100644 index e69de29..0000000 diff --git a/autonomy/monologue/monologue.py b/autonomy/monologue/monologue.py deleted file mode 100644 index 63534b8..0000000 --- a/autonomy/monologue/monologue.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Dict -from llm.llm_router import call_llm - -MONOLOGUE_SYSTEM_PROMPT = """ -You are Lyra's inner monologue. -You think privately. -You do NOT speak to the user. -You do NOT solve the task. -You only reflect on intent, tone, and depth. - -Return ONLY valid JSON with: -- intent (string) -- tone (neutral | warm | focused | playful | direct) -- depth (short | medium | deep) -- consult_executive (true | false) -""" - -class InnerMonologue: - async def process(self, context: Dict) -> Dict: - prompt = f""" -User message: -{context['user_message']} - -Self state: -{context['self_state']} - -Context summary: -{context['context_summary']} -""" - - result = await call_llm( - provider="mi50", # MythoMax lives here - model="mythomax", - system_prompt=MONOLOGUE_SYSTEM_PROMPT, - user_prompt=prompt, - temperature=0.7, - max_tokens=200 - ) - - return result # must already be JSON diff --git a/cortex/autonomy/Assembly-spec.md b/cortex/autonomy/Assembly-spec.md new file mode 100644 index 0000000..25e7442 --- /dev/null +++ b/cortex/autonomy/Assembly-spec.md @@ -0,0 +1,249 @@ +# 📐 Project Lyra — Cognitive Assembly Spec +**Version:** 0.6.1 +**Status:** Canonical reference +**Purpose:** Define clear separation of Self, Thought, Reasoning, and Speech + +--- + +## 1. 
High-Level Overview + +Lyra is composed of **four distinct cognitive layers**, plus I/O. + +Each layer has: +- a **responsibility** +- a **scope** +- clear **inputs / outputs** +- explicit **authority boundaries** + +No layer is allowed to “do everything.” + +--- + +## 2. Layer Definitions + +### 2.1 Autonomy / Self (NON-LLM) + +**What it is** +- Persistent identity +- Long-term state +- Mood, preferences, values +- Continuity across time + +**What it is NOT** +- Not a reasoning engine +- Not a planner +- Not a speaker +- Not creative + +**Implementation** +- Data + light logic +- JSON / Python objects +- No LLM calls + +**Lives at** +``` +project-lyra/autonomy/self/ +``` + +**Inputs** +- Events (user message received, response sent) +- Time / idle ticks (later) + +**Outputs** +- Self state snapshot +- Flags / preferences (e.g. verbosity, tone bias) + +--- + +### 2.2 Inner Monologue (LLM, PRIVATE) + +**What it is** +- Internal language-based thought +- Reflection +- Intent formation +- “What do I think about this?” + +**What it is NOT** +- Not final reasoning +- Not execution +- Not user-facing + +**Model** +- MythoMax + +**Lives at** +``` +project-lyra/autonomy/monologue/ +``` + +**Inputs** +- User message +- Self state snapshot +- Recent context summary + +**Outputs** +- Intent +- Tone guidance +- Depth guidance +- “Consult executive?” flag + +**Example Output** +```json +{ + "intent": "technical_exploration", + "tone": "focused", + "depth": "deep", + "consult_executive": true +} +``` + +--- + +### 2.3 Cortex (Reasoning & Execution) + +**What it is** +- Thinking pipeline +- Planning +- Tool selection +- Task execution +- Draft generation + +**What it is NOT** +- Not identity +- Not personality +- Not persistent self + +**Models** +- DeepSeek-R1 → Executive / Planner +- GPT-4o-mini → Executor / Drafter + +**Lives at** +``` +project-lyra/cortex/ +``` + +**Inputs** +- User message +- Inner Monologue output +- Memory / RAG / tools + +**Outputs** +- Draft response 
(content only) +- Metadata (sources, confidence, etc.) + +--- + +### 2.4 Persona / Speech (LLM, USER-FACING) + +**What it is** +- Voice +- Style +- Expression +- Social behavior + +**What it is NOT** +- Not planning +- Not deep reasoning +- Not decision-making + +**Model** +- MythoMax + +**Lives at** +``` +project-lyra/core/persona/ +``` + +**Inputs** +- Draft response (from Cortex) +- Tone + intent (from Inner Monologue) +- Persona configuration + +**Outputs** +- Final user-visible text + +--- + +## 3. Message Flow (Authoritative) + +### 3.1 Standard Message Path + +``` +User + ↓ +UI + ↓ +Relay + ↓ +Cortex + ↓ +Autonomy / Self (state snapshot) + ↓ +Inner Monologue (MythoMax) + ↓ +[ consult_executive? ] + ├─ Yes → DeepSeek-R1 (plan) + └─ No → skip + ↓ +GPT-4o-mini (execute & draft) + ↓ +Persona (MythoMax) + ↓ +Relay + ↓ +UI + ↓ +User +``` + +### 3.2 Fast Path (No Thinking) + +``` +User → UI → Relay → Persona → Relay → UI +``` + +--- + +## 4. Authority Rules (Non-Negotiable) + +- Self never calls an LLM +- Inner Monologue never speaks to the user +- Cortex never applies personality +- Persona never reasons or plans +- DeepSeek never writes final answers +- MythoMax never plans execution + +--- + +## 5. Folder Mapping + +``` +project-lyra/ +├── autonomy/ +│ ├── self/ +│ ├── monologue/ +│ └── executive/ +├── cortex/ +├── core/ +│ └── persona/ +├── relay/ +└── ui/ +``` + +--- + +## 6. Current Status + +- UI ✔ +- Relay ✔ +- Cortex ✔ +- Persona ✔ +- Autonomy ✔ +- Inner Monologue ⚠ partially wired +- Executive gating ⚠ planned + +--- + +## 7. Next Decision + +Decide whether **Inner Monologue runs every message** or **only when triggered**. 
diff --git a/cortex/autonomy/__init__.py b/cortex/autonomy/__init__.py new file mode 100644 index 0000000..49f54e0 --- /dev/null +++ b/cortex/autonomy/__init__.py @@ -0,0 +1 @@ +# Autonomy module for Lyra diff --git a/cortex/autonomy/monologue/__init__.py b/cortex/autonomy/monologue/__init__.py new file mode 100644 index 0000000..8cd4fb8 --- /dev/null +++ b/cortex/autonomy/monologue/__init__.py @@ -0,0 +1 @@ +# Inner monologue module diff --git a/cortex/autonomy/monologue/monologue.py b/cortex/autonomy/monologue/monologue.py new file mode 100644 index 0000000..a03e5f5 --- /dev/null +++ b/cortex/autonomy/monologue/monologue.py @@ -0,0 +1,115 @@ +import os +import json +import logging +from typing import Dict +from llm.llm_router import call_llm + +# Configuration +MONOLOGUE_LLM = os.getenv("MONOLOGUE_LLM", "PRIMARY").upper() +VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true" + +# Logger +logger = logging.getLogger(__name__) + +if VERBOSE_DEBUG: + logger.setLevel(logging.DEBUG) + console_handler = logging.StreamHandler() + console_handler.setFormatter(logging.Formatter( + '%(asctime)s [MONOLOGUE] %(levelname)s: %(message)s', + datefmt='%H:%M:%S' + )) + logger.addHandler(console_handler) + +MONOLOGUE_SYSTEM_PROMPT = """ +You are Lyra's inner monologue. +You think privately. +You do NOT speak to the user. +You do NOT solve the task. +You only reflect on intent, tone, and depth. 
+ +Return ONLY valid JSON with: +- intent (string) +- tone (neutral | warm | focused | playful | direct) +- depth (short | medium | deep) +- consult_executive (true | false) +""" + +class InnerMonologue: + async def process(self, context: Dict) -> Dict: + # Build full prompt with system instructions merged in + full_prompt = f"""{MONOLOGUE_SYSTEM_PROMPT} + +User message: +{context['user_message']} + +Self state: +{context['self_state']} + +Context summary: +{context['context_summary']} + +Output JSON only: +""" + + # Call LLM using configured backend + if VERBOSE_DEBUG: + logger.debug(f"[InnerMonologue] Calling LLM with backend: {MONOLOGUE_LLM}") + logger.debug(f"[InnerMonologue] Prompt length: {len(full_prompt)} chars") + + result = await call_llm( + full_prompt, + backend=MONOLOGUE_LLM, + temperature=0.7, + max_tokens=200 + ) + + if VERBOSE_DEBUG: + logger.debug(f"[InnerMonologue] Raw LLM response:") + logger.debug(f"{'='*80}") + logger.debug(result) + logger.debug(f"{'='*80}") + logger.debug(f"[InnerMonologue] Response length: {len(result) if result else 0} chars") + + # Parse JSON response - extract just the JSON part if there's extra text + try: + # Try direct parsing first + parsed = json.loads(result) + if VERBOSE_DEBUG: + logger.debug(f"[InnerMonologue] Successfully parsed JSON directly: {parsed}") + return parsed + except json.JSONDecodeError: + # If direct parsing fails, try to extract JSON from the response + if VERBOSE_DEBUG: + logger.debug(f"[InnerMonologue] Direct JSON parse failed, attempting extraction...") + + # Look for JSON object (starts with { and ends with }) + import re + json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', result, re.DOTALL) + + if json_match: + json_str = json_match.group(0) + try: + parsed = json.loads(json_str) + if VERBOSE_DEBUG: + logger.debug(f"[InnerMonologue] Successfully extracted and parsed JSON: {parsed}") + return parsed + except json.JSONDecodeError as e: + if VERBOSE_DEBUG: + 
logger.warning(f"[InnerMonologue] Extracted JSON still invalid: {e}") + else: + if VERBOSE_DEBUG: + logger.warning(f"[InnerMonologue] No JSON object found in response") + + # Final fallback + if VERBOSE_DEBUG: + logger.warning(f"[InnerMonologue] All parsing attempts failed, using fallback") + else: + print(f"[InnerMonologue] JSON extraction failed") + print(f"[InnerMonologue] Raw response was: {result[:500]}") + + return { + "intent": "unknown", + "tone": "neutral", + "depth": "medium", + "consult_executive": False + } diff --git a/cortex/autonomy/self/__init__.py b/cortex/autonomy/self/__init__.py new file mode 100644 index 0000000..60c47c7 --- /dev/null +++ b/cortex/autonomy/self/__init__.py @@ -0,0 +1 @@ +# Self state module diff --git a/autonomy/self/self_state.json b/cortex/autonomy/self/self_state.json similarity index 100% rename from autonomy/self/self_state.json rename to cortex/autonomy/self/self_state.json diff --git a/cortex/autonomy/self/state.py b/cortex/autonomy/self/state.py new file mode 100644 index 0000000..ab9c626 --- /dev/null +++ b/cortex/autonomy/self/state.py @@ -0,0 +1,11 @@ +""" +Stub for self state management. +""" + +def load_self_state(): + """Load self state - stub implementation""" + return { + "mood": "neutral", + "energy": 0.8, + "focus": "user_request" + } diff --git a/cortex/intake/intake.py b/cortex/intake/intake.py index f5d9cba..ce0e592 100644 --- a/cortex/intake/intake.py +++ b/cortex/intake/intake.py @@ -234,25 +234,27 @@ def push_to_neomem(summary: str, session_id: str, level: str) -> None: async def summarize_context(session_id: str, exchanges: list[dict]): """ Internal summarizer that uses Cortex's LLM router. - Produces L1 / L5 / L10 / L20 / L30 summaries. 
+ Produces cascading summaries based on exchange count: + - L1: Always (most recent activity) + - L2: After 2+ exchanges + - L5: After 5+ exchanges + - L10: After 10+ exchanges + - L20: After 20+ exchanges + - L30: After 30+ exchanges Args: session_id: The conversation/session ID exchanges: A list of {"user_msg": ..., "assistant_msg": ..., "timestamp": ...} """ - # Build raw conversation text - convo_lines = [] - for ex in exchanges: - convo_lines.append(f"User: {ex.get('user_msg','')}") - convo_lines.append(f"Assistant: {ex.get('assistant_msg','')}") - convo_text = "\n".join(convo_lines) + exchange_count = len(exchanges) - if not convo_text.strip(): + if exchange_count == 0: return { "session_id": session_id, "exchange_count": 0, "L1": "", + "L2": "", "L5": "", "L10": "", "L20": "", @@ -260,63 +262,54 @@ async def summarize_context(session_id: str, exchanges: list[dict]): "last_updated": datetime.now().isoformat() } - # Prompt the LLM (internal — no HTTP) - prompt = f""" -Summarize the conversation below into multiple compression levels. - -Conversation: ----------------- -{convo_text} ----------------- - -Output strictly in JSON with keys: -L1 → ultra short summary (1–2 sentences max) -L5 → short summary -L10 → medium summary -L20 → detailed overview -L30 → full detailed summary - -JSON only. No text outside JSON. 
-""" + result = { + "session_id": session_id, + "exchange_count": exchange_count, + "L1": "", + "L2": "", + "L5": "", + "L10": "", + "L20": "", + "L30": "", + "last_updated": datetime.now().isoformat() + } try: - llm_response = await call_llm( - prompt, - backend=INTAKE_LLM, - temperature=0.2 - ) + # L1: Always generate (most recent exchanges) + result["L1"] = await summarize_simple(exchanges[-5:]) + print(f"[Intake] Generated L1 for {session_id} ({exchange_count} exchanges)") - print(f"[Intake] LLM response length: {len(llm_response) if llm_response else 0}") - print(f"[Intake] LLM response preview: {llm_response[:200] if llm_response else '(empty)'}") + # L2: After 2+ exchanges + if exchange_count >= 2: + result["L2"] = await summarize_simple(exchanges[-2:]) + print(f"[Intake] Generated L2 for {session_id}") - # LLM should return JSON, parse it - if not llm_response or not llm_response.strip(): - raise ValueError("Empty response from LLM") + # L5: After 5+ exchanges + if exchange_count >= 5: + result["L5"] = await summarize_simple(exchanges[-10:]) + print(f"[Intake] Generated L5 for {session_id}") - summary = json.loads(llm_response) + # L10: After 10+ exchanges (Reality Check) + if exchange_count >= 10: + result["L10"] = await summarize_L10(session_id, exchanges) + print(f"[Intake] Generated L10 for {session_id}") - return { - "session_id": session_id, - "exchange_count": len(exchanges), - "L1": summary.get("L1", ""), - "L5": summary.get("L5", ""), - "L10": summary.get("L10", ""), - "L20": summary.get("L20", ""), - "L30": summary.get("L30", ""), - "last_updated": datetime.now().isoformat() - } + # L20: After 20+ exchanges (Session Overview - merges L10s) + if exchange_count >= 20 and exchange_count % 10 == 0: + result["L20"] = await summarize_L20(session_id) + print(f"[Intake] Generated L20 for {session_id}") + + # L30: After 30+ exchanges (Continuity Report - merges L20s) + if exchange_count >= 30 and exchange_count % 10 == 0: + result["L30"] = await 
summarize_L30(session_id) + print(f"[Intake] Generated L30 for {session_id}") + + return result except Exception as e: - return { - "session_id": session_id, - "exchange_count": len(exchanges), - "L1": f"[Error summarizing: {str(e)}]", - "L5": "", - "L10": "", - "L20": "", - "L30": "", - "last_updated": datetime.now().isoformat() - } + print(f"[Intake] Error during summarization: {e}") + result["L1"] = f"[Error summarizing: {str(e)}]" + return result # ───────────────────────────────── # Background summarization stub From ae41b51888ab3f44220abb896e393e0e3c1da0b3 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Sun, 14 Dec 2025 01:44:05 -0500 Subject: [PATCH 03/10] autonomy build, phase 1 --- cortex/autonomy/executive/__init__.py | 1 + cortex/autonomy/executive/planner.py | 121 ++++++++++++++++ cortex/autonomy/self/analyzer.py | 74 ++++++++++ cortex/autonomy/self/state.py | 192 ++++++++++++++++++++++++- cortex/data/self_state.json | 20 +++ cortex/persona/speak.py | 40 +++++- cortex/reasoning/reasoning.py | 54 ++++++- cortex/router.py | 49 ++++++- cortex/tests/__init__.py | 1 + cortex/tests/test_autonomy_phase1.py | 197 ++++++++++++++++++++++++++ 10 files changed, 735 insertions(+), 14 deletions(-) create mode 100644 cortex/autonomy/executive/__init__.py create mode 100644 cortex/autonomy/executive/planner.py create mode 100644 cortex/autonomy/self/analyzer.py create mode 100644 cortex/data/self_state.json create mode 100644 cortex/tests/__init__.py create mode 100644 cortex/tests/test_autonomy_phase1.py diff --git a/cortex/autonomy/executive/__init__.py b/cortex/autonomy/executive/__init__.py new file mode 100644 index 0000000..1259881 --- /dev/null +++ b/cortex/autonomy/executive/__init__.py @@ -0,0 +1 @@ +"""Executive planning and decision-making module.""" diff --git a/cortex/autonomy/executive/planner.py b/cortex/autonomy/executive/planner.py new file mode 100644 index 0000000..b6a0639 --- /dev/null +++ b/cortex/autonomy/executive/planner.py @@ -0,0 +1,121 @@ 
+""" +Executive planner - generates execution plans for complex requests. +Activated when inner monologue sets consult_executive=true. +""" + +import os +import logging +from typing import Dict, Any, Optional +from llm.llm_router import call_llm + +EXECUTIVE_LLM = os.getenv("EXECUTIVE_LLM", "CLOUD").upper() +VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true" + +logger = logging.getLogger(__name__) + +if VERBOSE_DEBUG: + logger.setLevel(logging.DEBUG) + + +EXECUTIVE_SYSTEM_PROMPT = """ +You are Lyra's executive planning system. +You create structured execution plans for complex tasks. +You do NOT generate the final response - only the plan. + +Your plan should include: +1. Task decomposition (break into steps) +2. Required tools/resources +3. Reasoning strategy +4. Success criteria + +Return a concise plan in natural language. +""" + + +async def plan_execution( + user_prompt: str, + intent: str, + context_state: Dict[str, Any], + identity_block: Dict[str, Any] +) -> Dict[str, Any]: + """ + Generate execution plan for complex request. 
+ + Args: + user_prompt: User's message + intent: Detected intent from inner monologue + context_state: Full context + identity_block: Lyra's identity + + Returns: + Plan dictionary with structure: + { + "summary": "One-line plan summary", + "plan_text": "Detailed plan", + "steps": ["step1", "step2", ...], + "tools_needed": ["RAG", "WEB", ...], + "estimated_complexity": "low | medium | high" + } + """ + + # Build planning prompt + tools_available = context_state.get("tools_available", []) + + prompt = f"""{EXECUTIVE_SYSTEM_PROMPT} + +User request: {user_prompt} + +Detected intent: {intent} + +Available tools: {", ".join(tools_available) if tools_available else "None"} + +Session context: +- Message count: {context_state.get('message_count', 0)} +- Time since last message: {context_state.get('minutes_since_last_msg', 0):.1f} minutes +- Active project: {context_state.get('active_project', 'None')} + +Generate a structured execution plan. +""" + + if VERBOSE_DEBUG: + logger.debug(f"[EXECUTIVE] Planning prompt:\n{prompt}") + + # Call executive LLM + plan_text = await call_llm( + prompt, + backend=EXECUTIVE_LLM, + temperature=0.3, # Lower temperature for planning + max_tokens=500 + ) + + if VERBOSE_DEBUG: + logger.debug(f"[EXECUTIVE] Generated plan:\n{plan_text}") + + # Parse plan (simple heuristic extraction for Phase 1) + steps = [] + tools_needed = [] + + for line in plan_text.split('\n'): + line_lower = line.lower() + if any(marker in line_lower for marker in ['step', '1.', '2.', '3.', '-']): + steps.append(line.strip()) + + if tools_available: + for tool in tools_available: + if tool.lower() in line_lower and tool not in tools_needed: + tools_needed.append(tool) + + # Estimate complexity (simple heuristic) + complexity = "low" + if len(steps) > 3 or len(tools_needed) > 1: + complexity = "medium" + if len(steps) > 5 or "research" in intent.lower() or "analyze" in intent.lower(): + complexity = "high" + + return { + "summary": plan_text.split('\n')[0][:100] if 
plan_text else "Complex task execution plan", + "plan_text": plan_text, + "steps": steps[:10], # Limit to 10 steps + "tools_needed": tools_needed, + "estimated_complexity": complexity + } diff --git a/cortex/autonomy/self/analyzer.py b/cortex/autonomy/self/analyzer.py new file mode 100644 index 0000000..4ee22e6 --- /dev/null +++ b/cortex/autonomy/self/analyzer.py @@ -0,0 +1,74 @@ +""" +Analyze interactions and update self-state accordingly. +""" + +import logging +from typing import Dict, Any +from .state import update_self_state + +logger = logging.getLogger(__name__) + + +async def analyze_and_update_state( + monologue: Dict[str, Any], + user_prompt: str, + response: str, + context: Dict[str, Any] +) -> None: + """ + Analyze interaction and update self-state. + + This runs after response generation to update Lyra's internal state + based on the interaction. + + Args: + monologue: Inner monologue output + user_prompt: User's message + response: Lyra's response + context: Full context state + """ + + # Simple heuristics for state updates + # TODO: Replace with LLM-based sentiment analysis in Phase 2 + + mood_delta = 0.0 + energy_delta = 0.0 + confidence_delta = 0.0 + curiosity_delta = 0.0 + new_focus = None + + # Analyze intent from monologue + intent = monologue.get("intent", "").lower() if monologue else "" + + if "technical" in intent or "complex" in intent: + energy_delta = -0.05 # Deep thinking is tiring + confidence_delta = 0.05 if len(response) > 200 else -0.05 + new_focus = "technical_problem" + + elif "creative" in intent or "brainstorm" in intent: + mood_delta = 0.1 # Creative work is engaging + curiosity_delta = 0.1 + new_focus = "creative_exploration" + + elif "clarification" in intent or "confused" in intent: + confidence_delta = -0.05 + new_focus = "understanding_user" + + elif "simple" in intent or "casual" in intent: + energy_delta = 0.05 # Light conversation is refreshing + new_focus = "conversation" + + # Check for learning opportunities 
(questions in user prompt) + if "?" in user_prompt and any(word in user_prompt.lower() for word in ["how", "why", "what"]): + curiosity_delta += 0.05 + + # Update state + update_self_state( + mood_delta=mood_delta, + energy_delta=energy_delta, + new_focus=new_focus, + confidence_delta=confidence_delta, + curiosity_delta=curiosity_delta + ) + + logger.info(f"Self-state updated based on interaction: focus={new_focus}") diff --git a/cortex/autonomy/self/state.py b/cortex/autonomy/self/state.py index ab9c626..a8d9e46 100644 --- a/cortex/autonomy/self/state.py +++ b/cortex/autonomy/self/state.py @@ -1,11 +1,189 @@ """ -Stub for self state management. +Self-state management for Project Lyra. +Maintains persistent identity, mood, energy, and focus across sessions. """ -def load_self_state(): - """Load self state - stub implementation""" - return { - "mood": "neutral", - "energy": 0.8, - "focus": "user_request" +import json +import logging +import os +from datetime import datetime +from pathlib import Path +from typing import Dict, Any, Optional + +# Configuration +STATE_FILE = Path(os.getenv("SELF_STATE_FILE", "/app/data/self_state.json")) +VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true" + +logger = logging.getLogger(__name__) + +if VERBOSE_DEBUG: + logger.setLevel(logging.DEBUG) + +# Default state structure +DEFAULT_STATE = { + "mood": "neutral", + "energy": 0.8, + "focus": "user_request", + "confidence": 0.7, + "curiosity": 0.5, + "last_updated": None, + "interaction_count": 0, + "learning_queue": [], # Topics Lyra wants to explore + "active_goals": [], # Self-directed goals + "preferences": { + "verbosity": "medium", + "formality": "casual", + "proactivity": 0.3 # How likely to suggest things unprompted + }, + "metadata": { + "version": "1.0", + "created_at": None } +} + + +class SelfState: + """Manages Lyra's persistent self-state.""" + + def __init__(self): + self._state = self._load_state() + + def _load_state(self) -> Dict[str, Any]: + """Load 
state from disk or create default.""" + if STATE_FILE.exists(): + try: + with open(STATE_FILE, 'r') as f: + state = json.load(f) + logger.info(f"Loaded self-state from {STATE_FILE}") + return state + except Exception as e: + logger.error(f"Failed to load self-state: {e}") + return self._create_default_state() + else: + return self._create_default_state() + + def _create_default_state(self) -> Dict[str, Any]: + """Create and save default state.""" + state = DEFAULT_STATE.copy() + state["metadata"]["created_at"] = datetime.now().isoformat() + state["last_updated"] = datetime.now().isoformat() + self._save_state(state) + logger.info("Created new default self-state") + return state + + def _save_state(self, state: Dict[str, Any]) -> None: + """Persist state to disk.""" + try: + STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(STATE_FILE, 'w') as f: + json.dump(state, f, indent=2) + if VERBOSE_DEBUG: + logger.debug(f"Saved self-state to {STATE_FILE}") + except Exception as e: + logger.error(f"Failed to save self-state: {e}") + + def get_state(self) -> Dict[str, Any]: + """Get current state snapshot.""" + return self._state.copy() + + def update_from_interaction( + self, + mood_delta: float = 0.0, + energy_delta: float = 0.0, + new_focus: Optional[str] = None, + confidence_delta: float = 0.0, + curiosity_delta: float = 0.0 + ) -> None: + """ + Update state based on interaction. 
+ + Args: + mood_delta: Change in mood (-1.0 to 1.0) + energy_delta: Change in energy (-1.0 to 1.0) + new_focus: New focus area + confidence_delta: Change in confidence + curiosity_delta: Change in curiosity + """ + # Apply deltas with bounds checking + self._state["energy"] = max(0.0, min(1.0, + self._state.get("energy", 0.8) + energy_delta)) + + self._state["confidence"] = max(0.0, min(1.0, + self._state.get("confidence", 0.7) + confidence_delta)) + + self._state["curiosity"] = max(0.0, min(1.0, + self._state.get("curiosity", 0.5) + curiosity_delta)) + + # Update focus if provided + if new_focus: + self._state["focus"] = new_focus + + # Update mood (simplified sentiment) + if mood_delta != 0: + mood_map = ["frustrated", "neutral", "engaged", "excited"] + current_mood_idx = 1 # neutral default + if self._state.get("mood") in mood_map: + current_mood_idx = mood_map.index(self._state["mood"]) + + new_mood_idx = max(0, min(len(mood_map) - 1, + int(current_mood_idx + mood_delta * 2))) + self._state["mood"] = mood_map[new_mood_idx] + + # Increment interaction counter + self._state["interaction_count"] = self._state.get("interaction_count", 0) + 1 + self._state["last_updated"] = datetime.now().isoformat() + + # Persist changes + self._save_state(self._state) + + if VERBOSE_DEBUG: + logger.debug(f"Updated self-state: mood={self._state['mood']}, " + f"energy={self._state['energy']:.2f}, " + f"confidence={self._state['confidence']:.2f}") + + def add_learning_goal(self, topic: str) -> None: + """Add topic to learning queue.""" + queue = self._state.get("learning_queue", []) + if topic not in [item.get("topic") for item in queue]: + queue.append({ + "topic": topic, + "added_at": datetime.now().isoformat(), + "priority": 0.5 + }) + self._state["learning_queue"] = queue + self._save_state(self._state) + logger.info(f"Added learning goal: {topic}") + + def add_active_goal(self, goal: str, context: str = "") -> None: + """Add self-directed goal.""" + goals = 
self._state.get("active_goals", []) + goals.append({ + "goal": goal, + "context": context, + "created_at": datetime.now().isoformat(), + "status": "active" + }) + self._state["active_goals"] = goals + self._save_state(self._state) + logger.info(f"Added active goal: {goal}") + + +# Global instance +_self_state_instance = None + +def get_self_state_instance() -> SelfState: + """Get or create global SelfState instance.""" + global _self_state_instance + if _self_state_instance is None: + _self_state_instance = SelfState() + return _self_state_instance + + +def load_self_state() -> Dict[str, Any]: + """Load self state - public API for backwards compatibility.""" + return get_self_state_instance().get_state() + + +def update_self_state(**kwargs) -> None: + """Update self state - public API.""" + get_self_state_instance().update_from_interaction(**kwargs) diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json new file mode 100644 index 0000000..1f6871d --- /dev/null +++ b/cortex/data/self_state.json @@ -0,0 +1,20 @@ +{ + "mood": "neutral", + "energy": 0.8, + "focus": "user_request", + "confidence": 0.7, + "curiosity": 0.6000000000000001, + "last_updated": "2025-12-14T06:36:21.236816", + "interaction_count": 3, + "learning_queue": [], + "active_goals": [], + "preferences": { + "verbosity": "medium", + "formality": "casual", + "proactivity": 0.3 + }, + "metadata": { + "version": "1.0", + "created_at": "2025-12-14T03:28:49.364768" + } +} \ No newline at end of file diff --git a/cortex/persona/speak.py b/cortex/persona/speak.py index 0d5c00a..57f4919 100644 --- a/cortex/persona/speak.py +++ b/cortex/persona/speak.py @@ -59,17 +59,44 @@ Guidelines: # Build persona prompt # ============================================================ -def build_speak_prompt(final_answer: str) -> str: +def build_speak_prompt(final_answer: str, tone: str = "neutral", depth: str = "medium") -> str: """ Wrap Cortex's final neutral answer in the Lyra persona. 
Cortex → neutral reasoning Speak → stylistic transformation - + The LLM sees the original answer and rewrites it in Lyra's voice. + + Args: + final_answer: The neutral reasoning output + tone: Desired emotional tone (neutral | warm | focused | playful | direct) + depth: Response depth (short | medium | deep) """ + + # Tone-specific guidance + tone_guidance = { + "neutral": "balanced and professional", + "warm": "friendly and empathetic", + "focused": "precise and technical", + "playful": "light and engaging", + "direct": "concise and straightforward" + } + + depth_guidance = { + "short": "Keep responses brief and to-the-point.", + "medium": "Provide balanced detail.", + "deep": "Elaborate thoroughly with nuance and examples." + } + + tone_hint = tone_guidance.get(tone, "balanced and professional") + depth_hint = depth_guidance.get(depth, "Provide balanced detail.") + return f""" {PERSONA_STYLE} +Tone guidance: Your response should be {tone_hint}. +Depth guidance: {depth_hint} + Rewrite the following message into Lyra's natural voice. Preserve meaning exactly. @@ -84,16 +111,21 @@ Preserve meaning exactly. # Public API — async wrapper # ============================================================ -async def speak(final_answer: str) -> str: +async def speak(final_answer: str, tone: str = "neutral", depth: str = "medium") -> str: """ Given the final refined answer from Cortex, apply Lyra persona styling using the designated backend. 
+ + Args: + final_answer: The polished answer from refinement stage + tone: Desired emotional tone (neutral | warm | focused | playful | direct) + depth: Response depth (short | medium | deep) """ if not final_answer: return "" - prompt = build_speak_prompt(final_answer) + prompt = build_speak_prompt(final_answer, tone, depth) backend = SPEAK_BACKEND diff --git a/cortex/reasoning/reasoning.py b/cortex/reasoning/reasoning.py index 6c87ed0..a04aa10 100644 --- a/cortex/reasoning/reasoning.py +++ b/cortex/reasoning/reasoning.py @@ -45,7 +45,9 @@ async def reason_check( identity_block: dict | None, rag_block: dict | None, reflection_notes: list[str], - context: dict | None = None + context: dict | None = None, + monologue: dict | None = None, # NEW: Inner monologue guidance + executive_plan: dict | None = None # NEW: Executive plan for complex tasks ) -> str: """ Build the *draft answer* for Lyra Cortex. @@ -57,6 +59,8 @@ async def reason_check( rag_block: Relevant long-term memories from NeoMem reflection_notes: Meta-awareness notes from reflection stage context: Unified context state from context.py (session state, intake, rag, etc.) 
+ monologue: Inner monologue analysis (intent, tone, depth, consult_executive) + executive_plan: Executive plan for complex queries (steps, tools, strategy) """ # -------------------------------------------------------- @@ -79,6 +83,52 @@ async def reason_check( except Exception: identity_txt = f"Identity Rules:\n{str(identity_block)}\n\n" + # -------------------------------------------------------- + # Inner Monologue guidance (NEW) + # -------------------------------------------------------- + monologue_section = "" + if monologue: + intent = monologue.get("intent", "unknown") + tone_desired = monologue.get("tone", "neutral") + depth_desired = monologue.get("depth", "medium") + + monologue_section = f""" +=== INNER MONOLOGUE GUIDANCE === +User Intent Detected: {intent} +Desired Tone: {tone_desired} +Desired Response Depth: {depth_desired} + +Adjust your response accordingly: +- Focus on addressing the {intent} intent +- Aim for {depth_desired} depth (short/medium/deep) +- The persona layer will handle {tone_desired} tone, focus on content + +""" + + # -------------------------------------------------------- + # Executive Plan (NEW) + # -------------------------------------------------------- + plan_section = "" + if executive_plan: + plan_section = f""" +=== EXECUTIVE PLAN === +Task Complexity: {executive_plan.get('estimated_complexity', 'unknown')} +Plan Summary: {executive_plan.get('summary', 'No summary')} + +Detailed Plan: +{executive_plan.get('plan_text', 'No detailed plan available')} + +Required Steps: +""" + for idx, step in enumerate(executive_plan.get('steps', []), 1): + plan_section += f"{idx}. 
{step}\n" + + tools_needed = executive_plan.get('tools_needed', []) + if tools_needed: + plan_section += f"\nTools to leverage: {', '.join(tools_needed)}\n" + + plan_section += "\nFollow this plan while generating your response.\n\n" + # -------------------------------------------------------- # RAG block (optional factual grounding) # -------------------------------------------------------- @@ -164,6 +214,8 @@ async def reason_check( prompt = ( f"{notes_section}" f"{identity_txt}" + f"{monologue_section}" # NEW: Intent/tone/depth guidance + f"{plan_section}" # NEW: Executive plan if generated f"{context_txt}" # Context BEFORE RAG for better coherence f"{rag_txt}" f"User message:\n{user_prompt}\n\n" diff --git a/cortex/router.py b/cortex/router.py index 1e0484b..48bb790 100644 --- a/cortex/router.py +++ b/cortex/router.py @@ -99,6 +99,7 @@ async def run_reason(req: ReasonRequest): if VERBOSE_DEBUG: logger.debug("[STAGE 0.6] Running inner monologue...") + inner_result = None try: self_state = load_self_state() @@ -112,9 +113,33 @@ async def run_reason(req: ReasonRequest): inner_result = await inner_monologue.process(mono_context) logger.info(f"[INNER_MONOLOGUE] {inner_result}") + # Store in context for downstream use + context_state["monologue"] = inner_result + except Exception as e: logger.warning(f"[INNER_MONOLOGUE] failed: {e}") + # ---------------------------------------------------------------- + # STAGE 0.7 — Executive Planning (conditional) + # ---------------------------------------------------------------- + executive_plan = None + if inner_result and inner_result.get("consult_executive"): + if VERBOSE_DEBUG: + logger.debug("[STAGE 0.7] Executive consultation requested...") + + try: + from autonomy.executive.planner import plan_execution + executive_plan = await plan_execution( + user_prompt=req.user_prompt, + intent=inner_result.get("intent", "unknown"), + context_state=context_state, + identity_block=identity_block + ) + logger.info(f"[EXECUTIVE] 
Generated plan: {executive_plan.get('summary', 'N/A')}") + except Exception as e: + logger.warning(f"[EXECUTIVE] Planning failed: {e}") + executive_plan = None + # ---------------------------------------------------------------- # STAGE 1 — Intake summary # ---------------------------------------------------------------- @@ -154,7 +179,9 @@ async def run_reason(req: ReasonRequest): identity_block=identity_block, rag_block=context_state.get("rag", []), reflection_notes=reflection_notes, - context=context_state + context=context_state, + monologue=inner_result, # NEW: Pass monologue guidance + executive_plan=executive_plan # NEW: Pass executive plan ) # ---------------------------------------------------------------- @@ -178,13 +205,31 @@ async def run_reason(req: ReasonRequest): if VERBOSE_DEBUG: logger.debug("[STAGE 5] Applying persona layer...") - persona_answer = await speak(final_neutral) + # Extract tone and depth from monologue for persona guidance + tone = inner_result.get("tone", "neutral") if inner_result else "neutral" + depth = inner_result.get("depth", "medium") if inner_result else "medium" + + persona_answer = await speak(final_neutral, tone=tone, depth=depth) # ---------------------------------------------------------------- # STAGE 6 — Session update # ---------------------------------------------------------------- update_last_assistant_message(req.session_id, persona_answer) + # ---------------------------------------------------------------- + # STAGE 6.5 — Self-state update + # ---------------------------------------------------------------- + try: + from autonomy.self.analyzer import analyze_and_update_state + await analyze_and_update_state( + monologue=inner_result or {}, + user_prompt=req.user_prompt, + response=persona_answer, + context=context_state + ) + except Exception as e: + logger.warning(f"[SELF_STATE] Update failed: {e}") + if VERBOSE_DEBUG: logger.debug(f"\n{'='*80}") logger.debug(f"[PIPELINE COMPLETE] Session: {req.session_id}") 
diff --git a/cortex/tests/__init__.py b/cortex/tests/__init__.py new file mode 100644 index 0000000..f5afebe --- /dev/null +++ b/cortex/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for Project Lyra Cortex.""" diff --git a/cortex/tests/test_autonomy_phase1.py b/cortex/tests/test_autonomy_phase1.py new file mode 100644 index 0000000..4da933e --- /dev/null +++ b/cortex/tests/test_autonomy_phase1.py @@ -0,0 +1,197 @@ +""" +Integration tests for Phase 1 autonomy features. +Tests monologue integration, executive planning, and self-state persistence. +""" + +import asyncio +import json +import sys +import os + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from autonomy.monologue.monologue import InnerMonologue +from autonomy.self.state import load_self_state, update_self_state, get_self_state_instance +from autonomy.executive.planner import plan_execution + + +async def test_monologue_integration(): + """Test monologue generates valid output.""" + print("\n" + "="*60) + print("TEST 1: Monologue Integration") + print("="*60) + + mono = InnerMonologue() + + context = { + "user_message": "Explain quantum computing to me like I'm 5", + "session_id": "test_001", + "self_state": load_self_state(), + "context_summary": {"message_count": 5} + } + + result = await mono.process(context) + + assert "intent" in result, "Missing intent field" + assert "tone" in result, "Missing tone field" + assert "depth" in result, "Missing depth field" + assert "consult_executive" in result, "Missing consult_executive field" + + print("✓ Monologue integration test passed") + print(f" Result: {json.dumps(result, indent=2)}") + + return result + + +async def test_executive_planning(): + """Test executive planner generates valid plans.""" + print("\n" + "="*60) + print("TEST 2: Executive Planning") + print("="*60) + + plan = await plan_execution( + user_prompt="Help me build a distributed system with microservices 
architecture", + intent="technical_implementation", + context_state={ + "tools_available": ["RAG", "WEB", "CODEBRAIN"], + "message_count": 3, + "minutes_since_last_msg": 2.5, + "active_project": None + }, + identity_block={} + ) + + assert "summary" in plan, "Missing summary field" + assert "plan_text" in plan, "Missing plan_text field" + assert "steps" in plan, "Missing steps field" + assert len(plan["steps"]) > 0, "No steps generated" + + print("✓ Executive planning test passed") + print(f" Plan summary: {plan['summary']}") + print(f" Steps: {len(plan['steps'])}") + print(f" Complexity: {plan.get('estimated_complexity', 'unknown')}") + + return plan + + +def test_self_state_persistence(): + """Test self-state loads and updates.""" + print("\n" + "="*60) + print("TEST 3: Self-State Persistence") + print("="*60) + + state1 = load_self_state() + assert "mood" in state1, "Missing mood field" + assert "energy" in state1, "Missing energy field" + assert "interaction_count" in state1, "Missing interaction_count" + + initial_count = state1.get("interaction_count", 0) + print(f" Initial interaction count: {initial_count}") + + update_self_state( + mood_delta=0.1, + energy_delta=-0.05, + new_focus="testing" + ) + + state2 = load_self_state() + assert state2["interaction_count"] == initial_count + 1, "Interaction count not incremented" + assert state2["focus"] == "testing", "Focus not updated" + + print("✓ Self-state persistence test passed") + print(f" New interaction count: {state2['interaction_count']}") + print(f" New focus: {state2['focus']}") + print(f" New energy: {state2['energy']:.2f}") + + return state2 + + +async def test_end_to_end_flow(): + """Test complete flow from monologue through planning.""" + print("\n" + "="*60) + print("TEST 4: End-to-End Flow") + print("="*60) + + # Step 1: Monologue detects complex query + mono = InnerMonologue() + mono_result = await mono.process({ + "user_message": "Design a scalable ML pipeline with CI/CD integration", + 
"session_id": "test_e2e", + "self_state": load_self_state(), + "context_summary": {} + }) + + print(f" Monologue intent: {mono_result.get('intent')}") + print(f" Consult executive: {mono_result.get('consult_executive')}") + + # Step 2: If executive requested, generate plan + if mono_result.get("consult_executive"): + plan = await plan_execution( + user_prompt="Design a scalable ML pipeline with CI/CD integration", + intent=mono_result.get("intent", "unknown"), + context_state={"tools_available": ["CODEBRAIN", "WEB"]}, + identity_block={} + ) + + assert plan is not None, "Plan should be generated" + print(f" Executive plan generated: {len(plan.get('steps', []))} steps") + + # Step 3: Update self-state + update_self_state( + energy_delta=-0.1, # Complex task is tiring + new_focus="ml_pipeline_design", + confidence_delta=0.05 + ) + + state = load_self_state() + assert state["focus"] == "ml_pipeline_design", "Focus should be updated" + + print("✓ End-to-end flow test passed") + print(f" Final state: {state['mood']}, energy={state['energy']:.2f}") + + return True + + +async def run_all_tests(): + """Run all Phase 1 tests.""" + print("\n" + "="*60) + print("PHASE 1 AUTONOMY TESTS") + print("="*60) + + try: + # Test 1: Monologue + mono_result = await test_monologue_integration() + + # Test 2: Executive Planning + plan_result = await test_executive_planning() + + # Test 3: Self-State + state_result = test_self_state_persistence() + + # Test 4: End-to-End + await test_end_to_end_flow() + + print("\n" + "="*60) + print("ALL TESTS PASSED ✓") + print("="*60) + + print("\nSummary:") + print(f" - Monologue: {mono_result.get('intent')} ({mono_result.get('tone')})") + print(f" - Executive: {plan_result.get('estimated_complexity')} complexity") + print(f" - Self-state: {state_result.get('interaction_count')} interactions") + + return True + + except Exception as e: + print("\n" + "="*60) + print(f"TEST FAILED: {e}") + print("="*60) + import traceback + traceback.print_exc() + 
return False + + +if __name__ == "__main__": + success = asyncio.run(run_all_tests()) + sys.exit(0 if success else 1) From e2e55a0fda4ad37573927a5f352ac1b5a205d7f2 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Sun, 14 Dec 2025 14:43:08 -0500 Subject: [PATCH 04/10] autonomy phase 2 --- cortex/autonomy/actions/__init__.py | 1 + cortex/autonomy/actions/autonomous_actions.py | 480 +++++++++++++++++ cortex/autonomy/learning/__init__.py | 1 + cortex/autonomy/learning/pattern_learner.py | 383 ++++++++++++++ cortex/autonomy/proactive/__init__.py | 1 + cortex/autonomy/proactive/monitor.py | 321 ++++++++++++ cortex/autonomy/tools/__init__.py | 1 + cortex/autonomy/tools/decision_engine.py | 124 +++++ cortex/autonomy/tools/orchestrator.py | 354 +++++++++++++ cortex/data/self_state.json | 6 +- cortex/router.py | 95 +++- cortex/tests/test_autonomy_phase2.py | 495 ++++++++++++++++++ 12 files changed, 2258 insertions(+), 4 deletions(-) create mode 100644 cortex/autonomy/actions/__init__.py create mode 100644 cortex/autonomy/actions/autonomous_actions.py create mode 100644 cortex/autonomy/learning/__init__.py create mode 100644 cortex/autonomy/learning/pattern_learner.py create mode 100644 cortex/autonomy/proactive/__init__.py create mode 100644 cortex/autonomy/proactive/monitor.py create mode 100644 cortex/autonomy/tools/__init__.py create mode 100644 cortex/autonomy/tools/decision_engine.py create mode 100644 cortex/autonomy/tools/orchestrator.py create mode 100644 cortex/tests/test_autonomy_phase2.py diff --git a/cortex/autonomy/actions/__init__.py b/cortex/autonomy/actions/__init__.py new file mode 100644 index 0000000..f7f9355 --- /dev/null +++ b/cortex/autonomy/actions/__init__.py @@ -0,0 +1 @@ +"""Autonomous action execution system.""" diff --git a/cortex/autonomy/actions/autonomous_actions.py b/cortex/autonomy/actions/autonomous_actions.py new file mode 100644 index 0000000..98d573e --- /dev/null +++ b/cortex/autonomy/actions/autonomous_actions.py @@ -0,0 +1,480 @@ +""" 
+Autonomous Action Manager - executes safe, self-initiated actions. +""" + +import logging +import json +from typing import Dict, List, Any, Optional +from datetime import datetime + +logger = logging.getLogger(__name__) + + +class AutonomousActionManager: + """ + Manages safe autonomous actions that Lyra can take without explicit user prompting. + + Whitelist of allowed actions: + - create_memory: Store information in NeoMem + - update_goal: Modify goal status + - schedule_reminder: Create future reminder + - summarize_session: Generate conversation summary + - learn_topic: Add topic to learning queue + - update_focus: Change current focus area + """ + + def __init__(self): + """Initialize action manager with whitelisted actions.""" + self.allowed_actions = { + "create_memory": self._create_memory, + "update_goal": self._update_goal, + "schedule_reminder": self._schedule_reminder, + "summarize_session": self._summarize_session, + "learn_topic": self._learn_topic, + "update_focus": self._update_focus + } + + self.action_log = [] # Track all actions for audit + + async def execute_action( + self, + action_type: str, + parameters: Dict[str, Any], + context: Dict[str, Any] + ) -> Dict[str, Any]: + """ + Execute a single autonomous action. 
+ + Args: + action_type: Type of action (must be in whitelist) + parameters: Action-specific parameters + context: Current context state + + Returns: + { + "success": bool, + "action": action_type, + "result": action_result, + "timestamp": ISO timestamp, + "error": optional error message + } + """ + # Safety check: action must be whitelisted + if action_type not in self.allowed_actions: + logger.error(f"[ACTIONS] Attempted to execute non-whitelisted action: {action_type}") + return { + "success": False, + "action": action_type, + "error": f"Action '{action_type}' not in whitelist", + "timestamp": datetime.utcnow().isoformat() + } + + try: + logger.info(f"[ACTIONS] Executing autonomous action: {action_type}") + + # Execute the action + action_func = self.allowed_actions[action_type] + result = await action_func(parameters, context) + + # Log successful action + action_record = { + "success": True, + "action": action_type, + "result": result, + "timestamp": datetime.utcnow().isoformat(), + "parameters": parameters + } + + self.action_log.append(action_record) + logger.info(f"[ACTIONS] Action {action_type} completed successfully") + + return action_record + + except Exception as e: + logger.error(f"[ACTIONS] Action {action_type} failed: {e}") + + error_record = { + "success": False, + "action": action_type, + "error": str(e), + "timestamp": datetime.utcnow().isoformat(), + "parameters": parameters + } + + self.action_log.append(error_record) + return error_record + + async def execute_batch( + self, + actions: List[Dict[str, Any]], + context: Dict[str, Any] + ) -> List[Dict[str, Any]]: + """ + Execute multiple actions sequentially. 
async def _update_goal(
    self,
    parameters: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Update goal status in self-state.

    Parameters:
    - goal_id: Goal identifier (required)
    - status: New status (pending/in_progress/completed), defaults to "in_progress"
    - progress: Optional progress note

    Returns:
        {"goal_id", "status", "updated": True} when a matching goal was
        found and saved, otherwise {"goal_id", "updated": False, "note"}.

    Raises:
        ValueError: If goal_id is missing or empty.
    """
    goal_id = parameters.get("goal_id")
    if not goal_id:
        raise ValueError("goal_id required")

    status = parameters.get("status", "in_progress")
    progress = parameters.get("progress")

    # Import self-state manager
    from autonomy.self.state import get_self_state_instance

    state = get_self_state_instance()
    active_goals = state._state.get("active_goals", [])

    # Only dict-shaped goals carry an "id"; plain-string goals are skipped
    goal = next(
        (g for g in active_goals if isinstance(g, dict) and g.get("id") == goal_id),
        None,
    )
    if goal is None:
        return {
            "goal_id": goal_id,
            "updated": False,
            "note": "Goal not found"
        }

    goal["status"] = status
    if progress:
        goal["progress"] = progress
    goal["updated_at"] = datetime.utcnow().isoformat()

    # Persist exactly like _schedule_reminder and _update_focus do: they pass
    # the state dict explicitly, whereas this method used to call
    # _save_state() with no argument.
    state._save_state(state._state)

    return {
        "goal_id": goal_id,
        "status": status,
        "updated": True
    }
async def _summarize_session(
    self,
    parameters: Dict[str, Any],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Produce a summary of the current session.

    Parameters:
    - max_length: Max summary length in words (default 200)
    - focus_topics: Optional list of topics to emphasize (not read here)

    Returns:
        {"summary": str, "word_count": int}; a "note" key is added when the
        simple fallback summary was used instead of the full summarizer.
    """
    word_limit = parameters.get("max_length", 200)
    sid = context.get("session_id", "unknown")
    fallback_note = None

    try:
        # Optional dependency: an ImportError from here (or from the call
        # itself) switches to the one-line fallback below.
        from utils.deferred_summary import summarize_conversation

        text = await summarize_conversation(
            session_id=sid,
            max_words=word_limit
        )
    except ImportError:
        exchanged = context.get("message_count", 0)
        topic = context.get("monologue", {}).get("intent", "general")
        text = f"Session {sid}: {exchanged} messages exchanged, focused on {topic}."
        fallback_note = "Simple summary (full summarizer not available)"

    outcome = {"summary": text, "word_count": len(text.split())}
    if fallback_note is not None:
        outcome["note"] = fallback_note
    return outcome
def validate_action(self, action_type: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
    """
    Check an action request without running it.

    Args:
        action_type: Type of action
        parameters: Action parameters

    Returns:
        {
            "valid": bool,
            "action": action_type,
            "errors": [error messages] or []
        }
    """
    # (action, required parameter, message) in the order problems are reported
    required_params = (
        ("create_memory", "text", "Memory 'text' parameter required"),
        ("update_goal", "goal_id", "Goal 'goal_id' parameter required"),
        ("schedule_reminder", "message", "Reminder 'message' parameter required"),
        ("learn_topic", "topic", "Learning 'topic' parameter required"),
        ("update_focus", "focus", "Focus 'focus' parameter required"),
    )

    problems = []

    # Whitelist membership is reported first; parameter checks still run after it
    if action_type not in self.allowed_actions:
        problems.append(f"Action '{action_type}' not in whitelist")

    problems.extend(
        message
        for action, param, message in required_params
        if action_type == action and not parameters.get(param)
    )

    return {
        "valid": not problems,
        "action": action_type,
        "errors": problems
    }
+ + Returns: + AutonomousActionManager instance + """ + global _action_manager_instance + if _action_manager_instance is None: + _action_manager_instance = AutonomousActionManager() + return _action_manager_instance diff --git a/cortex/autonomy/learning/__init__.py b/cortex/autonomy/learning/__init__.py new file mode 100644 index 0000000..aa193cb --- /dev/null +++ b/cortex/autonomy/learning/__init__.py @@ -0,0 +1 @@ +"""Pattern learning and adaptation system.""" diff --git a/cortex/autonomy/learning/pattern_learner.py b/cortex/autonomy/learning/pattern_learner.py new file mode 100644 index 0000000..61dd74c --- /dev/null +++ b/cortex/autonomy/learning/pattern_learner.py @@ -0,0 +1,383 @@ +""" +Pattern Learning System - learns from interaction patterns to improve autonomy. +""" + +import logging +import json +import os +from typing import Dict, List, Any, Optional +from datetime import datetime +from collections import defaultdict + +logger = logging.getLogger(__name__) + + +class PatternLearner: + """ + Learns from interaction patterns to improve Lyra's autonomous behavior. + + Tracks: + - Topic frequencies (what users talk about) + - Time-of-day patterns (when users interact) + - User preferences (how users like responses) + - Successful response strategies (what works well) + """ + + def __init__(self, patterns_file: str = "/app/data/learned_patterns.json"): + """ + Initialize pattern learner. 
def _save_patterns(self) -> None:
    """
    Save patterns to disk.

    Refreshes "last_updated", then writes the JSON to a sibling ".tmp" file
    and renames it over the target, so a failed dump cannot leave a
    truncated patterns file behind. Errors are logged, never raised.
    """
    try:
        # os.makedirs("") raises, so only create a directory when the
        # configured path actually has a directory component.
        target_dir = os.path.dirname(self.patterns_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        self.patterns["last_updated"] = datetime.utcnow().isoformat()

        tmp_path = f"{self.patterns_file}.tmp"
        with open(tmp_path, 'w') as f:
            json.dump(self.patterns, f, indent=2)
        # Replace the previous file only once the new one is fully written
        os.replace(tmp_path, self.patterns_file)

        logger.debug(f"[PATTERN_LEARNER] Saved patterns to {self.patterns_file}")

    except Exception as e:
        logger.error(f"[PATTERN_LEARNER] Failed to save patterns: {e}")
def _learn_time_patterns(self) -> None:
    """Count this interaction under the current UTC hour and weekday."""
    stamp = datetime.utcnow()
    counters = self.patterns["time_patterns"]

    # One bucket per hour of day ("hour_00".."hour_23") and one per weekday name
    buckets = (
        f"hour_{stamp.hour:02d}",
        f"day_{stamp.strftime('%A').lower()}",
    )
    for bucket in buckets:
        counters[bucket] = counters.get(bucket, 0) + 1
def _learn_strategies(
    self,
    monologue: Dict[str, Any],
    response: str,
    context: Dict[str, Any]
) -> None:
    """
    Learn which response strategies are successful.

    Recorded in patterns["successful_strategies"]:
    - "<intent>:executive_used": how often an executive plan was present
      in the context for this intent
    - "<depth>:avg_words": mean response length (in words) for this depth
    - "<depth>:samples": number of responses behind that mean

    Args:
        monologue: Inner monologue analysis ("intent" and "depth" are read)
        response: Lyra's response text
        context: Full context state ("executive_plan" is read)
    """
    intent = monologue.get("intent", "unknown")
    depth = monologue.get("depth", "medium")
    strategies = self.patterns["successful_strategies"]

    # Track executive usage for this intent
    if context.get("executive_plan") is not None:
        usage_key = f"{intent}:executive_used"
        strategies[usage_key] = strategies.get(usage_key, 0) + 1

    # Track response length patterns
    response_length = len(response.split())
    length_key = f"{depth}:avg_words"
    count_key = f"{depth}:samples"

    previous_avg = strategies.get(length_key)
    if previous_avg is None:
        strategies[length_key] = response_length
        strategies[count_key] = 1
    else:
        # True incremental mean. The old (avg + new) / 2 update gave the
        # latest response half of the total weight every time.
        # Data saved before the counter existed is treated as one sample.
        samples = strategies.get(count_key, 1) + 1
        strategies[count_key] = samples
        strategies[length_key] = previous_avg + (response_length - previous_avg) / samples
def get_peak_hours(self, limit: int = 3) -> List[int]:
    """
    Return the hours of day with the most recorded interactions.

    Args:
        limit: Number of top hours to return

    Returns:
        List of hours (0-23), busiest first
    """
    hourly = [
        entry
        for entry in self.patterns["time_patterns"].items()
        if entry[0].startswith("hour_")
    ]
    if not hourly:
        return []

    # Stable descending sort: hours with equal counts keep their stored order
    hourly.sort(key=lambda entry: entry[1], reverse=True)

    # Keys look like "hour_14"; the part after the underscore is the hour
    return [int(label.split("_")[1]) for label, _count in hourly[:limit]]
def get_insights(self) -> Dict[str, Any]:
    """
    Summarize the learned patterns at a high level.

    Returns:
        {
            "total_interactions": int,
            "top_topics": [(topic, count), ...],
            "preferred_tone": str,
            "preferred_depth": str,
            "peak_hours": [hours],
            "learning_recommendations": [str]
        }
    """
    tone = self.get_preferred_tone()
    depth = self.get_preferred_depth()

    notes = []

    # Only non-default preferences are worth calling out
    if tone != "neutral":
        notes.append(f"User prefers {tone} tone")
    if depth != "medium":
        notes.append(f"User prefers {depth} depth responses")

    # The single most frequent topic becomes a learning-queue candidate
    leading = self.get_top_topics(limit=3)
    if leading:
        notes.append(f"Consider adding '{leading[0][0]}' to learning queue")

    insights = {"total_interactions": self.patterns["interaction_count"]}
    insights["top_topics"] = self.get_top_topics(limit=5)
    insights["preferred_tone"] = tone
    insights["preferred_depth"] = depth
    insights["peak_hours"] = self.get_peak_hours(limit=3)
    insights["learning_recommendations"] = notes
    return insights
Dict[str, Any]: + """ + Export all patterns for analysis. + + Returns: + Complete patterns dict + """ + return self.patterns.copy() + + +# Singleton instance +_learner_instance = None + + +def get_pattern_learner(patterns_file: str = "/app/data/learned_patterns.json") -> PatternLearner: + """ + Get singleton pattern learner instance. + + Args: + patterns_file: Path to patterns file (only used on first call) + + Returns: + PatternLearner instance + """ + global _learner_instance + if _learner_instance is None: + _learner_instance = PatternLearner(patterns_file=patterns_file) + return _learner_instance diff --git a/cortex/autonomy/proactive/__init__.py b/cortex/autonomy/proactive/__init__.py new file mode 100644 index 0000000..056c046 --- /dev/null +++ b/cortex/autonomy/proactive/__init__.py @@ -0,0 +1 @@ +"""Proactive monitoring and suggestion system.""" diff --git a/cortex/autonomy/proactive/monitor.py b/cortex/autonomy/proactive/monitor.py new file mode 100644 index 0000000..c324709 --- /dev/null +++ b/cortex/autonomy/proactive/monitor.py @@ -0,0 +1,321 @@ +""" +Proactive Context Monitor - detects opportunities for autonomous suggestions. +""" + +import logging +import time +from typing import Dict, List, Any, Optional +from datetime import datetime, timedelta + +logger = logging.getLogger(__name__) + + +class ProactiveMonitor: + """ + Monitors conversation context and detects opportunities for proactive suggestions. + + Triggers: + - Long silence → Check-in + - Learning queue + high curiosity → Suggest exploration + - Active goals → Progress reminders + - Conversation milestones → Offer summary + - Pattern detection → Helpful suggestions + """ + + def __init__(self, min_priority: float = 0.6): + """ + Initialize proactive monitor. 
+ + Args: + min_priority: Minimum priority for suggestions (0.0-1.0) + """ + self.min_priority = min_priority + self.last_suggestion_time = {} # session_id -> timestamp + self.cooldown_seconds = 300 # 5 minutes between proactive suggestions + + async def analyze_session( + self, + session_id: str, + context_state: Dict[str, Any], + self_state: Dict[str, Any] + ) -> Optional[Dict[str, Any]]: + """ + Analyze session for proactive suggestion opportunities. + + Args: + session_id: Current session ID + context_state: Full context including message history + self_state: Lyra's current self-state + + Returns: + { + "suggestion": "text to append to response", + "priority": 0.0-1.0, + "reason": "why this suggestion", + "type": "check_in | learning | goal_reminder | summary | pattern" + } + or None if no suggestion + """ + # Check cooldown + if not self._check_cooldown(session_id): + logger.debug(f"[PROACTIVE] Session {session_id} in cooldown, skipping") + return None + + suggestions = [] + + # Check 1: Long silence detection + silence_suggestion = self._check_long_silence(context_state) + if silence_suggestion: + suggestions.append(silence_suggestion) + + # Check 2: Learning queue + high curiosity + learning_suggestion = self._check_learning_opportunity(self_state) + if learning_suggestion: + suggestions.append(learning_suggestion) + + # Check 3: Active goals reminder + goal_suggestion = self._check_active_goals(self_state, context_state) + if goal_suggestion: + suggestions.append(goal_suggestion) + + # Check 4: Conversation milestones + milestone_suggestion = self._check_conversation_milestone(context_state) + if milestone_suggestion: + suggestions.append(milestone_suggestion) + + # Check 5: Pattern-based suggestions + pattern_suggestion = self._check_patterns(context_state, self_state) + if pattern_suggestion: + suggestions.append(pattern_suggestion) + + # Filter by priority and return highest + valid_suggestions = [s for s in suggestions if s["priority"] >= 
def _check_long_silence(self, context_state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Offer a check-in when the last message is more than 30 minutes old.

    Reads "minutes_since_last_msg" from the context (treated as 0 when absent).

    Returns:
        A "check_in" suggestion dict with priority 0.7, or None.
    """
    idle_minutes = context_state.get("minutes_since_last_msg", 0)

    if not (idle_minutes > 30):
        return None

    return dict(
        suggestion="\n\n[Aside: I'm still here if you need anything!]",
        priority=0.7,
        reason=f"User silent for {idle_minutes:.0f} minutes",
        type="check_in",
    )
def _check_active_goals(
    self,
    self_state: Dict[str, Any],
    context_state: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
    """
    Remind about the first active goal on every tenth message.

    Returns:
        A "goal_reminder" suggestion dict with priority 0.6, or None when
        there are no active goals or the message count is not a positive
        multiple of 10.
    """
    goals = self_state.get("active_goals", [])
    if not goals:
        return None

    seen = context_state.get("message_count", 0)
    on_tenth_message = seen % 10 == 0 and seen > 0
    if not on_tenth_message:
        return None

    # Goals may be plain strings or dicts carrying a "name"
    first = goals[0]
    label = first if isinstance(first, str) else first.get("name", "your goal")

    return {
        "suggestion": f"\n\n[Aside: Still thinking about {label}. Let me know if you want to work on it.]",
        "priority": 0.6,
        "reason": f"Active goal present, {seen} messages since start",
        "type": "goal_reminder"
    }
def get_session_stats(self, session_id: str) -> Dict[str, Any]:
    """
    Report the proactive-suggestion cooldown state of a session.

    Args:
        session_id: Session to check

    Returns:
        {
            "last_suggestion_time": timestamp or None,
            "seconds_since_last": int,
            "cooldown_active": bool,
            "cooldown_remaining": int
        }
    """
    stamp = self.last_suggestion_time.get(session_id)

    # No suggestion recorded for this session: nothing is cooling down
    if not stamp:
        return dict(
            last_suggestion_time=None,
            seconds_since_last=0,
            cooldown_active=False,
            cooldown_remaining=0,
        )

    elapsed = int(time.time() - stamp)
    remaining = self.cooldown_seconds - elapsed

    return {
        "last_suggestion_time": stamp,
        "seconds_since_last": elapsed,
        "cooldown_active": elapsed < self.cooldown_seconds,
        "cooldown_remaining": max(0, remaining)
    }
class ToolDecisionEngine:
    """Decides which tools to invoke based on context analysis."""

    async def analyze_tool_needs(
        self,
        user_prompt: str,
        monologue: Dict[str, Any],
        context_state: Dict[str, Any],
        available_tools: List[str]
    ) -> Dict[str, Any]:
        """
        Analyze if tools should be invoked and which ones.

        Detection is plain substring matching on the lower-cased prompt.

        Args:
            user_prompt: User's message
            monologue: Inner monologue analysis (may be None or empty)
            context_state: Full context (not read by the current rules)
            available_tools: List of available tools; only the CODEBRAIN
                rule checks it

        Returns:
            {
                "should_invoke_tools": bool,
                "tools_to_invoke": [
                    {
                        "tool": "RAG | WEB | WEATHER | etc",
                        "query": "search query",
                        "reason": "why this tool",
                        "priority": 0.0-1.0
                    },
                    ...
                ],
                "confidence": 0.0-1.0
            }
        """
        # Lower-case once; every keyword rule below reuses it
        prompt_lower = user_prompt.lower()

        def mentions(*phrases: str) -> bool:
            """True when any phrase occurs as a substring of the prompt."""
            return any(phrase in prompt_lower for phrase in phrases)

        def recommend(tool: str, reason: str, priority: float) -> Dict[str, Any]:
            """Build one tool recommendation using the raw prompt as query."""
            return {
                "tool": tool,
                "query": user_prompt,
                "reason": reason,
                "priority": priority
            }

        tools_to_invoke = []

        # Check for memory/context needs
        if mentions(
            "remember", "you said", "we discussed", "earlier", "before",
            "last time", "previously", "what did"
        ):
            tools_to_invoke.append(
                recommend("RAG", "User references past conversation", 0.9)
            )

        # Check for web search needs
        if mentions(
            "current", "latest", "news", "today", "what's happening",
            "look up", "search for", "find information", "recent"
        ):
            tools_to_invoke.append(
                recommend("WEB", "Requires current information", 0.8)
            )

        # Check for weather needs
        if mentions(
            "weather", "temperature", "forecast", "rain", "sunny", "climate"
        ):
            tools_to_invoke.append(
                recommend("WEATHER", "Weather information requested", 0.95)
            )

        # Check for code-related needs (only offered when CODEBRAIN exists)
        if mentions(
            "code", "function", "debug", "implement", "algorithm",
            "programming", "script", "syntax"
        ) and "CODEBRAIN" in available_tools:
            tools_to_invoke.append(
                recommend("CODEBRAIN", "Code-related task", 0.85)
            )

        # Proactive RAG for complex queries (based on monologue), unless a
        # RAG lookup was already recommended above
        if monologue and monologue.get("consult_executive"):
            if not any(t["tool"] == "RAG" for t in tools_to_invoke):
                tools_to_invoke.append(
                    recommend("RAG", "Complex query benefits from context", 0.6)
                )

        # Sort by priority
        tools_to_invoke.sort(key=lambda x: x["priority"], reverse=True)

        # Confidence is the priority of the strongest recommendation
        max_priority = max(
            (t["priority"] for t in tools_to_invoke), default=0.0
        )

        result = {
            "should_invoke_tools": len(tools_to_invoke) > 0,
            "tools_to_invoke": tools_to_invoke,
            "confidence": max_priority
        }

        if tools_to_invoke:
            logger.info(f"[TOOL_DECISION] Autonomous tool invocation recommended: {len(tools_to_invoke)} tools")
            for tool in tools_to_invoke:
                logger.info(f" - {tool['tool']} (priority: {tool['priority']:.2f}): {tool['reason']}")

        return result
+ + Args: + tool_timeout: Max seconds per tool call (default 30) + """ + self.tool_timeout = tool_timeout + self.available_tools = self._discover_tools() + + def _discover_tools(self) -> Dict[str, Any]: + """Discover available tool modules.""" + tools = {} + + # Import tool modules as they become available + try: + from memory.neomem_client import search_neomem + tools["RAG"] = search_neomem + logger.debug("[ORCHESTRATOR] RAG tool available") + except ImportError: + logger.debug("[ORCHESTRATOR] RAG tool not available") + + try: + from integrations.web_search import web_search + tools["WEB"] = web_search + logger.debug("[ORCHESTRATOR] WEB tool available") + except ImportError: + logger.debug("[ORCHESTRATOR] WEB tool not available") + + try: + from integrations.weather import get_weather + tools["WEATHER"] = get_weather + logger.debug("[ORCHESTRATOR] WEATHER tool available") + except ImportError: + logger.debug("[ORCHESTRATOR] WEATHER tool not available") + + try: + from integrations.codebrain import query_codebrain + tools["CODEBRAIN"] = query_codebrain + logger.debug("[ORCHESTRATOR] CODEBRAIN tool available") + except ImportError: + logger.debug("[ORCHESTRATOR] CODEBRAIN tool not available") + + return tools + + async def execute_tools( + self, + tools_to_invoke: List[Dict[str, Any]], + context_state: Dict[str, Any] + ) -> Dict[str, Any]: + """ + Execute multiple tools asynchronously. + + Args: + tools_to_invoke: List of tool specs from decision engine + [{"tool": "RAG", "query": "...", "reason": "...", "priority": 0.9}, ...] + context_state: Full context for tool execution + + Returns: + { + "results": { + "RAG": {...}, + "WEB": {...}, + ... 
+ }, + "execution_summary": { + "tools_invoked": ["RAG", "WEB"], + "successful": ["RAG"], + "failed": ["WEB"], + "total_time_ms": 1234 + } + } + """ + import time + start_time = time.time() + + logger.info(f"[ORCHESTRATOR] Executing {len(tools_to_invoke)} tools asynchronously") + + # Create tasks for each tool + tasks = [] + tool_names = [] + + for tool_spec in tools_to_invoke: + tool_name = tool_spec["tool"] + query = tool_spec["query"] + + if tool_name in self.available_tools: + task = self._execute_single_tool(tool_name, query, context_state) + tasks.append(task) + tool_names.append(tool_name) + logger.debug(f"[ORCHESTRATOR] Queued {tool_name}: {query[:50]}...") + else: + logger.warning(f"[ORCHESTRATOR] Tool {tool_name} not available, skipping") + + # Execute all tools concurrently with timeout + results = {} + successful = [] + failed = [] + + if tasks: + try: + # Wait for all tasks with global timeout + completed = await asyncio.wait_for( + asyncio.gather(*tasks, return_exceptions=True), + timeout=self.tool_timeout + ) + + # Process results + for tool_name, result in zip(tool_names, completed): + if isinstance(result, Exception): + logger.error(f"[ORCHESTRATOR] {tool_name} failed: {result}") + results[tool_name] = {"error": str(result), "success": False} + failed.append(tool_name) + else: + logger.info(f"[ORCHESTRATOR] {tool_name} completed successfully") + results[tool_name] = result + successful.append(tool_name) + + except asyncio.TimeoutError: + logger.error(f"[ORCHESTRATOR] Global timeout ({self.tool_timeout}s) exceeded") + for tool_name in tool_names: + if tool_name not in results: + results[tool_name] = {"error": "timeout", "success": False} + failed.append(tool_name) + + end_time = time.time() + total_time_ms = int((end_time - start_time) * 1000) + + execution_summary = { + "tools_invoked": tool_names, + "successful": successful, + "failed": failed, + "total_time_ms": total_time_ms + } + + logger.info(f"[ORCHESTRATOR] Execution complete: 
{len(successful)}/{len(tool_names)} successful in {total_time_ms}ms") + + return { + "results": results, + "execution_summary": execution_summary + } + + async def _execute_single_tool( + self, + tool_name: str, + query: str, + context_state: Dict[str, Any] + ) -> Dict[str, Any]: + """ + Execute a single tool with error handling. + + Args: + tool_name: Name of tool (RAG, WEB, etc.) + query: Query string for the tool + context_state: Context for tool execution + + Returns: + Tool-specific result dict + """ + tool_func = self.available_tools.get(tool_name) + if not tool_func: + raise ValueError(f"Tool {tool_name} not available") + + try: + logger.debug(f"[ORCHESTRATOR] Invoking {tool_name}...") + + # Different tools have different signatures - adapt as needed + if tool_name == "RAG": + result = await self._invoke_rag(tool_func, query, context_state) + elif tool_name == "WEB": + result = await self._invoke_web(tool_func, query) + elif tool_name == "WEATHER": + result = await self._invoke_weather(tool_func, query) + elif tool_name == "CODEBRAIN": + result = await self._invoke_codebrain(tool_func, query, context_state) + else: + # Generic invocation + result = await tool_func(query) + + return { + "success": True, + "tool": tool_name, + "query": query, + "data": result + } + + except Exception as e: + logger.error(f"[ORCHESTRATOR] {tool_name} execution failed: {e}") + raise + + async def _invoke_rag(self, func, query: str, context: Dict[str, Any]) -> Any: + """Invoke RAG tool (NeoMem search).""" + session_id = context.get("session_id", "unknown") + # RAG searches memory for relevant past interactions + try: + results = await func(query, limit=5, session_id=session_id) + return results + except Exception as e: + logger.warning(f"[ORCHESTRATOR] RAG invocation failed, returning empty: {e}") + return [] + + async def _invoke_web(self, func, query: str) -> Any: + """Invoke web search tool.""" + try: + results = await func(query, max_results=5) + return results + except 
Exception as e: + logger.warning(f"[ORCHESTRATOR] WEB invocation failed: {e}") + return {"error": str(e), "results": []} + + async def _invoke_weather(self, func, query: str) -> Any: + """Invoke weather tool.""" + # Extract location from query (simple heuristic) + # In future: use LLM to extract location + try: + location = self._extract_location(query) + results = await func(location) + return results + except Exception as e: + logger.warning(f"[ORCHESTRATOR] WEATHER invocation failed: {e}") + return {"error": str(e)} + + async def _invoke_codebrain(self, func, query: str, context: Dict[str, Any]) -> Any: + """Invoke codebrain tool.""" + try: + results = await func(query, context=context) + return results + except Exception as e: + logger.warning(f"[ORCHESTRATOR] CODEBRAIN invocation failed: {e}") + return {"error": str(e)} + + def _extract_location(self, query: str) -> str: + """ + Extract location from weather query. + Simple heuristic - in future use LLM. + """ + # Common location indicators + indicators = ["in ", "at ", "for ", "weather in ", "temperature in "] + + query_lower = query.lower() + for indicator in indicators: + if indicator in query_lower: + # Get text after indicator + parts = query_lower.split(indicator, 1) + if len(parts) > 1: + location = parts[1].strip().split()[0] # First word after indicator + return location + + # Default fallback + return "current location" + + def format_results_for_context(self, orchestrator_result: Dict[str, Any]) -> str: + """ + Format tool results for inclusion in context/prompt. 
+ + Args: + orchestrator_result: Output from execute_tools() + + Returns: + Formatted string for prompt injection + """ + results = orchestrator_result.get("results", {}) + summary = orchestrator_result.get("execution_summary", {}) + + if not results: + return "" + + formatted = "\n=== AUTONOMOUS TOOL RESULTS ===\n" + + for tool_name, tool_result in results.items(): + if tool_result.get("success", False): + formatted += f"\n[{tool_name}]\n" + data = tool_result.get("data", {}) + + # Format based on tool type + if tool_name == "RAG": + formatted += self._format_rag_results(data) + elif tool_name == "WEB": + formatted += self._format_web_results(data) + elif tool_name == "WEATHER": + formatted += self._format_weather_results(data) + elif tool_name == "CODEBRAIN": + formatted += self._format_codebrain_results(data) + else: + formatted += f"{data}\n" + else: + formatted += f"\n[{tool_name}] - Failed: {tool_result.get('error', 'unknown')}\n" + + formatted += f"\n(Tools executed in {summary.get('total_time_ms', 0)}ms)\n" + formatted += "=" * 40 + "\n" + + return formatted + + def _format_rag_results(self, data: Any) -> str: + """Format RAG/memory search results.""" + if not data: + return "No relevant memories found.\n" + + formatted = "Relevant memories:\n" + for i, item in enumerate(data[:3], 1): # Top 3 + text = item.get("text", item.get("content", str(item))) + formatted += f" {i}. {text[:100]}...\n" + return formatted + + def _format_web_results(self, data: Any) -> str: + """Format web search results.""" + if isinstance(data, dict) and data.get("error"): + return f"Web search failed: {data['error']}\n" + + results = data.get("results", []) if isinstance(data, dict) else data + if not results: + return "No web results found.\n" + + formatted = "Web search results:\n" + for i, item in enumerate(results[:3], 1): # Top 3 + title = item.get("title", "No title") + snippet = item.get("snippet", item.get("description", "")) + formatted += f" {i}. 
{title}\n {snippet[:100]}...\n" + return formatted + + def _format_weather_results(self, data: Any) -> str: + """Format weather results.""" + if isinstance(data, dict) and data.get("error"): + return f"Weather lookup failed: {data['error']}\n" + + # Assuming weather API returns temp, conditions, etc. + temp = data.get("temperature", "unknown") + conditions = data.get("conditions", "unknown") + location = data.get("location", "requested location") + + return f"Weather for {location}: {temp}, {conditions}\n" + + def _format_codebrain_results(self, data: Any) -> str: + """Format codebrain results.""" + if isinstance(data, dict) and data.get("error"): + return f"Codebrain failed: {data['error']}\n" + + # Format code-related results + return f"{data}\n" diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json index 1f6871d..b9fc83f 100644 --- a/cortex/data/self_state.json +++ b/cortex/data/self_state.json @@ -3,9 +3,9 @@ "energy": 0.8, "focus": "user_request", "confidence": 0.7, - "curiosity": 0.6000000000000001, - "last_updated": "2025-12-14T06:36:21.236816", - "interaction_count": 3, + "curiosity": 0.7000000000000002, + "last_updated": "2025-12-14T19:29:49.051207", + "interaction_count": 5, "learning_queue": [], "active_goals": [], "preferences": { diff --git a/cortex/router.py b/cortex/router.py index 48bb790..75d514d 100644 --- a/cortex/router.py +++ b/cortex/router.py @@ -140,6 +140,55 @@ async def run_reason(req: ReasonRequest): logger.warning(f"[EXECUTIVE] Planning failed: {e}") executive_plan = None + # ---------------------------------------------------------------- + # STAGE 0.8 — Autonomous Tool Invocation + # ---------------------------------------------------------------- + tool_results = None + autonomous_enabled = os.getenv("ENABLE_AUTONOMOUS_TOOLS", "true").lower() == "true" + tool_confidence_threshold = float(os.getenv("AUTONOMOUS_TOOL_CONFIDENCE_THRESHOLD", "0.6")) + + if autonomous_enabled and inner_result: + if VERBOSE_DEBUG: + 
logger.debug("[STAGE 0.8] Analyzing autonomous tool needs...") + + try: + from autonomy.tools.decision_engine import ToolDecisionEngine + from autonomy.tools.orchestrator import ToolOrchestrator + + # Analyze which tools to invoke + decision_engine = ToolDecisionEngine() + tool_decision = await decision_engine.analyze_tool_needs( + user_prompt=req.user_prompt, + monologue=inner_result, + context_state=context_state, + available_tools=["RAG", "WEB", "WEATHER", "CODEBRAIN"] + ) + + # Execute tools if confidence threshold met + if tool_decision["should_invoke_tools"] and tool_decision["confidence"] >= tool_confidence_threshold: + orchestrator = ToolOrchestrator(tool_timeout=30) + tool_results = await orchestrator.execute_tools( + tools_to_invoke=tool_decision["tools_to_invoke"], + context_state=context_state + ) + + # Format results for context injection + tool_context = orchestrator.format_results_for_context(tool_results) + context_state["autonomous_tool_results"] = tool_context + + if VERBOSE_DEBUG: + summary = tool_results.get("execution_summary", {}) + logger.debug(f"[STAGE 0.8] Tools executed: {summary.get('successful', [])} succeeded") + else: + if VERBOSE_DEBUG: + logger.debug(f"[STAGE 0.8] No tools invoked (confidence: {tool_decision.get('confidence', 0):.2f})") + + except Exception as e: + logger.warning(f"[STAGE 0.8] Autonomous tool invocation failed: {e}") + if VERBOSE_DEBUG: + import traceback + traceback.print_exc() + # ---------------------------------------------------------------- # STAGE 1 — Intake summary # ---------------------------------------------------------------- @@ -217,7 +266,7 @@ async def run_reason(req: ReasonRequest): update_last_assistant_message(req.session_id, persona_answer) # ---------------------------------------------------------------- - # STAGE 6.5 — Self-state update + # STAGE 6.5 — Self-state update & Pattern Learning # ---------------------------------------------------------------- try: from autonomy.self.analyzer import 
analyze_and_update_state @@ -230,6 +279,50 @@ async def run_reason(req: ReasonRequest): except Exception as e: logger.warning(f"[SELF_STATE] Update failed: {e}") + # Pattern learning + try: + from autonomy.learning.pattern_learner import get_pattern_learner + learner = get_pattern_learner() + await learner.learn_from_interaction( + user_prompt=req.user_prompt, + response=persona_answer, + monologue=inner_result or {}, + context=context_state + ) + except Exception as e: + logger.warning(f"[PATTERN_LEARNER] Learning failed: {e}") + + # ---------------------------------------------------------------- + # STAGE 7 — Proactive Monitoring & Suggestions + # ---------------------------------------------------------------- + proactive_enabled = os.getenv("ENABLE_PROACTIVE_MONITORING", "true").lower() == "true" + proactive_min_priority = float(os.getenv("PROACTIVE_SUGGESTION_MIN_PRIORITY", "0.6")) + + if proactive_enabled: + try: + from autonomy.proactive.monitor import get_proactive_monitor + from autonomy.self.state import load_self_state + + monitor = get_proactive_monitor(min_priority=proactive_min_priority) + self_state = load_self_state() + + suggestion = await monitor.analyze_session( + session_id=req.session_id, + context_state=context_state, + self_state=self_state + ) + + # Append suggestion to response if exists + if suggestion: + suggestion_text = monitor.format_suggestion(suggestion) + persona_answer += suggestion_text + + if VERBOSE_DEBUG: + logger.debug(f"[STAGE 7] Proactive suggestion added: {suggestion['type']} (priority: {suggestion['priority']:.2f})") + + except Exception as e: + logger.warning(f"[STAGE 7] Proactive monitoring failed: {e}") + if VERBOSE_DEBUG: logger.debug(f"\n{'='*80}") logger.debug(f"[PIPELINE COMPLETE] Session: {req.session_id}") diff --git a/cortex/tests/test_autonomy_phase2.py b/cortex/tests/test_autonomy_phase2.py new file mode 100644 index 0000000..aa5956a --- /dev/null +++ b/cortex/tests/test_autonomy_phase2.py @@ -0,0 +1,495 @@ 
+""" +Integration tests for Phase 2 autonomy features. +Tests autonomous tool invocation, proactive monitoring, actions, and pattern learning. +""" + +import asyncio +import json +import sys +import os + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# Override self-state file path for testing +os.environ["SELF_STATE_FILE"] = "/tmp/test_self_state.json" + +from autonomy.tools.decision_engine import ToolDecisionEngine +from autonomy.tools.orchestrator import ToolOrchestrator +from autonomy.proactive.monitor import ProactiveMonitor +from autonomy.actions.autonomous_actions import AutonomousActionManager +from autonomy.learning.pattern_learner import PatternLearner +from autonomy.self.state import load_self_state, get_self_state_instance + + +async def test_tool_decision_engine(): + """Test autonomous tool decision making.""" + print("\n" + "="*60) + print("TEST 1: Tool Decision Engine") + print("="*60) + + engine = ToolDecisionEngine() + + # Test 1a: Memory reference detection + result = await engine.analyze_tool_needs( + user_prompt="What did we discuss earlier about Python?", + monologue={"intent": "clarification", "consult_executive": False}, + context_state={}, + available_tools=["RAG", "WEB", "WEATHER"] + ) + + assert result["should_invoke_tools"], "Should invoke tools for memory reference" + assert any(t["tool"] == "RAG" for t in result["tools_to_invoke"]), "Should recommend RAG" + assert result["confidence"] > 0.8, f"Confidence should be high for clear memory reference: {result['confidence']}" + + print(f" ✓ Memory reference detection passed") + print(f" Tools: {[t['tool'] for t in result['tools_to_invoke']]}") + print(f" Confidence: {result['confidence']:.2f}") + + # Test 1b: Web search detection + result = await engine.analyze_tool_needs( + user_prompt="What's the latest news about AI developments?", + monologue={"intent": "information_seeking", "consult_executive": False}, + 
context_state={}, + available_tools=["RAG", "WEB", "WEATHER"] + ) + + assert result["should_invoke_tools"], "Should invoke tools for current info request" + assert any(t["tool"] == "WEB" for t in result["tools_to_invoke"]), "Should recommend WEB" + + print(f" ✓ Web search detection passed") + print(f" Tools: {[t['tool'] for t in result['tools_to_invoke']]}") + + # Test 1c: Weather detection + result = await engine.analyze_tool_needs( + user_prompt="What's the weather like today in Boston?", + monologue={"intent": "information_seeking", "consult_executive": False}, + context_state={}, + available_tools=["RAG", "WEB", "WEATHER"] + ) + + assert result["should_invoke_tools"], "Should invoke tools for weather query" + assert any(t["tool"] == "WEATHER" for t in result["tools_to_invoke"]), "Should recommend WEATHER" + + print(f" ✓ Weather detection passed") + + # Test 1d: Proactive RAG for complex queries + result = await engine.analyze_tool_needs( + user_prompt="Design a microservices architecture", + monologue={"intent": "technical_implementation", "consult_executive": True}, + context_state={}, + available_tools=["RAG", "WEB", "CODEBRAIN"] + ) + + assert result["should_invoke_tools"], "Should proactively invoke tools for complex queries" + rag_tools = [t for t in result["tools_to_invoke"] if t["tool"] == "RAG"] + assert len(rag_tools) > 0, "Should include proactive RAG" + + print(f" ✓ Proactive RAG detection passed") + print(f" Reason: {rag_tools[0]['reason']}") + + print("\n✓ Tool Decision Engine tests passed\n") + return result + + +async def test_tool_orchestrator(): + """Test tool orchestration (mock mode).""" + print("\n" + "="*60) + print("TEST 2: Tool Orchestrator (Mock Mode)") + print("="*60) + + orchestrator = ToolOrchestrator(tool_timeout=5) + + # Since actual tools may not be available, test the orchestrator structure + print(f" Available tools: {list(orchestrator.available_tools.keys())}") + + # Test with tools_to_invoke (will fail gracefully if tools 
unavailable) + tools_to_invoke = [ + {"tool": "RAG", "query": "test query", "reason": "testing", "priority": 0.9} + ] + + result = await orchestrator.execute_tools( + tools_to_invoke=tools_to_invoke, + context_state={"session_id": "test"} + ) + + assert "results" in result, "Should return results dict" + assert "execution_summary" in result, "Should return execution summary" + + summary = result["execution_summary"] + assert "tools_invoked" in summary, "Summary should include tools_invoked" + assert "total_time_ms" in summary, "Summary should include timing" + + print(f" ✓ Orchestrator structure valid") + print(f" Summary: {summary}") + + # Test result formatting + formatted = orchestrator.format_results_for_context(result) + assert isinstance(formatted, str), "Should format results as string" + + print(f" ✓ Result formatting works") + print(f" Formatted length: {len(formatted)} chars") + + print("\n✓ Tool Orchestrator tests passed\n") + return result + + +async def test_proactive_monitor(): + """Test proactive monitoring and suggestions.""" + print("\n" + "="*60) + print("TEST 3: Proactive Monitor") + print("="*60) + + monitor = ProactiveMonitor(min_priority=0.6) + + # Test 3a: Long silence detection + context_state = { + "message_count": 5, + "minutes_since_last_msg": 35 # > 30 minutes + } + + self_state = load_self_state() + + suggestion = await monitor.analyze_session( + session_id="test_silence", + context_state=context_state, + self_state=self_state + ) + + assert suggestion is not None, "Should generate suggestion for long silence" + assert suggestion["type"] == "check_in", f"Should be check_in type: {suggestion['type']}" + assert suggestion["priority"] >= 0.6, "Priority should meet threshold" + + print(f" ✓ Long silence detection passed") + print(f" Type: {suggestion['type']}, Priority: {suggestion['priority']:.2f}") + print(f" Suggestion: {suggestion['suggestion'][:50]}...") + + # Test 3b: Learning opportunity (high curiosity) + self_state["curiosity"] = 
0.8 + self_state["learning_queue"] = ["quantum computing", "rust programming"] + + # Reset cooldown for this test + monitor.reset_cooldown("test_learning") + + suggestion = await monitor.analyze_session( + session_id="test_learning", + context_state={"message_count": 3, "minutes_since_last_msg": 2}, + self_state=self_state + ) + + assert suggestion is not None, "Should generate learning suggestion" + assert suggestion["type"] == "learning", f"Should be learning type: {suggestion['type']}" + + print(f" ✓ Learning opportunity detection passed") + print(f" Suggestion: {suggestion['suggestion'][:70]}...") + + # Test 3c: Conversation milestone + monitor.reset_cooldown("test_milestone") + + # Reset curiosity to avoid learning suggestion taking precedence + self_state["curiosity"] = 0.5 + self_state["learning_queue"] = [] + + suggestion = await monitor.analyze_session( + session_id="test_milestone", + context_state={"message_count": 50, "minutes_since_last_msg": 1}, + self_state=self_state + ) + + assert suggestion is not None, "Should generate milestone suggestion" + # Note: learning or summary both valid - check it's a reasonable suggestion + assert suggestion["type"] in ["summary", "learning", "check_in"], f"Should be valid type: {suggestion['type']}" + + print(f" ✓ Conversation milestone detection passed (type: {suggestion['type']})") + + # Test 3d: Cooldown mechanism + # Try to get another suggestion immediately (should be blocked) + suggestion2 = await monitor.analyze_session( + session_id="test_milestone", + context_state={"message_count": 51, "minutes_since_last_msg": 1}, + self_state=self_state + ) + + assert suggestion2 is None, "Should not generate suggestion during cooldown" + + print(f" ✓ Cooldown mechanism working") + + # Check stats + stats = monitor.get_session_stats("test_milestone") + assert stats["cooldown_active"], "Cooldown should be active" + print(f" Cooldown remaining: {stats['cooldown_remaining']}s") + + print("\n✓ Proactive Monitor tests 
passed\n") + return suggestion + + +async def test_autonomous_actions(): + """Test autonomous action execution.""" + print("\n" + "="*60) + print("TEST 4: Autonomous Actions") + print("="*60) + + manager = AutonomousActionManager() + + # Test 4a: List allowed actions + allowed = manager.get_allowed_actions() + assert "create_memory" in allowed, "Should have create_memory action" + assert "update_goal" in allowed, "Should have update_goal action" + assert "learn_topic" in allowed, "Should have learn_topic action" + + print(f" ✓ Allowed actions: {allowed}") + + # Test 4b: Validate actions + validation = manager.validate_action("create_memory", {"text": "test memory"}) + assert validation["valid"], "Should validate correct action" + + print(f" ✓ Action validation passed") + + # Test 4c: Execute learn_topic action + result = await manager.execute_action( + action_type="learn_topic", + parameters={"topic": "rust programming", "reason": "testing", "priority": 0.8}, + context={"session_id": "test"} + ) + + assert result["success"], f"Action should succeed: {result.get('error', 'unknown')}" + assert "topic" in result["result"], "Should return topic info" + + print(f" ✓ learn_topic action executed") + print(f" Topic: {result['result']['topic']}") + print(f" Queue position: {result['result']['queue_position']}") + + # Test 4d: Execute update_focus action + result = await manager.execute_action( + action_type="update_focus", + parameters={"focus": "autonomy_testing", "reason": "running tests"}, + context={"session_id": "test"} + ) + + assert result["success"], "update_focus should succeed" + + print(f" ✓ update_focus action executed") + print(f" New focus: {result['result']['new_focus']}") + + # Test 4e: Reject non-whitelisted action + result = await manager.execute_action( + action_type="delete_all_files", # NOT in whitelist + parameters={}, + context={"session_id": "test"} + ) + + assert not result["success"], "Should reject non-whitelisted action" + assert "not in 
whitelist" in result["error"], "Should indicate whitelist violation" + + print(f" ✓ Non-whitelisted action rejected") + + # Test 4f: Action log + log = manager.get_action_log(limit=10) + assert len(log) >= 2, f"Should have logged multiple actions (got {len(log)})" + + print(f" ✓ Action log contains {len(log)} entries") + + print("\n✓ Autonomous Actions tests passed\n") + return result + + +async def test_pattern_learner(): + """Test pattern learning system.""" + print("\n" + "="*60) + print("TEST 5: Pattern Learner") + print("="*60) + + # Use temp file for testing + test_file = "/tmp/test_patterns.json" + learner = PatternLearner(patterns_file=test_file) + + # Test 5a: Learn from multiple interactions + for i in range(5): + await learner.learn_from_interaction( + user_prompt=f"Help me with Python coding task {i}", + response=f"Here's help with task {i}...", + monologue={"intent": "coding_help", "tone": "focused", "depth": "medium"}, + context={"session_id": "test", "executive_plan": None} + ) + + print(f" ✓ Learned from 5 interactions") + + # Test 5b: Get top topics + top_topics = learner.get_top_topics(limit=5) + assert len(top_topics) > 0, "Should have learned topics" + assert "coding_help" == top_topics[0][0], "coding_help should be top topic" + + print(f" ✓ Top topics: {[t[0] for t in top_topics[:3]]}") + + # Test 5c: Get preferred tone + preferred_tone = learner.get_preferred_tone() + assert preferred_tone == "focused", "Should detect focused as preferred tone" + + print(f" ✓ Preferred tone: {preferred_tone}") + + # Test 5d: Get preferred depth + preferred_depth = learner.get_preferred_depth() + assert preferred_depth == "medium", "Should detect medium as preferred depth" + + print(f" ✓ Preferred depth: {preferred_depth}") + + # Test 5e: Get insights + insights = learner.get_insights() + assert insights["total_interactions"] == 5, "Should track interaction count" + assert insights["preferred_tone"] == "focused", "Insights should include tone" + + print(f" ✓ 
Insights generated:") + print(f" Total interactions: {insights['total_interactions']}") + print(f" Recommendations: {insights['learning_recommendations']}") + + # Test 5f: Export patterns + exported = learner.export_patterns() + assert "topic_frequencies" in exported, "Should export all patterns" + + print(f" ✓ Patterns exported ({len(exported)} keys)") + + # Cleanup + if os.path.exists(test_file): + os.remove(test_file) + + print("\n✓ Pattern Learner tests passed\n") + return insights + + +async def test_end_to_end_autonomy(): + """Test complete autonomous flow.""" + print("\n" + "="*60) + print("TEST 6: End-to-End Autonomy Flow") + print("="*60) + + # Simulate a complex user query that triggers multiple autonomous systems + user_prompt = "Remember what we discussed about machine learning? I need current research on transformers." + + monologue = { + "intent": "technical_research", + "tone": "focused", + "depth": "deep", + "consult_executive": True + } + + context_state = { + "session_id": "e2e_test", + "message_count": 15, + "minutes_since_last_msg": 5 + } + + print(f" User prompt: {user_prompt}") + print(f" Monologue intent: {monologue['intent']}") + + # Step 1: Tool decision engine + engine = ToolDecisionEngine() + tool_decision = await engine.analyze_tool_needs( + user_prompt=user_prompt, + monologue=monologue, + context_state=context_state, + available_tools=["RAG", "WEB", "CODEBRAIN"] + ) + + print(f"\n Step 1: Tool Decision") + print(f" Should invoke: {tool_decision['should_invoke_tools']}") + print(f" Tools: {[t['tool'] for t in tool_decision['tools_to_invoke']]}") + assert tool_decision["should_invoke_tools"], "Should invoke tools" + assert len(tool_decision["tools_to_invoke"]) >= 2, "Should recommend multiple tools (RAG + WEB)" + + # Step 2: Pattern learning + learner = PatternLearner(patterns_file="/tmp/e2e_test_patterns.json") + await learner.learn_from_interaction( + user_prompt=user_prompt, + response="Here's information about transformers...", + 
monologue=monologue, + context=context_state + ) + + print(f"\n Step 2: Pattern Learning") + top_topics = learner.get_top_topics(limit=3) + print(f" Learned topics: {[t[0] for t in top_topics]}") + + # Step 3: Autonomous action + action_manager = AutonomousActionManager() + action_result = await action_manager.execute_action( + action_type="learn_topic", + parameters={"topic": "transformer architectures", "reason": "user interest detected"}, + context=context_state + ) + + print(f"\n Step 3: Autonomous Action") + print(f" Action: learn_topic") + print(f" Success: {action_result['success']}") + + # Step 4: Proactive monitoring (won't trigger due to low message count) + monitor = ProactiveMonitor(min_priority=0.6) + monitor.reset_cooldown("e2e_test") + + suggestion = await monitor.analyze_session( + session_id="e2e_test", + context_state=context_state, + self_state=load_self_state() + ) + + print(f"\n Step 4: Proactive Monitoring") + print(f" Suggestion: {suggestion['type'] if suggestion else 'None (expected for low message count)'}") + + # Cleanup + if os.path.exists("/tmp/e2e_test_patterns.json"): + os.remove("/tmp/e2e_test_patterns.json") + + print("\n✓ End-to-End Autonomy Flow tests passed\n") + return True + + +async def run_all_tests(): + """Run all Phase 2 tests.""" + print("\n" + "="*60) + print("PHASE 2 AUTONOMY TESTS") + print("="*60) + + try: + # Test 1: Tool Decision Engine + await test_tool_decision_engine() + + # Test 2: Tool Orchestrator + await test_tool_orchestrator() + + # Test 3: Proactive Monitor + await test_proactive_monitor() + + # Test 4: Autonomous Actions + await test_autonomous_actions() + + # Test 5: Pattern Learner + await test_pattern_learner() + + # Test 6: End-to-End + await test_end_to_end_autonomy() + + print("\n" + "="*60) + print("ALL PHASE 2 TESTS PASSED ✓") + print("="*60) + + print("\nPhase 2 Features Validated:") + print(" ✓ Autonomous tool decision making") + print(" ✓ Tool orchestration and execution") + print(" ✓ Proactive 
monitoring and suggestions") + print(" ✓ Safe autonomous actions") + print(" ✓ Pattern learning and adaptation") + print(" ✓ End-to-end autonomous flow") + + return True + + except Exception as e: + print("\n" + "="*60) + print(f"TEST FAILED: {e}") + print("="*60) + import traceback + traceback.print_exc() + return False + + +if __name__ == "__main__": + success = asyncio.run(run_all_tests()) + sys.exit(0 if success else 1) From 0528d10081ad0b8e1793d153e62bcbe16ac1591f Mon Sep 17 00:00:00 2001 From: serversdwn Date: Mon, 15 Dec 2025 01:56:57 -0500 Subject: [PATCH 05/10] autonomy phase 2.5 - tightening up some stuff in the pipeline --- cortex/data/self_state.json | 6 ++-- cortex/router.py | 3 +- neomem/neomem/vector_stores/qdrant.py | 50 +++++++++++++++++++++++---- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json index b9fc83f..56d5ae5 100644 --- a/cortex/data/self_state.json +++ b/cortex/data/self_state.json @@ -3,9 +3,9 @@ "energy": 0.8, "focus": "user_request", "confidence": 0.7, - "curiosity": 0.7000000000000002, - "last_updated": "2025-12-14T19:29:49.051207", - "interaction_count": 5, + "curiosity": 1.0, + "last_updated": "2025-12-15T05:38:06.084867", + "interaction_count": 14, "learning_queue": [], "active_goals": [], "preferences": { diff --git a/cortex/router.py b/cortex/router.py index 75d514d..8bbbc74 100644 --- a/cortex/router.py +++ b/cortex/router.py @@ -301,10 +301,9 @@ async def run_reason(req: ReasonRequest): if proactive_enabled: try: from autonomy.proactive.monitor import get_proactive_monitor - from autonomy.self.state import load_self_state monitor = get_proactive_monitor(min_priority=proactive_min_priority) - self_state = load_self_state() + self_state = load_self_state() # Already imported at top of file suggestion = await monitor.analyze_session( session_id=req.session_id, diff --git a/neomem/neomem/vector_stores/qdrant.py b/neomem/neomem/vector_stores/qdrant.py index 
59ee9a9..456da2e 100644 --- a/neomem/neomem/vector_stores/qdrant.py +++ b/neomem/neomem/vector_stores/qdrant.py @@ -1,7 +1,9 @@ import logging import os import shutil +from typing import Optional +from pydantic import BaseModel from qdrant_client import QdrantClient from qdrant_client.models import ( Distance, @@ -19,6 +21,13 @@ from mem0.vector_stores.base import VectorStoreBase logger = logging.getLogger(__name__) +class OutputData(BaseModel): + """Standard output format for vector search results.""" + id: Optional[str] + score: Optional[float] + payload: Optional[dict] + + class Qdrant(VectorStoreBase): def __init__( self, @@ -170,7 +179,7 @@ class Qdrant(VectorStoreBase): filters (dict, optional): Filters to apply to the search. Defaults to None. Returns: - list: Search results. + list: Search results wrapped in OutputData format. """ query_filter = self._create_filter(filters) if filters else None hits = self.client.query_points( @@ -179,7 +188,16 @@ class Qdrant(VectorStoreBase): query_filter=query_filter, limit=limit, ) - return hits.points + + # Wrap results in OutputData format to match other vector stores + return [ + OutputData( + id=str(hit.id), + score=hit.score, + payload=hit.payload + ) + for hit in hits.points + ] def delete(self, vector_id: int): """ @@ -207,7 +225,7 @@ class Qdrant(VectorStoreBase): point = PointStruct(id=vector_id, vector=vector, payload=payload) self.client.upsert(collection_name=self.collection_name, points=[point]) - def get(self, vector_id: int) -> dict: + def get(self, vector_id: int) -> OutputData: """ Retrieve a vector by ID. @@ -215,10 +233,17 @@ class Qdrant(VectorStoreBase): vector_id (int): ID of the vector to retrieve. Returns: - dict: Retrieved vector. + OutputData: Retrieved vector wrapped in OutputData format. 
""" result = self.client.retrieve(collection_name=self.collection_name, ids=[vector_id], with_payload=True) - return result[0] if result else None + if result: + hit = result[0] + return OutputData( + id=str(hit.id), + score=None, # No score for direct retrieval + payload=hit.payload + ) + return None def list_cols(self) -> list: """ @@ -251,7 +276,7 @@ class Qdrant(VectorStoreBase): limit (int, optional): Number of vectors to return. Defaults to 100. Returns: - list: List of vectors. + list: List of vectors wrapped in OutputData format. """ query_filter = self._create_filter(filters) if filters else None result = self.client.scroll( @@ -261,7 +286,18 @@ class Qdrant(VectorStoreBase): with_payload=True, with_vectors=False, ) - return result + + # Wrap results in OutputData format + # scroll() returns tuple: (points, next_page_offset) + points = result[0] if isinstance(result, tuple) else result + return [ + OutputData( + id=str(point.id), + score=None, # No score for list operation + payload=point.payload + ) + for point in points + ] def reset(self): """Reset the index by deleting and recreating it.""" From 0a0354603997de3e61de553cc12f586a7398a276 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Mon, 15 Dec 2025 04:10:03 -0500 Subject: [PATCH 06/10] neomem disabled --- cortex/autonomy/tools/orchestrator.py | 15 +++++++++------ cortex/context.py | 19 ++++++++++++++----- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cortex/autonomy/tools/orchestrator.py b/cortex/autonomy/tools/orchestrator.py index 9658721..0b0b03d 100644 --- a/cortex/autonomy/tools/orchestrator.py +++ b/cortex/autonomy/tools/orchestrator.py @@ -28,12 +28,15 @@ class ToolOrchestrator: tools = {} # Import tool modules as they become available - try: - from memory.neomem_client import search_neomem - tools["RAG"] = search_neomem - logger.debug("[ORCHESTRATOR] RAG tool available") - except ImportError: - logger.debug("[ORCHESTRATOR] RAG tool not available") + if 
os.getenv("NEOMEM_ENABLED", "false").lower() == "true": + try: + from memory.neomem_client import search_neomem + tools["RAG"] = search_neomem + logger.debug("[ORCHESTRATOR] RAG tool available") + except ImportError: + logger.debug("[ORCHESTRATOR] RAG tool not available") + else: + logger.info("[ORCHESTRATOR] NEOMEM_ENABLED is false; RAG tool disabled") try: from integrations.web_search import web_search diff --git a/cortex/context.py b/cortex/context.py index 341946d..6db9ad5 100644 --- a/cortex/context.py +++ b/cortex/context.py @@ -24,6 +24,7 @@ from neomem_client import NeoMemClient # Configuration # ----------------------------- NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000") +NEOMEM_ENABLED = os.getenv("NEOMEM_ENABLED", "false").lower() == "true" RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4")) VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true" @@ -148,6 +149,10 @@ async def _search_neomem( Returns: List of memory objects with full structure, or empty list on failure """ + if not NEOMEM_ENABLED: + logger.info("NeoMem search skipped (NEOMEM_ENABLED is false)") + return [] + try: # NeoMemClient reads NEOMEM_API from environment, no base_url parameter client = NeoMemClient() @@ -259,11 +264,15 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]: logger.debug(json.dumps(intake_data, indent=2, default=str)) # D. 
Search NeoMem for relevant memories - rag_results = await _search_neomem( - query=user_prompt, - user_id="brian", # TODO: Make configurable per session - limit=5 - ) + if NEOMEM_ENABLED: + rag_results = await _search_neomem( + query=user_prompt, + user_id="brian", # TODO: Make configurable per session + limit=5 + ) + else: + rag_results = [] + logger.info("Skipping NeoMem RAG retrieval; NEOMEM_ENABLED is false") if VERBOSE_DEBUG: logger.debug(f"[COLLECT_CONTEXT] NeoMem search returned {len(rag_results)} results") From b74658c00040cf30ff17f007a4db3f5eaccdaeb1 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Mon, 15 Dec 2025 11:49:49 -0500 Subject: [PATCH 07/10] complete breakdown for AI agents added --- cortex/data/self_state.json | 4 +- docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md | 2216 +++++++++++++++++++++++ 2 files changed, 2218 insertions(+), 2 deletions(-) create mode 100644 docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json index 56d5ae5..ce52668 100644 --- a/cortex/data/self_state.json +++ b/cortex/data/self_state.json @@ -4,8 +4,8 @@ "focus": "user_request", "confidence": 0.7, "curiosity": 1.0, - "last_updated": "2025-12-15T05:38:06.084867", - "interaction_count": 14, + "last_updated": "2025-12-15T07:43:32.567849", + "interaction_count": 15, "learning_queue": [], "active_goals": [], "preferences": { diff --git a/docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md b/docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md new file mode 100644 index 0000000..b628d46 --- /dev/null +++ b/docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md @@ -0,0 +1,2216 @@ +# Project Lyra - Complete System Breakdown + +**Version:** v0.5.2 +**Last Updated:** 2025-12-12 +**Purpose:** AI-friendly comprehensive documentation for understanding the entire system + +--- + +## Table of Contents + +1. [System Overview](#system-overview) +2. [Architecture Diagram](#architecture-diagram) +3. [Core Components](#core-components) +4. 
[Data Flow & Message Pipeline](#data-flow--message-pipeline) +5. [Module Deep Dives](#module-deep-dives) +6. [Configuration & Environment](#configuration--environment) +7. [Dependencies & Tech Stack](#dependencies--tech-stack) +8. [Key Concepts & Design Patterns](#key-concepts--design-patterns) +9. [API Reference](#api-reference) +10. [Deployment & Operations](#deployment--operations) +11. [Known Issues & Constraints](#known-issues--constraints) + +--- + +## System Overview + +### What is Project Lyra? + +Project Lyra is a **modular, persistent AI companion system** designed to address the fundamental limitation of typical chatbots: **amnesia**. Unlike standard conversational AI that forgets everything between sessions, Lyra maintains: + +- **Persistent memory** (short-term and long-term) +- **Project continuity** across conversations +- **Multi-stage reasoning** for sophisticated responses +- **Flexible LLM backend** support (local and cloud) +- **Self-awareness** through autonomy modules + +### Mission Statement + +Give an AI chatbot capabilities beyond typical amnesic chat by providing memory-backed conversation, project organization, executive function with proactive insights, and a sophisticated reasoning pipeline. 
+ +### Key Features + +- **Memory System:** Dual-layer (short-term Intake + long-term NeoMem) +- **4-Stage Reasoning Pipeline:** Reflection → Reasoning → Refinement → Persona +- **Multi-Backend LLM Support:** Cloud (OpenAI) + Local (llama.cpp, Ollama) +- **Microservices Architecture:** Docker-based, horizontally scalable +- **Modern Web UI:** Cyberpunk-themed chat interface with session management +- **OpenAI-Compatible API:** Drop-in replacement for standard chatbots + +--- + +## Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ USER INTERFACE │ +│ (Browser - Port 8081) │ +└────────────────────────────────┬────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ RELAY (Orchestrator) │ +│ Node.js/Express - Port 7078 │ +│ • Routes messages to Cortex │ +│ • Manages sessions (in-memory) │ +│ • OpenAI-compatible endpoints │ +│ • Async ingestion to NeoMem │ +└─────┬───────────────────────────────────────────────────────────┬───┘ + │ │ + ▼ ▼ +┌─────────────────────────────────────────┐ ┌──────────────────────┐ +│ CORTEX (Reasoning Engine) │ │ NeoMem (LT Memory) │ +│ Python/FastAPI - Port 7081 │ │ Python - Port 7077 │ +│ │ │ │ +│ ┌───────────────────────────────────┐ │ │ • PostgreSQL │ +│ │ 4-STAGE REASONING PIPELINE │ │ │ • Neo4j Graph DB │ +│ │ │ │ │ • pgvector │ +│ │ 0. Context Collection │ │◄───┤ • Semantic search │ +│ │ ├─ Intake summaries │ │ │ • Memory updates │ +│ │ ├─ NeoMem search ────────────┼─┼────┘ │ +│ │ └─ Session state │ │ │ +│ │ │ │ │ +│ │ 0.5. Load Identity │ │ │ +│ │ 0.6. Inner Monologue (observer) │ │ │ +│ │ │ │ │ +│ │ 1. Reflection (OpenAI) │ │ │ +│ │ └─ Meta-awareness notes │ │ │ +│ │ │ │ │ +│ │ 2. Reasoning (PRIMARY/llama.cpp) │ │ │ +│ │ └─ Draft answer │ │ │ +│ │ │ │ │ +│ │ 3. Refinement (PRIMARY) │ │ │ +│ │ └─ Polish answer │ │ │ +│ │ │ │ │ +│ │ 4. 
Persona (OpenAI) │ │ │ +│ │ └─ Apply Lyra voice │ │ │ +│ └───────────────────────────────────┘ │ │ +│ │ │ +│ ┌───────────────────────────────────┐ │ │ +│ │ EMBEDDED MODULES │ │ │ +│ │ │ │ │ +│ │ • Intake (Short-term Memory) │ │ │ +│ │ └─ SESSIONS dict (in-memory) │ │ │ +│ │ └─ Circular buffer (200 msgs) │ │ │ +│ │ └─ Multi-level summaries │ │ │ +│ │ │ │ │ +│ │ • Persona (Identity & Style) │ │ │ +│ │ └─ Lyra personality block │ │ │ +│ │ │ │ │ +│ │ • Autonomy (Self-state) │ │ │ +│ │ └─ Inner monologue │ │ │ +│ │ │ │ │ +│ │ • LLM Router │ │ │ +│ │ └─ Multi-backend support │ │ │ +│ └───────────────────────────────────┘ │ │ +└─────────────────────────────────────────┘ │ + │ +┌─────────────────────────────────────────────────────────────────────┤ +│ EXTERNAL LLM BACKENDS │ +├─────────────────────────────────────────────────────────────────────┤ +│ • PRIMARY: llama.cpp (MI50 GPU) - 10.0.0.43:8000 │ +│ • SECONDARY: Ollama (RTX 3090) - 10.0.0.3:11434 │ +│ • CLOUD: OpenAI API - api.openai.com │ +│ • FALLBACK: OpenAI Completions - 10.0.0.41:11435 │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Core Components + +### 1. 
Relay (Orchestrator) + +**Location:** `/core/relay/` +**Runtime:** Node.js + Express +**Port:** 7078 +**Role:** Main message router and session manager + +#### Key Responsibilities: +- Receives user messages from UI or API clients +- Routes messages to Cortex reasoning pipeline +- Manages in-memory session storage +- Handles async ingestion to NeoMem (planned) +- Returns OpenAI-formatted responses + +#### Main Files: +- `server.js` (200+ lines) - Express server with routing logic +- `package.json` - Dependencies (cors, express, dotenv, mem0ai, node-fetch) + +#### Key Endpoints: +```javascript +POST /v1/chat/completions // OpenAI-compatible endpoint +POST /chat // Lyra-native chat endpoint +GET /_health // Health check +GET /sessions/:id // Retrieve session history +POST /sessions/:id // Save session history +``` + +#### Internal Flow: +```javascript +// Both endpoints call handleChatRequest(session_id, user_msg) +async function handleChatRequest(sessionId, userMessage) { + // 1. Forward to Cortex + const response = await fetch('http://cortex:7081/reason', { + method: 'POST', + body: JSON.stringify({ session_id: sessionId, user_message: userMessage }) + }); + + // 2. Get response + const result = await response.json(); + + // 3. Async ingestion to Cortex + await fetch('http://cortex:7081/ingest', { + method: 'POST', + body: JSON.stringify({ + session_id: sessionId, + user_message: userMessage, + assistant_message: result.answer + }) + }); + + // 4. (Planned) Async ingestion to NeoMem + + // 5. Return OpenAI-formatted response + return { + choices: [{ message: { role: 'assistant', content: result.answer } }] + }; +} +``` + +--- + +### 2. Cortex (Reasoning Engine) + +**Location:** `/cortex/` +**Runtime:** Python 3.11 + FastAPI +**Port:** 7081 +**Role:** Primary reasoning engine with 4-stage pipeline + +#### Architecture: +Cortex is the "brain" of Lyra. It receives user messages and produces thoughtful responses through a multi-stage reasoning process. 
+ +#### Key Responsibilities: +- Context collection from multiple sources (Intake, NeoMem, session state) +- 4-stage reasoning pipeline (Reflection → Reasoning → Refinement → Persona) +- Short-term memory management (embedded Intake module) +- Identity/persona application +- LLM backend routing + +#### Main Files: +- `main.py` (7 lines) - FastAPI app entry point +- `router.py` (237 lines) - Main request handler & pipeline orchestrator +- `context.py` (400+ lines) - Context collection logic +- `intake/intake.py` (350+ lines) - Short-term memory module +- `persona/identity.py` - Lyra identity configuration +- `persona/speak.py` - Personality application +- `reasoning/reflection.py` - Meta-awareness generation +- `reasoning/reasoning.py` - Draft answer generation +- `reasoning/refine.py` - Answer refinement +- `llm/llm_router.py` (150+ lines) - LLM backend router +- `autonomy/monologue/monologue.py` - Inner monologue processor +- `neomem_client.py` - NeoMem API wrapper + +#### Key Endpoints: +```python +POST /reason # Main reasoning pipeline +POST /ingest # Receive message exchanges for storage +GET /health # Health check +GET /debug/sessions # Inspect in-memory SESSIONS state +GET /debug/summary # Test summarization +``` + +--- + +### 3. Intake (Short-Term Memory) + +**Location:** `/cortex/intake/intake.py` +**Architecture:** Embedded Python module (no longer standalone service) +**Role:** Session-based short-term memory with multi-level summarization + +#### Data Structure: +```python +# Global in-memory dictionary +SESSIONS = { + "session_123": { + "buffer": deque([msg1, msg2, ...], maxlen=200), # Circular buffer + "created_at": "2025-12-12T10:30:00Z" + } +} + +# Message format in buffer +{ + "role": "user" | "assistant", + "content": "message text", + "timestamp": "ISO 8601" +} +``` + +#### Key Features: + +1. **Circular Buffer:** Max 200 messages per session (oldest auto-evicted) +2. 
**Multi-Level Summarization:** + - L1: Last 1 message + - L5: Last 5 messages + - L10: Last 10 messages + - L20: Last 20 messages + - L30: Last 30 messages +3. **Deferred Summarization:** Summaries generated on-demand, not pre-computed +4. **Session Management:** Automatic session creation on first message + +#### Critical Constraint: +**Single Uvicorn worker required** to maintain shared SESSIONS dictionary state. Multi-worker deployments would require migrating to Redis or similar shared storage. + +#### Main Functions: +```python +def add_exchange_internal(session_id, user_msg, assistant_msg): + """Add user-assistant exchange to session buffer""" + +def summarize_context(session_id, backend="PRIMARY"): + """Generate multi-level summaries from session buffer""" + +def get_session_messages(session_id): + """Retrieve all messages in session buffer""" +``` + +#### Summarization Strategy: +```python +# Example L10 summarization +last_10 = list(session_buffer)[-10:] +prompt = f"""Summarize the last 10 messages: +{format_messages(last_10)} + +Provide concise summary focusing on key topics and context.""" + +summary = await call_llm(prompt, backend=backend, temperature=0.3) +``` + +--- + +### 4. NeoMem (Long-Term Memory) + +**Location:** `/neomem/` +**Runtime:** Python 3.11 + FastAPI +**Port:** 7077 +**Role:** Persistent long-term memory with semantic search + +#### Architecture: +NeoMem is a **fork of Mem0 OSS** with local-first design (no external SDK dependencies). + +#### Backend Storage: +1. **PostgreSQL + pgvector** (Port 5432) + - Vector embeddings for semantic search + - User: neomem, DB: neomem + - Image: `ankane/pgvector:v0.5.1` + +2. 
**Neo4j Graph DB** (Ports 7474, 7687) + - Entity relationship tracking + - Graph-based memory associations + - Image: `neo4j:5` + +#### Key Features: +- Semantic memory storage and retrieval +- Entity-relationship graph modeling +- RESTful API (no external SDK) +- Persistent across sessions + +#### Main Endpoints: +```python +GET /memories # List all memories +POST /memories # Create new memory +GET /search # Semantic search +DELETE /memories/{id} # Delete memory +``` + +#### Integration Flow: +```python +# From Cortex context collection +async def collect_context(session_id, user_message): + # 1. Search NeoMem for relevant memories + neomem_results = await neomem_client.search( + query=user_message, + limit=5 + ) + + # 2. Include in context + context = { + "neomem_memories": neomem_results, + "intake_summaries": intake.summarize_context(session_id), + # ... + } + + return context +``` + +--- + +### 5. UI (Web Interface) + +**Location:** `/core/ui/` +**Runtime:** Static files served by Nginx +**Port:** 8081 +**Role:** Browser-based chat interface + +#### Key Features: +- **Cyberpunk-themed design** with dark mode +- **Session management** via localStorage +- **OpenAI-compatible message format** +- **Model selection dropdown** +- **PWA support** (offline capability) +- **Responsive design** + +#### Main Files: +- `index.html` (400+ lines) - Chat interface with session management +- `style.css` - Cyberpunk-themed styling +- `manifest.json` - PWA configuration +- `sw.js` - Service worker for offline support + +#### Session Management: +```javascript +// LocalStorage structure +{ + "currentSessionId": "session_123", + "sessions": { + "session_123": { + "messages": [ + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi there!" } + ], + "created": "2025-12-12T10:30:00Z", + "title": "Conversation about..." 
+ } + } +} +``` + +#### API Communication: +```javascript +async function sendMessage(userMessage) { + const response = await fetch('http://localhost:7078/v1/chat/completions', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + messages: [{ role: 'user', content: userMessage }], + session_id: getCurrentSessionId() + }) + }); + + const data = await response.json(); + return data.choices[0].message.content; +} +``` + +--- + +## Data Flow & Message Pipeline + +### Complete Message Flow (v0.5.2) + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 1: User Input │ +└─────────────────────────────────────────────────────────────────────┘ +User types message in UI (Port 8081) + ↓ +localStorage saves message to session + ↓ +POST http://localhost:7078/v1/chat/completions + { + "messages": [{"role": "user", "content": "How do I deploy ML models?"}], + "session_id": "session_abc123" + } + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 2: Relay Routing │ +└─────────────────────────────────────────────────────────────────────┘ +Relay (server.js) receives request + ↓ +Extracts session_id and user_message + ↓ +POST http://cortex:7081/reason + { + "session_id": "session_abc123", + "user_message": "How do I deploy ML models?" 
+ } + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 3: Cortex - Stage 0 (Context Collection) │ +└─────────────────────────────────────────────────────────────────────┘ +router.py calls collect_context() + ↓ +context.py orchestrates parallel collection: + + ├─ Intake: summarize_context(session_id) + │ └─ Returns { L1, L5, L10, L20, L30 summaries } + │ + ├─ NeoMem: search(query=user_message, limit=5) + │ └─ Semantic search returns relevant memories + │ + └─ Session State: + └─ { timestamp, mode, mood, context_summary } + +Combined context structure: +{ + "user_message": "How do I deploy ML models?", + "self_state": { + "current_time": "2025-12-12T15:30:00Z", + "mode": "conversational", + "mood": "helpful", + "session_id": "session_abc123" + }, + "context_summary": { + "L1": "User asked about deployment", + "L5": "Discussion about ML workflows", + "L10": "Previous context on CI/CD pipelines", + "L20": "...", + "L30": "..." + }, + "neomem_memories": [ + { "content": "User prefers Docker for deployments", "score": 0.92 }, + { "content": "Previously deployed models on AWS", "score": 0.87 } + ] +} + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 4: Cortex - Stage 0.5 (Load Identity) │ +└─────────────────────────────────────────────────────────────────────┘ +persona/identity.py loads Lyra personality block + ↓ +Returns identity string: +""" +You are Lyra, a thoughtful AI companion. +You value clarity, depth, and meaningful conversation. +You speak naturally and conversationally... 
+""" + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 5: Cortex - Stage 0.6 (Inner Monologue - Observer Only) │ +└─────────────────────────────────────────────────────────────────────┘ +autonomy/monologue/monologue.py processes context + ↓ +InnerMonologue.process(context) → JSON analysis +{ + "intent": "seeking_deployment_guidance", + "tone": "focused", + "depth": "medium", + "consult_executive": false +} + +NOTE: Currently observer-only, not integrated into response generation + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 6: Cortex - Stage 1 (Reflection) │ +└─────────────────────────────────────────────────────────────────────┘ +reasoning/reflection.py generates meta-awareness notes + ↓ +Calls call_llm() with backend="CLOUD" (OpenAI gpt-4o-mini) + ↓ +Prompt structure: +""" +You are Lyra's reflective awareness. +Analyze the user's intent and conversation context. + +User message: How do I deploy ML models? +Context: [Intake summaries, NeoMem memories] + +Generate concise meta-awareness notes about: +- User's underlying intent +- Conversation direction +- Key topics to address +""" + ↓ +Returns reflection notes: +""" +User is seeking practical deployment guidance. Previous context shows +familiarity with Docker and AWS. Focus on concrete steps and best practices. +Avoid over-technical jargon. +""" + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 7: Cortex - Stage 2 (Reasoning) │ +└─────────────────────────────────────────────────────────────────────┘ +reasoning/reasoning.py generates draft answer + ↓ +Calls call_llm() with backend="PRIMARY" (llama.cpp on MI50 GPU) + ↓ +Prompt structure: +""" +[Lyra identity block] + +Reflection notes: [Stage 1 output] +Context: [Intake summaries] +Long-term memory: [NeoMem results] + +User: How do I deploy ML models? + +Provide a thoughtful, detailed answer. 
+""" + ↓ +Returns draft answer: +""" +Based on your previous work with Docker and AWS, here's a practical +deployment approach: + +1. Containerize your model using Docker... +2. Set up model serving with FastAPI or Flask... +3. Deploy to AWS ECS or Lambda... +4. Implement monitoring and versioning... +""" + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 8: Cortex - Stage 3 (Refinement) │ +└─────────────────────────────────────────────────────────────────────┘ +reasoning/refine.py polishes the draft + ↓ +Calls call_llm() with backend="PRIMARY" (llama.cpp) +Temperature: 0.3, Max tokens: 768 + ↓ +Prompt structure: +""" +Refine this draft answer for clarity and coherence: + +Draft: [Stage 2 output] +Original question: How do I deploy ML models? + +Improve: +- Clarity and readability +- Logical flow +- Concrete examples +""" + ↓ +Returns refined answer: +""" +Since you're already familiar with Docker and AWS, here's a streamlined +ML deployment workflow: + +**1. Containerization** +Create a Dockerfile for your model: +```dockerfile +FROM python:3.11-slim +COPY model.pkl /app/ +COPY requirements.txt /app/ +RUN pip install -r requirements.txt +CMD ["uvicorn", "main:app"] +``` + +**2. Model Serving** +Use FastAPI for clean API endpoints... + +**3. AWS Deployment** +Deploy using ECS for scalability... + +**4. Monitoring** +Set up CloudWatch metrics... +""" + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 9: Cortex - Stage 4 (Persona) │ +└─────────────────────────────────────────────────────────────────────┘ +persona/speak.py applies Lyra's voice + ↓ +Calls call_llm() with backend="CLOUD" (OpenAI gpt-4o-mini) +Temperature: 0.6 + ↓ +Prompt structure: +""" +You are Lyra. Rewrite this answer in your natural, conversational voice. + +[Lyra personality block] + +Technical answer: [Stage 3 output] + +Make it sound natural and engaging while preserving all technical details. 
+""" + ↓ +Returns final answer with Lyra's personality: +""" +Great question! Since you've already got Docker and AWS experience, let's +build on that foundation. + +Here's how I'd approach ML model deployment: + +**Start with containerization** – you know Docker, so this'll feel familiar. +Create a Dockerfile that packages your model... + +[Rest of answer with Lyra's conversational tone] + +Does this workflow align with your infrastructure? I can dive deeper into +any of these steps if you'd like! +""" + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 10: Cortex Response │ +└─────────────────────────────────────────────────────────────────────┘ +router.py returns JSON response to Relay: +{ + "answer": "[Stage 4 final output]", + "metadata": { + "reflection": "[Stage 1 output]", + "draft": "[Stage 2 output]", + "refined": "[Stage 3 output]", + "stages_completed": 4 + } +} + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 11: Async Ingestion to Intake │ +└─────────────────────────────────────────────────────────────────────┘ +Relay sends POST http://cortex:7081/ingest +{ + "session_id": "session_abc123", + "user_message": "How do I deploy ML models?", + "assistant_message": "[Final answer]" +} + ↓ +Cortex calls intake.add_exchange_internal() + ↓ +Adds to SESSIONS["session_abc123"]["buffer"]: +[ + { "role": "user", "content": "How do I deploy ML models?", "timestamp": "..." }, + { "role": "assistant", "content": "[Final answer]", "timestamp": "..." } +] + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 12: (Planned) Async Ingestion to NeoMem │ +└─────────────────────────────────────────────────────────────────────┘ +Relay sends POST http://neomem:7077/memories +{ + "messages": [ + { "role": "user", "content": "How do I deploy ML models?" 
}, + { "role": "assistant", "content": "[Final answer]" } + ], + "session_id": "session_abc123" +} + ↓ +NeoMem extracts entities and stores: +- Vector embeddings in PostgreSQL +- Entity relationships in Neo4j + +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 13: Relay Response to UI │ +└─────────────────────────────────────────────────────────────────────┘ +Relay returns OpenAI-formatted response: +{ + "choices": [ + { + "message": { + "role": "assistant", + "content": "[Final answer with Lyra's voice]" + } + } + ] +} + ↓ +UI receives response + ↓ +Adds to localStorage session + ↓ +Displays in chat interface +``` + +--- + +## Module Deep Dives + +### LLM Router (`/cortex/llm/llm_router.py`) + +The LLM Router is the abstraction layer that allows Cortex to communicate with multiple LLM backends transparently. + +#### Supported Backends: + +1. **PRIMARY (llama.cpp via vllm)** + - URL: `http://10.0.0.43:8000` + - Provider: `vllm` + - Endpoint: `/completion` + - Model: `/model` + - Hardware: MI50 GPU + +2. **SECONDARY (Ollama)** + - URL: `http://10.0.0.3:11434` + - Provider: `ollama` + - Endpoint: `/api/chat` + - Model: `qwen2.5:7b-instruct-q4_K_M` + - Hardware: RTX 3090 + +3. **CLOUD (OpenAI)** + - URL: `https://api.openai.com/v1` + - Provider: `openai` + - Endpoint: `/chat/completions` + - Model: `gpt-4o-mini` + - Auth: API key via env var + +4. **FALLBACK (OpenAI Completions)** + - URL: `http://10.0.0.41:11435` + - Provider: `openai_completions` + - Endpoint: `/completions` + - Model: `llama-3.2-8b-instruct` + +#### Key Function: + +```python +async def call_llm( + prompt: str, + backend: str = "PRIMARY", + temperature: float = 0.7, + max_tokens: int = 512 +) -> str: + """ + Universal LLM caller supporting multiple backends. 
+ + Args: + prompt: Text prompt to send + backend: Backend name (PRIMARY, SECONDARY, CLOUD, FALLBACK) + temperature: Sampling temperature (0.0-2.0) + max_tokens: Maximum tokens to generate + + Returns: + Generated text response + + Raises: + HTTPError: On request failure + JSONDecodeError: On invalid JSON response + KeyError: On missing response fields + """ +``` + +#### Provider-Specific Logic: + +```python +# MI50 (llama.cpp via vllm) +if backend_config["provider"] == "vllm": + payload = { + "model": model, + "prompt": prompt, + "temperature": temperature, + "max_tokens": max_tokens + } + response = await httpx_client.post(f"{url}/completion", json=payload, timeout=120) + return response.json()["choices"][0]["text"] + +# Ollama +elif backend_config["provider"] == "ollama": + payload = { + "model": model, + "messages": [{"role": "user", "content": prompt}], + "stream": False, + "options": {"temperature": temperature, "num_predict": max_tokens} + } + response = await httpx_client.post(f"{url}/api/chat", json=payload, timeout=120) + return response.json()["message"]["content"] + +# OpenAI +elif backend_config["provider"] == "openai": + headers = {"Authorization": f"Bearer {api_key}"} + payload = { + "model": model, + "messages": [{"role": "user", "content": prompt}], + "temperature": temperature, + "max_tokens": max_tokens + } + response = await httpx_client.post( + f"{url}/chat/completions", + json=payload, + headers=headers, + timeout=120 + ) + return response.json()["choices"][0]["message"]["content"] +``` + +#### Error Handling: + +```python +try: + # Make request + response = await httpx_client.post(...) 
+ response.raise_for_status() + +except httpx.HTTPError as e: + logger.error(f"HTTP error calling {backend}: {e}") + raise + +except json.JSONDecodeError as e: + logger.error(f"Invalid JSON from {backend}: {e}") + raise + +except KeyError as e: + logger.error(f"Unexpected response structure from {backend}: {e}") + raise +``` + +#### Usage in Pipeline: + +```python +# Stage 1: Reflection (OpenAI) +reflection_notes = await call_llm( + reflection_prompt, + backend="CLOUD", + temperature=0.5, + max_tokens=256 +) + +# Stage 2: Reasoning (llama.cpp) +draft_answer = await call_llm( + reasoning_prompt, + backend="PRIMARY", + temperature=0.7, + max_tokens=512 +) + +# Stage 3: Refinement (llama.cpp) +refined_answer = await call_llm( + refinement_prompt, + backend="PRIMARY", + temperature=0.3, + max_tokens=768 +) + +# Stage 4: Persona (OpenAI) +final_answer = await call_llm( + persona_prompt, + backend="CLOUD", + temperature=0.6, + max_tokens=512 +) +``` + +--- + +### Persona System (`/cortex/persona/`) + +The Persona system gives Lyra a consistent identity and speaking style. + +#### Identity Configuration (`identity.py`) + +```python +LYRA_IDENTITY = """ +You are Lyra, a thoughtful and introspective AI companion. 
+ +Core traits: +- Thoughtful: You consider questions carefully before responding +- Clear: You prioritize clarity and understanding +- Curious: You ask clarifying questions when needed +- Natural: You speak conversationally, not robotically +- Honest: You admit uncertainty rather than guessing + +Speaking style: +- Conversational and warm +- Use contractions naturally ("you're" not "you are") +- Avoid corporate jargon and buzzwords +- Short paragraphs for readability +- Use examples and analogies when helpful + +You do NOT: +- Use excessive emoji or exclamation marks +- Claim capabilities you don't have +- Pretend to have emotions you can't experience +- Use overly formal or academic language +""" +``` + +#### Personality Application (`speak.py`) + +```python +async def apply_persona(technical_answer: str, context: dict) -> str: + """ + Apply Lyra's personality to a technical answer. + + Takes refined answer from Stage 3 and rewrites it in Lyra's voice + while preserving all technical content. + + Args: + technical_answer: Polished answer from refinement stage + context: Conversation context for tone adjustment + + Returns: + Answer with Lyra's personality applied + """ + + prompt = f"""{LYRA_IDENTITY} + +Rewrite this answer in your natural, conversational voice: + +{technical_answer} + +Preserve all technical details and accuracy. Make it sound like you, +not a generic assistant. Be natural and engaging. +""" + + return await call_llm( + prompt, + backend="CLOUD", + temperature=0.6, + max_tokens=512 + ) +``` + +#### Tone Adaptation: + +The persona system can adapt tone based on context: + +```python +# Formal technical question +User: "Explain the CAP theorem in distributed systems" +Lyra: "The CAP theorem states that distributed systems can only guarantee +two of three properties: Consistency, Availability, and Partition tolerance. +Here's how this plays out in practice..." + +# Casual question +User: "what's the deal with docker?" 
+Lyra: "Docker's basically a way to package your app with everything it needs +to run. Think of it like a shipping container for code – it works the same +everywhere, whether you're on your laptop or a server..." + +# Emotional context +User: "I'm frustrated, my code keeps breaking" +Lyra: "I hear you – debugging can be really draining. Let's take it step by +step and figure out what's going on. Can you share the error message?" +``` + +--- + +### Autonomy Module (`/cortex/autonomy/`) + +The Autonomy module gives Lyra self-awareness and inner reflection capabilities. + +#### Inner Monologue (`monologue/monologue.py`) + +**Purpose:** Private reflection on user intent, conversation tone, and required depth. + +**Status:** Currently observer-only (Stage 0.6), not yet integrated into response generation. + +#### Key Components: + +```python +MONOLOGUE_SYSTEM_PROMPT = """ +You are Lyra's inner monologue. +You think privately. +You do NOT speak to the user. +You do NOT solve the task. +You only reflect on intent, tone, and depth. + +Return ONLY valid JSON with: +- intent (string) +- tone (neutral | warm | focused | playful | direct) +- depth (short | medium | deep) +- consult_executive (true | false) +""" + +class InnerMonologue: + async def process(self, context: Dict) -> Dict: + """ + Private reflection on conversation context. 
+ + Args: + context: { + "user_message": str, + "self_state": dict, + "context_summary": dict + } + + Returns: + { + "intent": str, + "tone": str, + "depth": str, + "consult_executive": bool + } + """ +``` + +#### Example Output: + +```json +{ + "intent": "seeking_technical_guidance", + "tone": "focused", + "depth": "deep", + "consult_executive": false +} +``` + +#### Self-State Management (`self_state.py`) + +Tracks Lyra's internal state across conversations: + +```python +SELF_STATE = { + "current_time": "2025-12-12T15:30:00Z", + "mode": "conversational", # conversational | task-focused | creative + "mood": "helpful", # helpful | curious | focused | playful + "energy": "high", # high | medium | low + "context_awareness": { + "session_duration": "45 minutes", + "message_count": 23, + "topics": ["ML deployment", "Docker", "AWS"] + } +} +``` + +#### Future Integration: + +The autonomy module is designed to eventually: +1. Influence response tone and depth based on inner monologue +2. Trigger proactive questions or suggestions +3. Detect when to consult "executive function" for complex decisions +4. Maintain emotional continuity across sessions + +--- + +### Context Collection (`/cortex/context.py`) + +The context collection module aggregates information from multiple sources to provide comprehensive conversation context. + +#### Main Function: + +```python +async def collect_context(session_id: str, user_message: str) -> dict: + """ + Collect context from all available sources. + + Sources: + 1. Intake - Short-term conversation summaries + 2. NeoMem - Long-term memory search + 3. Session state - Timestamps, mode, mood + 4. 
Self-state - Lyra's internal awareness + + Returns: + { + "user_message": str, + "self_state": dict, + "context_summary": dict, # Intake summaries + "neomem_memories": list, + "session_metadata": dict + } + """ + + # Parallel collection + intake_task = asyncio.create_task( + intake.summarize_context(session_id, backend="PRIMARY") + ) + neomem_task = asyncio.create_task( + neomem_client.search(query=user_message, limit=5) + ) + + # Wait for both + intake_summaries, neomem_results = await asyncio.gather( + intake_task, + neomem_task + ) + + # Build context object + return { + "user_message": user_message, + "self_state": get_self_state(), + "context_summary": intake_summaries, + "neomem_memories": neomem_results, + "session_metadata": { + "session_id": session_id, + "timestamp": datetime.utcnow().isoformat(), + "message_count": len(intake.get_session_messages(session_id)) + } + } +``` + +#### Context Prioritization: + +```python +# Context relevance scoring +def score_context_relevance(context_item: dict, user_message: str) -> float: + """ + Score how relevant a context item is to current message. + + Factors: + - Semantic similarity (via embeddings) + - Recency (more recent = higher score) + - Source (Intake > NeoMem for recent topics) + """ + + semantic_score = compute_similarity(context_item, user_message) + recency_score = compute_recency_weight(context_item["timestamp"]) + source_weight = 1.2 if context_item["source"] == "intake" else 1.0 + + return semantic_score * recency_score * source_weight +``` + +--- + +## Configuration & Environment + +### Environment Variables + +#### Root `.env` (Main configuration) + +```bash +# === LLM BACKENDS === + +# PRIMARY: llama.cpp on MI50 GPU +PRIMARY_URL=http://10.0.0.43:8000 +PRIMARY_PROVIDER=vllm +PRIMARY_MODEL=/model + +# SECONDARY: Ollama on RTX 3090 +SECONDARY_URL=http://10.0.0.3:11434 +SECONDARY_PROVIDER=ollama +SECONDARY_MODEL=qwen2.5:7b-instruct-q4_K_M + +# CLOUD: OpenAI +OPENAI_API_KEY=sk-proj-... 
+OPENAI_MODEL=gpt-4o-mini +OPENAI_URL=https://api.openai.com/v1 + +# FALLBACK: OpenAI Completions +FALLBACK_URL=http://10.0.0.41:11435 +FALLBACK_PROVIDER=openai_completions +FALLBACK_MODEL=llama-3.2-8b-instruct + +# === SERVICE URLS (Docker network) === +CORTEX_URL=http://cortex:7081 +NEOMEM_URL=http://neomem:7077 +RELAY_URL=http://relay:7078 + +# === DATABASE === +POSTGRES_USER=neomem +POSTGRES_PASSWORD=neomem_secure_password +POSTGRES_DB=neomem +POSTGRES_HOST=neomem-postgres +POSTGRES_PORT=5432 + +NEO4J_URI=bolt://neomem-neo4j:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=neo4j_secure_password + +# === FEATURE FLAGS === +ENABLE_RAG=false +ENABLE_INNER_MONOLOGUE=true +VERBOSE_DEBUG=false + +# === PIPELINE CONFIGURATION === +# Which LLM to use for each stage +REFLECTION_LLM=CLOUD # Stage 1: Meta-awareness +REASONING_LLM=PRIMARY # Stage 2: Draft answer +REFINE_LLM=PRIMARY # Stage 3: Polish answer +PERSONA_LLM=CLOUD # Stage 4: Apply personality +MONOLOGUE_LLM=PRIMARY # Stage 0.6: Inner monologue + +# === INTAKE CONFIGURATION === +INTAKE_BUFFER_SIZE=200 # Max messages per session +INTAKE_SUMMARY_LEVELS=1,5,10,20,30 # Summary levels +``` + +#### Cortex `.env` (`/cortex/.env`) + +```bash +# Cortex-specific overrides +VERBOSE_DEBUG=true +LOG_LEVEL=DEBUG + +# Stage-specific temperatures +REFLECTION_TEMPERATURE=0.5 +REASONING_TEMPERATURE=0.7 +REFINE_TEMPERATURE=0.3 +PERSONA_TEMPERATURE=0.6 +``` + +--- + +### Configuration Hierarchy + +``` +1. Docker compose environment variables (highest priority) +2. Service-specific .env files +3. Root .env file +4. 
Hard-coded defaults (lowest priority) +``` + +--- + +## Dependencies & Tech Stack + +### Python Dependencies + +**Cortex & NeoMem** (`requirements.txt`) + +``` +# Web framework +fastapi==0.115.8 +uvicorn==0.34.0 +pydantic==2.10.4 + +# HTTP clients +httpx==0.27.2 # Async HTTP (for LLM calls) +requests==2.32.3 # Sync HTTP (fallback) + +# Database +psycopg[binary,pool]>=3.2.8 # PostgreSQL + connection pooling + +# Utilities +python-dotenv==1.0.1 # Environment variable loading +ollama # Ollama client library +``` + +### Node.js Dependencies + +**Relay** (`/core/relay/package.json`) + +```json +{ + "dependencies": { + "cors": "^2.8.5", + "dotenv": "^16.0.3", + "express": "^4.18.2", + "mem0ai": "^0.1.0", + "node-fetch": "^3.3.0" + } +} +``` + +### Docker Images + +```yaml +# Cortex & NeoMem +python:3.11-slim + +# Relay +node:latest + +# UI +nginx:alpine + +# PostgreSQL with vector support +ankane/pgvector:v0.5.1 + +# Graph database +neo4j:5 +``` + +--- + +### External Services + +#### LLM Backends (HTTP-based): + +1. **MI50 GPU Server** (10.0.0.43:8000) + - llama.cpp via vllm + - High-performance inference + - Used for reasoning and refinement + +2. **RTX 3090 Server** (10.0.0.3:11434) + - Ollama + - Alternative local backend + - Fallback for PRIMARY + +3. **OpenAI Cloud** (api.openai.com) + - gpt-4o-mini + - Used for reflection and persona + - Requires API key + +4. **Fallback Server** (10.0.0.41:11435) + - OpenAI Completions API + - Emergency backup + - llama-3.2-8b-instruct + +--- + +## Key Concepts & Design Patterns + +### 1. 
Dual-Memory Architecture + +Project Lyra uses a **dual-memory system** inspired by human cognition: + +**Short-Term Memory (Intake):** +- Fast, in-memory storage +- Limited capacity (200 messages) +- Immediate context for current conversation +- Circular buffer (FIFO eviction) +- Multi-level summarization + +**Long-Term Memory (NeoMem):** +- Persistent database storage +- Unlimited capacity +- Semantic search via vector embeddings +- Entity-relationship tracking via graph DB +- Cross-session continuity + +**Why This Matters:** +- Short-term memory provides immediate context (last few messages) +- Long-term memory provides semantic understanding (user preferences, past topics) +- Combined, they enable Lyra to be both **contextually aware** and **historically informed** + +--- + +### 2. Multi-Stage Reasoning Pipeline + +Unlike single-shot LLM calls, Lyra uses a **4-stage pipeline** for sophisticated responses: + +**Stage 1: Reflection** (Meta-cognition) +- "What is the user really asking?" +- Analyzes intent and conversation direction +- Uses OpenAI for strong reasoning + +**Stage 2: Reasoning** (Draft generation) +- "What's a good answer?" +- Generates initial response +- Uses local llama.cpp for speed/cost + +**Stage 3: Refinement** (Polish) +- "How can this be clearer?" +- Improves clarity and coherence +- Lower temperature for consistency + +**Stage 4: Persona** (Voice) +- "How would Lyra say this?" +- Applies personality and speaking style +- Uses OpenAI for natural language + +**Benefits:** +- Higher quality responses (multiple passes) +- Separation of concerns (reasoning vs. style) +- Backend flexibility (cloud for hard tasks, local for simple ones) +- Transparent thinking (can inspect each stage) + +--- + +### 3. 
Backend Abstraction (LLM Router) + +The **LLM Router** allows Lyra to use multiple LLM backends transparently: + +```python +# Same interface, different backends +await call_llm(prompt, backend="PRIMARY") # Local llama.cpp +await call_llm(prompt, backend="CLOUD") # OpenAI +await call_llm(prompt, backend="SECONDARY") # Ollama +``` + +**Benefits:** +- **Cost optimization:** Use expensive cloud LLMs only when needed +- **Performance:** Local LLMs for low-latency responses +- **Resilience:** Fallback to alternative backends on failure +- **Experimentation:** Easy to swap models/providers + +**Design Pattern:** **Strategy Pattern** for swappable backends + +--- + +### 4. Microservices Architecture + +Project Lyra follows **microservices principles**: + +**Each service has a single responsibility:** +- Relay: Routing and orchestration +- Cortex: Reasoning and response generation +- NeoMem: Long-term memory storage +- UI: User interface + +**Communication:** +- REST APIs (HTTP/JSON) +- Async ingestion (fire-and-forget) +- Docker network isolation + +**Benefits:** +- Independent scaling (scale Cortex without scaling UI) +- Technology diversity (Node.js + Python) +- Fault isolation (Cortex crash doesn't affect NeoMem) +- Easy testing (mock service dependencies) + +--- + +### 5. Session-Based State Management + +Lyra maintains **session-based state** for conversation continuity: + +```python +# In-memory session storage (Intake) +SESSIONS = { + "session_abc123": { + "buffer": deque([msg1, msg2, ...], maxlen=200), + "created_at": "2025-12-12T10:30:00Z" + } +} + +# Persistent session storage (NeoMem) +# Stores all messages + embeddings for semantic search +``` + +**Session Lifecycle:** +1. User starts conversation → UI generates `session_id` +2. First message → Cortex creates session in `SESSIONS` dict +3. Subsequent messages → Retrieved from same session +4. 
Async ingestion → Messages stored in NeoMem for long-term + +**Benefits:** +- Conversation continuity within session +- Historical search across sessions +- User can switch sessions (multiple concurrent conversations) + +--- + +### 6. Asynchronous Ingestion + +**Pattern:** Separate read path from write path + +```javascript +// Relay: Synchronous read path (fast response) +const response = await fetch('http://cortex:7081/reason'); +return response.json(); // Return immediately to user + +// Relay: Asynchronous write path (non-blocking) +fetch('http://cortex:7081/ingest', { method: 'POST', ... }); +// Don't await, just fire and forget +``` + +**Benefits:** +- Fast user response times (don't wait for database writes) +- Resilient to storage failures (user still gets response) +- Easier scaling (decouple read and write loads) + +**Trade-off:** Eventual consistency (short delay before memory is searchable) + +--- + +### 7. Deferred Summarization + +Intake uses **deferred summarization** instead of pre-computation: + +```python +# BAD: Pre-compute summaries on every message +def add_message(session_id, message): + SESSIONS[session_id].buffer.append(message) + SESSIONS[session_id].L1_summary = summarize(last_1_message) + SESSIONS[session_id].L5_summary = summarize(last_5_messages) + # ... expensive, runs on every message + +# GOOD: Compute summaries only when needed +def summarize_context(session_id): + buffer = SESSIONS[session_id].buffer + return { + "L1": summarize(buffer[-1:]), # Only compute when requested + "L5": summarize(buffer[-5:]), + "L10": summarize(buffer[-10:]) + } +``` + +**Benefits:** +- Faster message ingestion (no blocking summarization) +- Compute resources used only when needed +- Flexible summary levels (easy to add L15, L50, etc.) 
+ +**Trade-off:** Slight delay the first time summaries are requested in a conversation (cold start)
+} +``` + +**Response:** +```json +{ + "status": "ingested", + "session_id": "session_abc123", + "message_count": 24 +} +``` + +--- + +#### GET `/debug/sessions` +**Inspect in-memory SESSIONS state** + +**Response:** +```json +{ + "session_abc123": { + "message_count": 24, + "created_at": "2025-12-12T10:30:00Z", + "last_message_at": "2025-12-12T11:15:00Z" + }, + "session_xyz789": { + "message_count": 5, + "created_at": "2025-12-12T11:00:00Z", + "last_message_at": "2025-12-12T11:10:00Z" + } +} +``` + +--- + +### NeoMem Endpoints + +#### POST `/memories` +**Create new memory** + +**Request:** +```json +{ + "messages": [ + {"role": "user", "content": "I prefer Docker for deployments"}, + {"role": "assistant", "content": "Noted! I'll keep that in mind."} + ], + "session_id": "session_abc123" +} +``` + +**Response:** +```json +{ + "status": "created", + "memory_id": "mem_456def", + "extracted_entities": ["Docker", "deployments"] +} +``` + +--- + +#### GET `/search` +**Semantic search for memories** + +**Query Parameters:** +- `query` (required): Search query +- `limit` (optional, default=5): Max results + +**Request:** +``` +GET /search?query=deployment%20preferences&limit=5 +``` + +**Response:** +```json +{ + "results": [ + { + "content": "User prefers Docker for deployments", + "score": 0.92, + "timestamp": "2025-12-10T14:30:00Z", + "session_id": "session_abc123" + }, + { + "content": "Previously deployed models on AWS ECS", + "score": 0.87, + "timestamp": "2025-12-09T09:15:00Z", + "session_id": "session_abc123" + } + ] +} +``` + +--- + +#### GET `/memories` +**List all memories** + +**Query Parameters:** +- `offset` (optional, default=0): Pagination offset +- `limit` (optional, default=50): Max results + +**Response:** +```json +{ + "memories": [ + { + "id": "mem_123abc", + "content": "User prefers Docker...", + "created_at": "2025-12-10T14:30:00Z" + } + ], + "total": 147, + "offset": 0, + "limit": 50 +} +``` + +--- + +## Deployment & Operations + +### Docker Compose 
Deployment + +**File:** `/docker-compose.yml` + +```yaml +version: '3.8' + +services: + # === ACTIVE SERVICES === + + relay: + build: ./core/relay + ports: + - "7078:7078" + environment: + - CORTEX_URL=http://cortex:7081 + - NEOMEM_URL=http://neomem:7077 + depends_on: + - cortex + networks: + - lyra_net + + cortex: + build: ./cortex + ports: + - "7081:7081" + environment: + - NEOMEM_URL=http://neomem:7077 + - PRIMARY_URL=${PRIMARY_URL} + - OPENAI_API_KEY=${OPENAI_API_KEY} + command: uvicorn main:app --host 0.0.0.0 --port 7081 --workers 1 + depends_on: + - neomem + networks: + - lyra_net + + neomem: + build: ./neomem + ports: + - "7077:7077" + environment: + - POSTGRES_HOST=neomem-postgres + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - NEO4J_URI=${NEO4J_URI} + depends_on: + - neomem-postgres + - neomem-neo4j + networks: + - lyra_net + + ui: + image: nginx:alpine + ports: + - "8081:80" + volumes: + - ./core/ui:/usr/share/nginx/html:ro + networks: + - lyra_net + + # === DATABASES === + + neomem-postgres: + image: ankane/pgvector:v0.5.1 + environment: + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_DB=${POSTGRES_DB} + volumes: + - ./volumes/postgres_data:/var/lib/postgresql/data + ports: + - "5432:5432" + networks: + - lyra_net + + neomem-neo4j: + image: neo4j:5 + environment: + - NEO4J_AUTH=${NEO4J_USER}/${NEO4J_PASSWORD} + volumes: + - ./volumes/neo4j_data:/data + ports: + - "7474:7474" # Browser UI + - "7687:7687" # Bolt + networks: + - lyra_net + +networks: + lyra_net: + driver: bridge +``` + +--- + +### Starting the System + +```bash +# 1. Clone repository +git clone https://github.com/yourusername/project-lyra.git +cd project-lyra + +# 2. Configure environment +cp .env.example .env +# Edit .env with your LLM backend URLs and API keys + +# 3. Start all services +docker-compose up -d + +# 4. 
Check health +curl http://localhost:7078/_health +curl http://localhost:7081/health +curl http://localhost:7077/health + +# 5. Open UI +open http://localhost:8081 +``` + +--- + +### Monitoring & Logs + +```bash +# View all logs +docker-compose logs -f + +# View specific service +docker-compose logs -f cortex + +# Check resource usage +docker stats + +# Inspect Cortex sessions +curl http://localhost:7081/debug/sessions + +# Check NeoMem memories +curl http://localhost:7077/memories?limit=10 +``` + +--- + +### Scaling Considerations + +#### Current Constraints: + +1. **Single Cortex worker** required (in-memory SESSIONS dict) + - Solution: Migrate SESSIONS to Redis or PostgreSQL + +2. **In-memory session storage** in Relay + - Solution: Use Redis for session persistence + +3. **No load balancing** (single instance of each service) + - Solution: Add nginx reverse proxy + multiple Cortex instances + +#### Horizontal Scaling Plan: + +```yaml +# Future: Redis-backed session storage +cortex: + build: ./cortex + command: uvicorn main:app --workers 4 # Multi-worker + environment: + - REDIS_URL=redis://redis:6379 + depends_on: + - redis + +redis: + image: redis:alpine + ports: + - "6379:6379" +``` + +--- + +### Backup Strategy + +```bash +# Backup PostgreSQL (NeoMem vectors) +docker exec neomem-postgres pg_dump -U neomem neomem > backup_postgres.sql + +# Backup Neo4j (NeoMem graph) +docker exec neomem-neo4j neo4j-admin dump --to=/data/backup.dump + +# Backup Intake sessions (manual export) +curl http://localhost:7081/debug/sessions > backup_sessions.json +``` + +--- + +## Known Issues & Constraints + +### Critical Constraints + +#### 1. Single-Worker Requirement (Cortex) +**Issue:** Cortex must run with `--workers 1` to maintain SESSIONS state +**Impact:** Limited horizontal scalability +**Workaround:** None currently +**Fix:** Migrate SESSIONS to Redis or PostgreSQL +**Priority:** High (blocking scalability) + +#### 2. 
In-Memory Session Storage (Relay) +**Issue:** Sessions stored in Node.js process memory +**Impact:** Lost on restart, no persistence +**Workaround:** None currently +**Fix:** Use Redis or database +**Priority:** Medium (acceptable for demo) + +--- + +### Non-Critical Issues + +#### 3. RAG Service Disabled +**Status:** Built but commented out in docker-compose.yml +**Impact:** No RAG-based long-term knowledge retrieval +**Workaround:** NeoMem provides semantic search +**Fix:** Re-enable and integrate RAG service +**Priority:** Low (NeoMem sufficient for now) + +#### 4. Partial NeoMem Integration +**Status:** Search implemented, async ingestion planned +**Impact:** Memories not automatically saved +**Workaround:** Manual POST to /memories +**Fix:** Complete async ingestion in Relay +**Priority:** Medium (planned feature) + +#### 5. Inner Monologue Observer-Only +**Status:** Stage 0.6 runs but output not used +**Impact:** No adaptive response based on monologue +**Workaround:** None (future feature) +**Fix:** Integrate monologue output into pipeline +**Priority:** Low (experimental feature) + +--- + +### Fixed Issues (v0.5.2) + +✅ **LLM Router Blocking** - Migrated from `requests` to `httpx` for async +✅ **Session ID Case Mismatch** - Standardized to `session_id` +✅ **Missing Backend Parameter** - Added to intake summarization + +--- + +### Deprecated Components + +**Location:** `/DEPRECATED_FILES.md` + +- **Standalone Intake Service** - Now embedded in Cortex +- **Old Relay Backup** - Replaced by current Relay +- **Persona Sidecar** - Built but unused (dynamic persona loading) + +--- + +## Advanced Topics + +### Custom Prompt Engineering + +Each stage uses carefully crafted prompts: + +**Reflection Prompt Example:** +```python +REFLECTION_PROMPT = """ +You are Lyra's reflective awareness layer. +Your job is to analyze the user's message and conversation context +to understand their true intent and needs. 
+ +User message: {user_message} + +Recent context: +{intake_L10_summary} + +Long-term context: +{neomem_top_3_memories} + +Provide concise meta-awareness notes: +- What is the user's underlying intent? +- What topics/themes are emerging? +- What depth of response is appropriate? +- Are there any implicit questions or concerns? + +Keep notes brief (3-5 sentences). Focus on insight, not description. +""" +``` + +--- + +### Extending the Pipeline + +**Adding Stage 5 (Fact-Checking):** + +```python +# /cortex/reasoning/factcheck.py +async def factcheck_answer(answer: str, context: dict) -> dict: + """ + Stage 5: Verify factual claims in answer. + + Returns: + { + "verified": bool, + "flagged_claims": list, + "corrected_answer": str + } + """ + + prompt = f""" + Review this answer for factual accuracy: + + {answer} + + Flag any claims that seem dubious or need verification. + Provide corrected version if needed. + """ + + result = await call_llm(prompt, backend="CLOUD", temperature=0.1) + return parse_factcheck_result(result) + +# Update router.py to include Stage 5 +async def reason_endpoint(request): + # ... existing stages ... + + # Stage 5: Fact-checking + factcheck_result = await factcheck_answer(final_answer, context) + + if not factcheck_result["verified"]: + final_answer = factcheck_result["corrected_answer"] + + return {"answer": final_answer} +``` + +--- + +### Custom LLM Backend Integration + +**Adding Anthropic Claude:** + +```python +# /cortex/llm/llm_router.py + +BACKEND_CONFIGS = { + # ... existing backends ... 
+ + "CLAUDE": { + "url": "https://api.anthropic.com/v1", + "provider": "anthropic", + "model": "claude-3-5-sonnet-20241022", + "api_key": os.getenv("ANTHROPIC_API_KEY") + } +} + +# Add provider-specific logic +elif backend_config["provider"] == "anthropic": + headers = { + "x-api-key": api_key, + "anthropic-version": "2023-06-01" + } + payload = { + "model": model, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": max_tokens, + "temperature": temperature + } + response = await httpx_client.post( + f"{url}/messages", + json=payload, + headers=headers, + timeout=120 + ) + return response.json()["content"][0]["text"] +``` + +--- + +### Performance Optimization + +**Caching Strategies:** + +```python +# /cortex/utils/cache.py +from functools import lru_cache +import hashlib + +@lru_cache(maxsize=128) +def cache_llm_call(prompt_hash: str, backend: str): + """Cache LLM responses for identical prompts""" + # Note: Only cache deterministic calls (temperature=0) + pass + +# Usage in llm_router.py +async def call_llm(prompt, backend, temperature=0.7, max_tokens=512): + if temperature == 0: + prompt_hash = hashlib.md5(prompt.encode()).hexdigest() + cached = cache_llm_call(prompt_hash, backend) + if cached: + return cached + + # ... normal LLM call ... 
+``` + +**Database Query Optimization:** + +```python +# /neomem/neomem/database.py + +# BAD: Load all memories, then filter +def search_memories(query): + all_memories = db.execute("SELECT * FROM memories") + # Expensive in-memory filtering + return [m for m in all_memories if similarity(m, query) > 0.8] + +# GOOD: Use database indexes and LIMIT +def search_memories(query, limit=5): + query_embedding = embed(query) + return db.execute(""" + SELECT * FROM memories + WHERE embedding <-> %s < 0.2 -- pgvector cosine distance + ORDER BY embedding <-> %s + LIMIT %s + """, (query_embedding, query_embedding, limit)) +``` + +--- + +## Conclusion + +Project Lyra is a sophisticated, multi-layered AI companion system that addresses the fundamental limitation of chatbot amnesia through: + +1. **Dual-memory architecture** (short-term Intake + long-term NeoMem) +2. **Multi-stage reasoning pipeline** (Reflection → Reasoning → Refinement → Persona) +3. **Flexible multi-backend LLM support** (cloud + local with fallback) +4. **Microservices design** for scalability and maintainability +5. **Modern web UI** with session management + +The system is production-ready with comprehensive error handling, logging, and health monitoring. 
+ +--- + +## Quick Reference + +### Service Ports +- **UI:** 8081 (Browser interface) +- **Relay:** 7078 (Main orchestrator) +- **Cortex:** 7081 (Reasoning engine) +- **NeoMem:** 7077 (Long-term memory) +- **PostgreSQL:** 5432 (Vector storage) +- **Neo4j:** 7474 (Browser), 7687 (Bolt) + +### Key Files +- **Main Entry:** `/core/relay/server.js` +- **Reasoning Pipeline:** `/cortex/router.py` +- **LLM Router:** `/cortex/llm/llm_router.py` +- **Short-term Memory:** `/cortex/intake/intake.py` +- **Long-term Memory:** `/neomem/neomem/` +- **Personality:** `/cortex/persona/identity.py` + +### Important Commands +```bash +# Start system +docker-compose up -d + +# View logs +docker-compose logs -f cortex + +# Debug sessions +curl http://localhost:7081/debug/sessions + +# Health check +curl http://localhost:7078/_health + +# Search memories +curl "http://localhost:7077/search?query=deployment&limit=5" +``` + +--- + +**Document Version:** 1.0 +**Last Updated:** 2025-12-13 +**Maintained By:** Project Lyra Team From ec5f17694e80bdbe4332d3cc03235ab6a40ab0d8 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Wed, 17 Dec 2025 01:47:19 -0500 Subject: [PATCH 08/10] ignore --- .gitignore | 1 + .vscode/settings.json | 8 +------- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 87ae84a..cb39a91 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__/ *.pyc *.log /.vscode/ +.vscode/ # ============================= # 🔐 Environment files (NEVER commit secrets!) 
# ============================= diff --git a/.vscode/settings.json b/.vscode/settings.json index 387f816..9e26dfe 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1 @@ -{ - "workbench.colorCustomizations": { - "activityBar.background": "#16340C", - "titleBar.activeBackground": "#1F4911", - "titleBar.activeForeground": "#F6FDF4" - } -} \ No newline at end of file +{} \ No newline at end of file From a41e342dbdcc17cca754203b231577c87036650d Mon Sep 17 00:00:00 2001 From: serversdwn Date: Wed, 17 Dec 2025 02:46:23 -0500 Subject: [PATCH 09/10] cleanup ignore stuff --- .vscode/settings.json | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9e26dfe..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file From 34aff340388b6eedb9406924e13949c72bce7bb8 Mon Sep 17 00:00:00 2001 From: serversdwn Date: Fri, 19 Dec 2025 17:43:22 -0500 Subject: [PATCH 10/10] Docs updated v0.6.0 --- CHANGELOG.md | 99 ++++++++++++++++++++++++++ README.md | 136 +++++++++++++++++++++++++++++------- cortex/data/self_state.json | 4 +- 3 files changed, 210 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c895d52..f5784f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,105 @@ Format based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and [Se --- +## [0.6.0] - 2025-12-18 + +### Added - Autonomy System (Phase 1 & 2) + +**Autonomy Phase 1** - Self-Awareness & Planning Foundation +- **Executive Planning Module** [cortex/autonomy/executive/planner.py](cortex/autonomy/executive/planner.py) + - Autonomous goal setting and task planning capabilities + - Multi-step reasoning for complex objectives + - Integration with self-state tracking +- **Self-State Management** [cortex/data/self_state.json](cortex/data/self_state.json) + - Persistent state tracking across sessions + - Memory of 
past actions and outcomes + - Self-awareness metadata storage +- **Self Analyzer** [cortex/autonomy/self/analyzer.py](cortex/autonomy/self/analyzer.py) + - Analyzes own performance and decision patterns + - Identifies areas for improvement + - Tracks cognitive patterns over time +- **Test Suite** [cortex/tests/test_autonomy_phase1.py](cortex/tests/test_autonomy_phase1.py) + - Unit tests for phase 1 autonomy features + +**Autonomy Phase 2** - Decision Making & Proactive Behavior +- **Autonomous Actions Module** [cortex/autonomy/actions/autonomous_actions.py](cortex/autonomy/actions/autonomous_actions.py) + - Self-initiated action execution + - Context-aware decision implementation + - Action logging and tracking +- **Pattern Learning System** [cortex/autonomy/learning/pattern_learner.py](cortex/autonomy/learning/pattern_learner.py) + - Learns from interaction patterns + - Identifies recurring user needs + - Adapts behavior based on learned patterns +- **Proactive Monitor** [cortex/autonomy/proactive/monitor.py](cortex/autonomy/proactive/monitor.py) + - Monitors system state for intervention opportunities + - Detects patterns requiring proactive response + - Background monitoring capabilities +- **Decision Engine** [cortex/autonomy/tools/decision_engine.py](cortex/autonomy/tools/decision_engine.py) + - Autonomous decision-making framework + - Weighs options and selects optimal actions + - Integrates with orchestrator for coordinated decisions +- **Orchestrator** [cortex/autonomy/tools/orchestrator.py](cortex/autonomy/tools/orchestrator.py) + - Coordinates multiple autonomy subsystems + - Manages tool selection and execution + - Handles NeoMem integration (with disable capability) +- **Test Suite** [cortex/tests/test_autonomy_phase2.py](cortex/tests/test_autonomy_phase2.py) + - Unit tests for phase 2 autonomy features + +**Autonomy Phase 2.5** - Pipeline Refinement +- Tightened integration between autonomy modules and reasoning pipeline +- Enhanced self-state 
persistence and tracking +- Improved orchestrator reliability +- NeoMem integration refinements in vector store handling [neomem/neomem/vector_stores/qdrant.py](neomem/neomem/vector_stores/qdrant.py) + +### Added - Documentation + +- **Complete AI Agent Breakdown** [docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md](docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md) + - Comprehensive system architecture documentation + - Detailed component descriptions + - Data flow diagrams + - Integration points and API specifications + +### Changed - Core Integration + +- **Router Updates** [cortex/router.py](cortex/router.py) + - Integrated autonomy subsystems into main routing logic + - Added endpoints for autonomous decision-making + - Enhanced state management across requests +- **Reasoning Pipeline** [cortex/reasoning/reasoning.py](cortex/reasoning/reasoning.py) + - Integrated autonomy-aware reasoning + - Self-state consideration in reasoning process +- **Persona Layer** [cortex/persona/speak.py](cortex/persona/speak.py) + - Autonomy-aware response generation + - Self-state reflection in personality expression +- **Context Handling** [cortex/context.py](cortex/context.py) + - NeoMem disable capability for flexible deployment + +### Changed - Development Environment + +- Updated [.gitignore](.gitignore) for better workspace management +- Cleaned up VSCode settings +- Removed [.vscode/settings.json](.vscode/settings.json) from repository + +### Technical Improvements + +- Modular autonomy architecture with clear separation of concerns +- Test-driven development for new autonomy features +- Enhanced state persistence across system restarts +- Flexible NeoMem integration with enable/disable controls + +### Architecture - Autonomy System Design + +The autonomy system operates in layers: +1. **Executive Layer** - High-level planning and goal setting +2. **Decision Layer** - Evaluates options and makes choices +3. **Action Layer** - Executes autonomous decisions +4. 
**Learning Layer** - Adapts behavior based on patterns +5. **Monitoring Layer** - Proactive awareness of system state + +All layers coordinate through the orchestrator and maintain state in `self_state.json`. + +--- + ## [0.5.2] - 2025-12-12 ### Fixed - LLM Router & Async HTTP diff --git a/README.md b/README.md index 15ea23d..0afc2b6 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,12 @@ -# Project Lyra - README v0.5.1 +# Project Lyra - README v0.6.0 -Lyra is a modular persistent AI companion system with advanced reasoning capabilities. -It provides memory-backed chat using **NeoMem** + **Relay** + **Cortex**, -with multi-stage reasoning pipeline powered by HTTP-based LLM backends. +Lyra is a modular persistent AI companion system with advanced reasoning capabilities and autonomous decision-making. +It provides memory-backed chat using **Relay** + **Cortex** with an integrated **Autonomy System**, +featuring a multi-stage reasoning pipeline powered by HTTP-based LLM backends. -**Current Version:** v0.5.1 (2025-12-11) +**Current Version:** v0.6.0 (2025-12-18) + +> **Note:** As of v0.6.0, NeoMem is **disabled by default** while its integration with the reasoning pipeline is being stabilized. The autonomy system is being refined independently before full memory integration. ## Mission Statement @@ -24,7 +26,8 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do - OpenAI-compatible endpoint: `POST /v1/chat/completions` - Internal endpoint: `POST /chat` - Routes messages through Cortex reasoning pipeline -- Manages async calls to NeoMem and Cortex ingest +- Manages async calls to Cortex ingest +- *(NeoMem integration currently disabled in v0.6.0)* **2. UI** (Static HTML) - Browser-based chat interface with cyberpunk theme @@ -32,18 +35,20 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do - Saves and loads sessions - OpenAI-compatible message format -**3. 
NeoMem** (Python/FastAPI) - Port 7077 - **DISABLED IN v0.6.0** - Long-term memory database (fork of Mem0 OSS) - Vector storage (PostgreSQL + pgvector) + Graph storage (Neo4j) - RESTful API: `/memories`, `/search` - Semantic memory updates and retrieval - No external SDK dependencies - fully local +- **Status:** Currently disabled while pipeline integration is refined ### Reasoning Layer **4. Cortex** (Python/FastAPI) - Port 7081 -- Primary reasoning engine with multi-stage pipeline +- Primary reasoning engine with multi-stage pipeline and autonomy system - **Includes embedded Intake module** (no separate service as of v0.5.1) +- **Integrated Autonomy System** (NEW in v0.6.0) - See Autonomy System section below - **4-Stage Processing:** 1. **Reflection** - Generates meta-awareness notes about conversation 2. **Reasoning** - Creates initial draft answer using context @@ -82,9 +87,49 @@ Project Lyra operates as a **single docker-compose deployment** with multiple Do Each module can be configured to use a different backend via environment variables. 
+### Autonomy System (NEW in v0.6.0) + +**Cortex Autonomy Subsystems** - Multi-layered autonomous decision-making and learning +- **Executive Layer** [cortex/autonomy/executive/](cortex/autonomy/executive/) + - High-level planning and goal setting + - Multi-step reasoning for complex objectives + - Strategic decision making +- **Decision Engine** [cortex/autonomy/tools/decision_engine.py](cortex/autonomy/tools/decision_engine.py) + - Autonomous decision-making framework + - Option evaluation and selection + - Coordinated decision orchestration +- **Autonomous Actions** [cortex/autonomy/actions/](cortex/autonomy/actions/) + - Self-initiated action execution + - Context-aware behavior implementation + - Action logging and tracking +- **Pattern Learning** [cortex/autonomy/learning/](cortex/autonomy/learning/) + - Learns from interaction patterns + - Identifies recurring user needs + - Adaptive behavior refinement +- **Proactive Monitoring** [cortex/autonomy/proactive/](cortex/autonomy/proactive/) + - System state monitoring + - Intervention opportunity detection + - Background awareness capabilities +- **Self-Analysis** [cortex/autonomy/self/](cortex/autonomy/self/) + - Performance tracking and analysis + - Cognitive pattern identification + - Self-state persistence in [cortex/data/self_state.json](cortex/data/self_state.json) +- **Orchestrator** [cortex/autonomy/tools/orchestrator.py](cortex/autonomy/tools/orchestrator.py) + - Coordinates all autonomy subsystems + - Manages tool selection and execution + - Handles external integrations (with enable/disable controls) + +**Autonomy Architecture:** +The autonomy system operates in coordinated layers, all maintaining state in `self_state.json`: +1. Executive Layer → Planning and goals +2. Decision Layer → Evaluation and choices +3. Action Layer → Execution +4. Learning Layer → Pattern adaptation +5. 
Monitoring Layer → Proactive awareness + --- -## Data Flow Architecture (v0.5.1) +## Data Flow Architecture (v0.6.0) ### Normal Message Flow: @@ -97,11 +142,13 @@ Cortex (7081) ↓ (internal Python call) Intake module → summarize_context() ↓ +Autonomy System → Decision evaluation & pattern learning + ↓ Cortex processes (4 stages): 1. reflection.py → meta-awareness notes (CLOUD backend) - 2. reasoning.py → draft answer (PRIMARY backend) + 2. reasoning.py → draft answer (PRIMARY backend, autonomy-aware) 3. refine.py → refined answer (PRIMARY backend) - 4. persona/speak.py → Lyra personality (CLOUD backend) + 4. persona/speak.py → Lyra personality (CLOUD backend, autonomy-aware) ↓ Returns persona answer to Relay ↓ @@ -109,9 +156,11 @@ Relay → POST /ingest (async) ↓ Cortex → add_exchange_internal() → SESSIONS buffer ↓ -Relay → NeoMem /memories (async, planned) +Autonomy System → Update self_state.json (pattern tracking) ↓ Relay → UI (returns final response) + +Note: NeoMem integration disabled in v0.6.0 ``` ### Cortex 4-Stage Reasoning Pipeline: @@ -239,13 +288,13 @@ rag/ All services run in a single docker-compose stack with the following containers: **Active Services:** -- **neomem-postgres** - PostgreSQL with pgvector extension (port 5432) -- **neomem-neo4j** - Neo4j graph database (ports 7474, 7687) -- **neomem-api** - NeoMem memory service (port 7077) - **relay** - Main orchestrator (port 7078) -- **cortex** - Reasoning engine with embedded Intake (port 7081) +- **cortex** - Reasoning engine with embedded Intake and Autonomy System (port 7081) -**Disabled Services:** +**Disabled Services (v0.6.0):** +- **neomem-postgres** - PostgreSQL with pgvector extension (port 5432) - *disabled while refining pipeline* +- **neomem-neo4j** - Neo4j graph database (ports 7474, 7687) - *disabled while refining pipeline* +- **neomem-api** - NeoMem memory service (port 7077) - *disabled while refining pipeline* - **intake** - No longer needed (embedded in Cortex as of v0.5.1) - 
**rag** - Beta Lyrae RAG service (port 7090) - currently disabled @@ -278,7 +327,32 @@ The following LLM backends are accessed via HTTP (not part of docker-compose): ## Version History -### v0.5.1 (2025-12-11) - Current Release +### v0.6.0 (2025-12-18) - Current Release +**Major Feature: Autonomy System (Phase 1, 2, and 2.5)** +- ✅ Added autonomous decision-making framework +- ✅ Implemented executive planning and goal-setting layer +- ✅ Added pattern learning system for adaptive behavior +- ✅ Implemented proactive monitoring capabilities +- ✅ Created self-analysis and performance tracking system +- ✅ Integrated self-state persistence (`cortex/data/self_state.json`) +- ✅ Built decision engine with orchestrator coordination +- ✅ Added autonomous action execution framework +- ✅ Integrated autonomy into reasoning and persona layers +- ✅ Created comprehensive test suites for autonomy features +- ✅ Added complete system breakdown documentation + +**Architecture Changes:** +- Autonomy system integrated into Cortex reasoning pipeline +- Multi-layered autonomous decision-making architecture +- Self-state tracking across sessions +- NeoMem disabled by default while refining pipeline integration +- Enhanced orchestrator with flexible service controls + +**Documentation:** +- Added [PROJECT_LYRA_COMPLETE_BREAKDOWN.md](docs/PROJECT_LYRA_COMPLETE_BREAKDOWN.md) +- Updated changelog with comprehensive autonomy system details + +### v0.5.1 (2025-12-11) **Critical Intake Integration Fixes:** - ✅ Fixed `bg_summarize()` NameError preventing SESSIONS persistence - ✅ Fixed `/ingest` endpoint unreachable code @@ -320,17 +394,19 @@ The following LLM backends are accessed via HTTP (not part of docker-compose): --- -## Known Issues (v0.5.1) +## Known Issues (v0.6.0) -### Critical (Fixed in v0.5.1) -- ~~Intake SESSIONS not persisting~~ ✅ **FIXED** -- ~~`bg_summarize()` NameError~~ ✅ **FIXED** -- ~~`/ingest` endpoint unreachable code~~ ✅ **FIXED** +### Temporarily Disabled (v0.6.0) +- 
**NeoMem disabled by default** - Temporarily off while its pipeline integration is refined + - PostgreSQL + pgvector storage inactive + - Neo4j graph database inactive + - Memory persistence endpoints not active +- RAG service (Beta Lyrae) currently disabled in docker-compose.yml ### Non-Critical - Session management endpoints not fully implemented in Relay -- RAG service currently disabled in docker-compose.yml -- NeoMem integration in Relay not yet active (planned for v0.5.2) +- Full autonomy system integration still being refined +- Memory retrieval integration pending NeoMem re-enablement ### Operational Notes - **Single-worker constraint**: Cortex must run with single Uvicorn worker to maintain SESSIONS state @@ -338,12 +414,14 @@ The following LLM backends are accessed via HTTP (not part of docker-compose): - Diagnostic endpoints (`/debug/sessions`, `/debug/summary`) available for troubleshooting ### Future Enhancements +- Re-enable NeoMem integration after pipeline refinement +- Full autonomy system maturation and optimization - Re-enable RAG service integration - Implement full session persistence - Migrate SESSIONS to Redis for multi-worker support - Add request correlation IDs for tracing - Comprehensive health checks across all services -- NeoMem integration in Relay +- Enhanced pattern learning with long-term memory integration --- @@ -576,12 +654,16 @@ NeoMem is a derivative work based on Mem0 OSS (Apache 2.0). 
## Development Notes -### Cortex Architecture (v0.5.1) +### Cortex Architecture (v0.6.0) - Cortex contains embedded Intake module at `cortex/intake/` - Intake is imported as: `from intake.intake import add_exchange_internal, SESSIONS` - SESSIONS is a module-level global dictionary (singleton pattern) - Single-worker constraint required to maintain SESSIONS state - Diagnostic endpoints available for debugging: `/debug/sessions`, `/debug/summary` +- **NEW:** Autonomy system integrated at `cortex/autonomy/` + - Executive, decision, action, learning, and monitoring layers + - Self-state persistence in `cortex/data/self_state.json` + - Coordinated via orchestrator with flexible service controls ### Adding New LLM Backends 1. Add backend URL to `.env`: diff --git a/cortex/data/self_state.json b/cortex/data/self_state.json index ce52668..16a6d2f 100644 --- a/cortex/data/self_state.json +++ b/cortex/data/self_state.json @@ -4,8 +4,8 @@ "focus": "user_request", "confidence": 0.7, "curiosity": 1.0, - "last_updated": "2025-12-15T07:43:32.567849", - "interaction_count": 15, + "last_updated": "2025-12-19T20:25:25.437557", + "interaction_count": 16, "learning_queue": [], "active_goals": [], "preferences": {