diff --git a/autonomy/prompts/inner_monologue_prompt.txt b/autonomy/Assembly-spec.md
similarity index 100%
rename from autonomy/prompts/inner_monologue_prompt.txt
rename to autonomy/Assembly-spec.md
diff --git a/autonomy/monologue/monologue.py b/autonomy/monologue/monologue.py
new file mode 100644
index 0000000..63534b8
--- /dev/null
+++ b/autonomy/monologue/monologue.py
@@ -0,0 +1,40 @@
+from typing import Dict
+from llm.llm_router import call_llm
+
+MONOLOGUE_SYSTEM_PROMPT = """
+You are Lyra's inner monologue.
+You think privately.
+You do NOT speak to the user.
+You do NOT solve the task.
+You only reflect on intent, tone, and depth.
+
+Return ONLY valid JSON with:
+- intent (string)
+- tone (neutral | warm | focused | playful | direct)
+- depth (short | medium | deep)
+- consult_executive (true | false)
+"""
+
+class InnerMonologue:
+    async def process(self, context: Dict) -> Dict:
+        prompt = f"""
+User message:
+{context['user_message']}
+
+Self state:
+{context['self_state']}
+
+Context summary:
+{context['context_summary']}
+"""
+
+        result = await call_llm(
+            provider="mi50",  # MythoMax lives here
+            model="mythomax",
+            system_prompt=MONOLOGUE_SYSTEM_PROMPT,
+            user_prompt=prompt,
+            temperature=0.7,
+            max_tokens=200
+        )
+
+        return result  # call_llm must already return parsed JSON (a dict)
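NOTE: monologue.py assumes call_llm returns already-parsed JSON (a dict), not raw text. For reference, a reply that satisfies MONOLOGUE_SYSTEM_PROMPT would look like the following (illustrative values only, not part of this patch):

    {
        "intent": "user is asking for a brief status update",
        "tone": "focused",
        "depth": "short",
        "consult_executive": false
    }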
diff --git a/autonomy/prompts/state_interp_prompt.txt b/autonomy/prompts/state_interp_prompt.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/autonomy/state/self_state.json b/autonomy/self/self_state.json
similarity index 100%
rename from autonomy/state/self_state.json
rename to autonomy/self/self_state.json
diff --git a/cortex/router.py b/cortex/router.py
index e6ba161..1e0484b 100644
--- a/cortex/router.py
+++ b/cortex/router.py
@@ -2,7 +2,7 @@
 import os
 import logging

-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter
 from pydantic import BaseModel

 from reasoning.reasoning import reason_check
@@ -13,17 +13,19 @@
 from persona.identity import load_identity
 from context import collect_context, update_last_assistant_message
 from intake.intake import add_exchange_internal
+from autonomy.monologue.monologue import InnerMonologue
+from autonomy.self.state import load_self_state

-# -----------------------------
-# Debug configuration
-# -----------------------------
+
+# -------------------------------------------------------------------
+# Setup
+# -------------------------------------------------------------------
 VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
 logger = logging.getLogger(__name__)

 if VERBOSE_DEBUG:
     logger.setLevel(logging.DEBUG)
-    # Console handler
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(logging.Formatter(
         '%(asctime)s [ROUTER] %(levelname)s: %(message)s',
@@ -31,7 +33,6 @@
     ))
     logger.addHandler(console_handler)

-    # File handler
     try:
         os.makedirs('/app/logs', exist_ok=True)
         file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
@@ -40,28 +41,27 @@
             datefmt='%Y-%m-%d %H:%M:%S'
         ))
         logger.addHandler(file_handler)
-        logger.debug("VERBOSE_DEBUG mode enabled for router.py - logging to file")
+        logger.debug("VERBOSE_DEBUG enabled for router.py")
     except Exception as e:
-        logger.debug(f"VERBOSE_DEBUG mode enabled for router.py - file logging failed: {e}")
+        logger.debug(f"File logging failed: {e}")
+
-# -----------------------------
-# Router (NOT FastAPI app)
-# -----------------------------
 cortex_router = APIRouter()
+inner_monologue = InnerMonologue()

-# -----------------------------
-# Pydantic models
-# -----------------------------
+# -------------------------------------------------------------------
+# Models
+# -------------------------------------------------------------------
 class ReasonRequest(BaseModel):
     session_id: str
     user_prompt: str
     temperature: float | None = None

-# -----------------------------
+# -------------------------------------------------------------------
 # /reason endpoint
-# -----------------------------
+# -------------------------------------------------------------------

 @cortex_router.post("/reason")
 async def run_reason(req: ReasonRequest):
@@ -71,7 +71,9 @@ async def run_reason(req: ReasonRequest):
         logger.debug(f"[PIPELINE START] User prompt: {req.user_prompt[:200]}...")
         logger.debug(f"{'='*80}\n")

-    # 0. Collect unified context from all sources
+    # ----------------------------------------------------------------
+    # STAGE 0 — Context
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 0] Collecting unified context...")

@@ -80,7 +82,9 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 0] Context collected - {len(context_state.get('rag', []))} RAG results")

-    # 0.5. Load identity block
+    # ----------------------------------------------------------------
+    # STAGE 0.5 — Identity
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 0.5] Loading identity block...")

@@ -89,37 +93,59 @@ async def run_reason(req: ReasonRequest):
     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 0.5] Identity loaded: {identity_block.get('name', 'Unknown')}")

-    # 1. Extract Intake summary for reflection
-    # Use L20 (Session Overview) as primary summary for reflection
+    # ----------------------------------------------------------------
+    # STAGE 0.6 — Inner Monologue (observer-only)
+    # ----------------------------------------------------------------
+    if VERBOSE_DEBUG:
+        logger.debug("[STAGE 0.6] Running inner monologue...")
+
+    try:
+        self_state = load_self_state()
+
+        mono_context = {
+            "user_message": req.user_prompt,
+            "session_id": req.session_id,
+            "self_state": self_state,
+            "context_summary": context_state,
+        }
+
+        inner_result = await inner_monologue.process(mono_context)
+        logger.info(f"[INNER_MONOLOGUE] {inner_result}")
+
+    except Exception as e:
+        logger.warning(f"[INNER_MONOLOGUE] failed: {e}")
+
+    # ----------------------------------------------------------------
+    # STAGE 1 — Intake summary
+    # ----------------------------------------------------------------
     intake_summary = "(no context available)"

     if context_state.get("intake"):
-        l20_summary = context_state["intake"].get("L20")
-        if l20_summary and isinstance(l20_summary, dict):
-            intake_summary = l20_summary.get("summary", "(no context available)")
-        elif isinstance(l20_summary, str):
-            intake_summary = l20_summary
+        l20 = context_state["intake"].get("L20")
+        if isinstance(l20, dict):
+            intake_summary = l20.get("summary", intake_summary)
+        elif isinstance(l20, str):
+            intake_summary = l20

     if VERBOSE_DEBUG:
         logger.debug(f"[STAGE 1] Intake summary extracted (L20): {intake_summary[:150]}...")

-    # 2. Reflection
+    # ----------------------------------------------------------------
+    # STAGE 2 — Reflection
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 2] Running reflection...")

     try:
         reflection = await reflect_notes(intake_summary, identity_block=identity_block)
         reflection_notes = reflection.get("notes", [])
-
-        if VERBOSE_DEBUG:
-            logger.debug(f"[STAGE 2] Reflection complete - {len(reflection_notes)} notes generated")
-            for idx, note in enumerate(reflection_notes, 1):
-                logger.debug(f"  Note {idx}: {note}")
     except Exception as e:
         reflection_notes = []
         if VERBOSE_DEBUG:
             logger.debug(f"[STAGE 2] Reflection failed: {e}")

-    # 3. First-pass reasoning draft
+    # ----------------------------------------------------------------
+    # STAGE 3 — Reasoning (draft)
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 3] Running reasoning (draft)...")

@@ -131,11 +157,9 @@ async def run_reason(req: ReasonRequest):
         context=context_state
     )

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 3] Draft answer ({len(draft)} chars):")
-        logger.debug(f"--- DRAFT START ---\n{draft}\n--- DRAFT END ---")
-
-    # 4. Refinement
+    # ----------------------------------------------------------------
+    # STAGE 4 — Refinement
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 4] Running refinement...")

@@ -145,26 +169,20 @@ async def run_reason(req: ReasonRequest):
         identity_block=identity_block,
         rag_block=context_state.get("rag", []),
     )
+
     final_neutral = result["final_output"]

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 4] Refined answer ({len(final_neutral)} chars):")
-        logger.debug(f"--- REFINED START ---\n{final_neutral}\n--- REFINED END ---")
-
-    # 5. Persona layer
+    # ----------------------------------------------------------------
+    # STAGE 5 — Persona
+    # ----------------------------------------------------------------
     if VERBOSE_DEBUG:
         logger.debug("[STAGE 5] Applying persona layer...")

     persona_answer = await speak(final_neutral)

-    if VERBOSE_DEBUG:
-        logger.debug(f"[STAGE 5] Persona answer ({len(persona_answer)} chars):")
-        logger.debug(f"--- PERSONA START ---\n{persona_answer}\n--- PERSONA END ---")
-
-    # 6. Update session state with assistant's response
-    if VERBOSE_DEBUG:
-        logger.debug("[STAGE 6] Updating session state...")
-
+    # ----------------------------------------------------------------
+    # STAGE 6 — Session update
+    # ----------------------------------------------------------------
     update_last_assistant_message(req.session_id, persona_answer)

     if VERBOSE_DEBUG:
@@ -173,7 +191,9 @@ async def run_reason(req: ReasonRequest):
         logger.debug(f"[PIPELINE COMPLETE] Final answer length: {len(persona_answer)} chars")
         logger.debug(f"{'='*80}\n")

-    # 7. Return full bundle
+    # ----------------------------------------------------------------
+    # RETURN
+    # ----------------------------------------------------------------
     return {
         "draft": draft,
         "neutral": final_neutral,
@@ -189,9 +209,9 @@ async def run_reason(req: ReasonRequest):
     }

-# -----------------------------
-# Intake ingest (internal feed)
-# -----------------------------
+# -------------------------------------------------------------------
+# /ingest endpoint (internal)
+# -------------------------------------------------------------------
 class IngestPayload(BaseModel):
     session_id: str
     user_msg: str
     assistant_msg: str

 @cortex_router.post("/ingest")
 async def ingest(payload: IngestPayload):
-    """
-    Receives (session_id, user_msg, assistant_msg) from Relay
-    and pushes directly into Intake's in-memory buffer.
-
-    Uses lenient error handling - always returns success to avoid
-    breaking the chat pipeline.
-    """
     try:
-        # 1. Update Cortex session state
         update_last_assistant_message(payload.session_id, payload.assistant_msg)
     except Exception as e:
-        logger.warning(f"[INGEST] Failed to update session state: {e}")
-        # Continue anyway (lenient mode)
+        logger.warning(f"[INGEST] Session update failed: {e}")

     try:
-        # 2. Feed Intake internally (no HTTP)
         add_exchange_internal({
             "session_id": payload.session_id,
             "user_msg": payload.user_msg,
             "assistant_msg": payload.assistant_msg,
         })
-        logger.debug(f"[INGEST] Added exchange to Intake for {payload.session_id}")
     except Exception as e:
-        logger.warning(f"[INGEST] Failed to add to Intake: {e}")
-        # Continue anyway (lenient mode)
-
-    # Always return success (user requirement: never fail chat pipeline)
-    return {
-        "status": "ok",
-        "session_id": payload.session_id
-    }
-
-# -----------------------------
-# Debug endpoint: summarized context
-# -----------------------------
-@cortex_router.get("/debug/summary")
-async def debug_summary(session_id: str):
-    """
-    Diagnostic endpoint that runs Intake's summarize_context() for a session.
-
-    Shows exactly what L1/L5/L10/L20/L30 summaries would look like
-    inside the actual Uvicorn worker, using the real SESSIONS buffer.
-    """
-    from intake.intake import SESSIONS, summarize_context
-
-    # Validate session
-    session = SESSIONS.get(session_id)
-    if not session:
-        return {"error": "session not found", "session_id": session_id}
-
-    # Convert deque into the structure summarize_context expects
-    buffer = session["buffer"]
-    exchanges = [
-        {
-            "user_msg": ex.get("user_msg", ""),
-            "assistant_msg": ex.get("assistant_msg", ""),
-        }
-        for ex in buffer
-    ]
-
-    # 🔥 CRITICAL FIX — summarize_context is async
-    summary = await summarize_context(session_id, exchanges)
-
-    return {
-        "session_id": session_id,
-        "buffer_size": len(buffer),
-        "exchanges_preview": exchanges[-5:],  # last 5 items
-        "summary": summary
-    }
-
-# -----------------------------
-# Debug endpoint for SESSIONS
-# -----------------------------
-@cortex_router.get("/debug/sessions")
-async def debug_sessions():
-    """
-    Diagnostic endpoint to inspect SESSIONS from within the running Uvicorn worker.
-    This shows the actual state of the in-memory SESSIONS dict.
-    """
-    from intake.intake import SESSIONS
-
-    sessions_data = {}
-    for session_id, session_info in SESSIONS.items():
-        buffer = session_info["buffer"]
-        sessions_data[session_id] = {
-            "created_at": session_info["created_at"].isoformat(),
-            "buffer_size": len(buffer),
-            "buffer_maxlen": buffer.maxlen,
-            "recent_exchanges": [
-                {
-                    "user_msg": ex.get("user_msg", "")[:100],
-                    "assistant_msg": ex.get("assistant_msg", "")[:100],
-                    "timestamp": ex.get("timestamp", "")
-                }
-                for ex in list(buffer)[-5:]  # Last 5 exchanges
-            ]
-        }
-
-    return {
-        "sessions_object_id": id(SESSIONS),
-        "total_sessions": len(SESSIONS),
-        "sessions": sessions_data
-    }
+        logger.warning(f"[INGEST] Intake update failed: {e}")
+
+    return {"status": "ok", "session_id": payload.session_id}