Major rewire, all modules connected. Intake still wonky

2025-11-28 15:14:47 -05:00
parent 734999e8bb
commit a83405beb1
19 changed files with 10109 additions and 4072 deletions
--- a/cortex/reasoning/refine.py
+++ b/cortex/reasoning/refine.py
@@ -8,22 +8,22 @@ from llm.llm_router import call_llm

 logger = logging.getLogger(__name__)

-# ============================================================
-# Config
-# ============================================================
+# ===============================================
+# Configuration
+# ===============================================

 REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
 REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
 REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"

-# Module-level backend selection
-REFINE_LLM = os.getenv("REFINE_LLM", "PRIMARY").upper()
+# These come from root .env
+REFINE_LLM = os.getenv("REFINE_LLM", "").upper()
 CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()


-# ============================================================
+# ===============================================
 # Prompt builder
-# ============================================================
+# ===============================================

 def build_refine_prompt(
    draft_output: str,
@@ -32,15 +32,10 @@ def build_refine_prompt(
    rag_block: Optional[str],
 ) -> str:

-    if reflection_notes is None:
-        reflection_text = "(none)"
-    elif isinstance(reflection_notes, str):
-        reflection_text = reflection_notes
-    else:
-        try:
-            reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
-        except Exception:
-            reflection_text = str(reflection_notes)
+    try:
+        reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
+    except Exception:
+        reflection_text = str(reflection_notes)

    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"
@@ -49,12 +44,13 @@ def build_refine_prompt(
 You are Lyra Cortex's internal refiner.

 Your job:
- Fix factual errors, logical gaps, or missing info.
- Use reflection notes for corrections.
- Use RAG context as factual grounding.
- Respect the identity block without adding style or personality.
+- Fix factual issues.
+- Improve clarity.
+- Apply reflection notes when helpful.
+- Respect identity constraints.
+- Apply RAG context as truth source.

-Never mention RAG, reflection, or internal logic.
+Do NOT mention RAG, reflection, internal logic, or this refinement step.

 ------------------------------
 [IDENTITY BLOCK]
@@ -74,14 +70,14 @@ Never mention RAG, reflection, or internal logic.

 ------------------------------
 Task:
-Rewrite the DRAFT ANSWER into a single, final answer.
+Rewrite the DRAFT into a single final answer for the user.
 Return ONLY the final answer text.
 """.strip()


-# ============================================================
-# Public API: async, using llm_router
-# ============================================================
+# ===============================================
+# Public API — now async & fully router-based
+# ===============================================

 async def refine_answer(
    draft_output: str,
@@ -104,7 +100,7 @@ async def refine_answer(
        rag_block,
    )

-    # Refinement backend → fallback to Cortex backend → fallback to PRIMARY
+    # backend priority: REFINE_LLM → CORTEX_LLM → PRIMARY
    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"

    try: