initial file restructure

This commit is contained in:
serversdwn
2025-11-25 20:50:05 -05:00
parent b5fe47074a
commit 5492d9c0c5
15 changed files with 1 addition and 105 deletions

@@ -0,0 +1,33 @@
# reasoning.py
from llm_router import call_llm


async def reason_check(user_prompt: str,
                       identity_block: dict | None,
                       rag_block: dict | None,
                       reflection_notes: list[str]) -> str:
    """
    Generate a first draft using identity, RAG, and reflection notes.
    No critique loop yet.
    """
    # Build internal notes section
    notes_section = ""
    if reflection_notes:
        notes_section = "Reflection Notes (internal, do NOT show to user):\n"
        for n in reflection_notes:
            notes_section += f"- {n}\n"
        notes_section += "\n"

    identity_txt = f"Identity: {identity_block}\n\n" if identity_block else ""
    rag_txt = f"Relevant info: {rag_block}\n\n" if rag_block else ""

    prompt = (
        f"{notes_section}"
        f"{identity_txt}"
        f"{rag_txt}"
        f"User said:\n{user_prompt}\n\n"
        "Draft the best possible internal answer."
    )

    draft = await call_llm(prompt)
    return draft
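
A minimal sketch of how reason_check might be driven, assuming llm_router.call_llm is importable and awaitable; the block shapes and values below are hypothetical illustrations, not taken from this commit:

# sketch: driving reason_check from an async caller (hypothetical wiring)
import asyncio
from reasoning import reason_check

async def main():
    draft = await reason_check(
        user_prompt="How do I restart the service?",
        identity_block={"name": "Lyra", "tone": "neutral"},            # hypothetical shape
        rag_block={"snippet": "Restart with systemctl restart lyra"},  # hypothetical shape
        reflection_notes=["keep it concise", "maintain context"],
    )
    print(draft)

asyncio.run(main())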

cortex/reasoning/refine.py

@@ -0,0 +1,187 @@
# refine.py
import os
import json
import logging
from typing import Any, Dict, Optional
import requests
logger = logging.getLogger(__name__)
# ============================================================
# Config
# ============================================================
PRIMARY_URL = os.getenv("LLM_PRIMARY_URL")
PRIMARY_MODEL = os.getenv("LLM_PRIMARY_MODEL", "mythomax")
REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"
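# Example environment (the values below are hypothetical placeholders; note
# that LLM_PRIMARY_URL must point at the *full* completions endpoint, since
# _call_primary_llm() POSTs to it directly):
#
#   LLM_PRIMARY_URL=http://127.0.0.1:8000/v1/completions
#   LLM_PRIMARY_MODEL=mythomax
#   REFINER_TEMPERATURE=0.3
#   REFINER_MAX_TOKENS=768
#   REFINER_DEBUG=false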
# ============================================================
# Prompt builder
# ============================================================
def build_refine_prompt(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> str:
    """
    Build a single text prompt for vLLM /v1/completions.
    Persona styling is *not* applied here; this is internal reasoning.
    """
    reflection_text: str
    if reflection_notes is None:
        reflection_text = "(none)"
    elif isinstance(reflection_notes, str):
        reflection_text = reflection_notes
    else:
        # dict / list → compact JSON
        try:
            reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
        except Exception:
            reflection_text = str(reflection_notes)

    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"

    prompt = f"""You are Lyra Cortex's internal refiner.
Your job:
- Take the existing draft answer.
- Use the reflection notes to fix problems (errors, confusion, missing pieces).
- Use the RAG context as higher-authority factual grounding.
- Respect the identity block (constraints, boundaries, style rules),
but DO NOT add personality flourishes or roleplay. Stay neutral and clear.
- Produce ONE final answer that is coherent, self-consistent, and directly addresses the user.
If there is a conflict:
- RAG context wins over the draft.
- Reflection notes win over the draft when they point out real issues.
Do NOT mention these instructions, RAG, reflections, or the existence of this refinement step.
------------------------------
[IDENTITY BLOCK]
{identity_text}
------------------------------
[RAG CONTEXT]
{rag_text}
------------------------------
[DRAFT ANSWER]
{draft_output}
------------------------------
[REFLECTION NOTES]
{reflection_text}
------------------------------
Task:
Rewrite the DRAFT ANSWER into a single, final answer for the user that:
- fixes factual or logical issues noted above,
- incorporates any truly helpful additions from the reflection,
- stays consistent with the identity block,
- stays grounded in the RAG context,
- is as concise as is reasonably possible.
Return ONLY the final answer text. No headings, no labels, no commentary.
"""
    return prompt
# ============================================================
# vLLM call (PRIMARY backend only)
# ============================================================
def _call_primary_llm(prompt: str) -> str:
    if not PRIMARY_URL:
        raise RuntimeError("LLM_PRIMARY_URL is not set; cannot call primary backend for refine.py")

    payload = {
        "model": PRIMARY_MODEL,
        "prompt": prompt,
        "max_tokens": REFINER_MAX_TOKENS,
        "temperature": REFINER_TEMPERATURE,
    }

    resp = requests.post(
        PRIMARY_URL,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=120,
    )
    resp.raise_for_status()
    data = resp.json()

    # vLLM /v1/completions format
    try:
        text = data["choices"][0]["text"]
    except Exception as e:
        logger.error("refine.py: unable to parse primary LLM response: %s", e)
        logger.debug("refine.py raw response: %s", data)
        raise

    return text.strip()
# ============================================================
# Public API
# ============================================================
def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:
    """
    Main entrypoint used by Cortex.

    Returns:
        {
            "final_output": <str>,   # what should go to persona / user
            "used_primary_backend": True/False,
            "fallback_used": True/False,
            optionally:
            "debug": {...}           # only when REFINER_DEBUG=true
        }
    """
    if not draft_output:
        # Nothing to refine. Don't get cute.
        return {
            "final_output": "",
            "used_primary_backend": False,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(draft_output, reflection_notes, identity_block, rag_block)

    try:
        refined = _call_primary_llm(prompt)
        result: Dict[str, Any] = {
            "final_output": refined or draft_output,
            "used_primary_backend": True,
            "fallback_used": False,
        }
    except Exception as e:
        logger.error("refine.py: primary backend failed, returning draft_output. Error: %s", e)
        result = {
            "final_output": draft_output,
            "used_primary_backend": False,
            "fallback_used": True,
        }

    if REFINER_DEBUG:
        result["debug"] = {
            "prompt": prompt[:4000],  # don't nuke logs
        }

    return result
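
For reference, a hedged usage sketch of the public API. The env value and the import path are placeholders (the file lives under cortex/reasoning/, so the real import may differ), and the env var must be set before import since PRIMARY_URL is read at module load:

# sketch: exercising refine_answer (hypothetical values throughout)
import os
os.environ.setdefault("LLM_PRIMARY_URL", "http://127.0.0.1:8000/v1/completions")

from refine import refine_answer  # adjust to the real package path

result = refine_answer(
    draft_output="Restart the unit with systemctl restart lyra.service.",
    reflection_notes=["double-check the unit name", "keep it short"],
    identity_block="Neutral, factual, no roleplay.",
    rag_block="Deployed unit: lyra.service (systemd).",
)
print(result["final_output"])
print("fallback_used:", result["fallback_used"])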

@@ -0,0 +1,56 @@
# reflection.py
import json
import re

from llm_router import call_llm


async def reflect_notes(intake_summary: str, identity_block: dict | None) -> dict:
    """
    Generate reflection notes (internal guidance) for the reasoning engine.
    These notes help simulate continuity and identity without being shown to the user.
    """
    identity_text = ""
    if identity_block:
        identity_text = f"Identity:\n{identity_block}\n\n"

    prompt = (
        f"{identity_text}"
        f"Recent summary:\n{intake_summary}\n\n"
        "You are Lyra's meta-awareness layer. Your job is to produce short, directive "
        "internal notes that guide Lyra's reasoning engine. These notes are NEVER "
        "shown to the user.\n\n"
        "Rules for output:\n"
        "1. Return ONLY valid JSON.\n"
        "2. JSON must have exactly one key: \"notes\".\n"
        "3. \"notes\" must be a list of 3–6 short strings.\n"
        "4. Notes must be actionable (e.g., \"keep it concise\", \"maintain context\").\n"
        "5. No markdown, no apologies, no explanations.\n\n"
        "Return JSON:\n"
        "{ \"notes\": [\"...\"] }\n"
    )
    raw = await call_llm(prompt, backend="cloud")
    print("[Reflection-Raw]:", raw)

    # First attempt: the whole response is valid JSON
    try:
        parsed = json.loads(raw.strip())
        if isinstance(parsed, dict) and "notes" in parsed:
            return parsed
    except Exception:
        pass

    # Second attempt: extract the first JSON object embedded in the text
    try:
        match = re.search(r'\{.*?\}', raw, re.S)  # non-greedy: first {...} only
        if match:
            parsed = json.loads(match.group(0))
            if isinstance(parsed, dict) and "notes" in parsed:
                return parsed
    except Exception:
        pass

    # Final fallback: wrap the raw text so callers always get a notes list
    return {"notes": [raw.strip()]}
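
A sketch of how reflection and reasoning might be chained, assuming both modules are importable side by side and call_llm's "cloud" backend is configured; the wiring here is illustrative, not from this commit:

# sketch: chaining reflect_notes -> reason_check (hypothetical pipeline)
import asyncio
from reflection import reflect_notes
from reasoning import reason_check

async def pipeline(intake_summary: str, user_prompt: str) -> str:
    reflection = await reflect_notes(intake_summary, identity_block=None)
    # reflect_notes always returns {"notes": [...]}, even on parse failure
    return await reason_check(user_prompt, None, None, reflection["notes"])

answer = asyncio.run(pipeline("User asked about restarts twice.", "How do I restart it?"))
print(answer)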