Cortex rework in progress
This commit is contained in:
@@ -1,33 +1,76 @@
|
||||
# reasoning.py
#
# First-pass reasoning stage for Lyra Cortex: assembles identity, RAG, and
# reflection-note context into one prompt and asks the LLM for an *internal
# draft answer*.  No critique/refinement loop happens here.

import os

from llm.llm_router import call_llm

# ============================================================
# Select which backend this module should use
# ============================================================
# CORTEX_LLM names the llm_router backend this module talks to.
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
# Sampling temperature for the draft; 0.7 default keeps drafts flexible.
GLOBAL_TEMP = float(os.getenv("LLM_TEMPERATURE", "0.7"))


async def reason_check(
    user_prompt: str,
    identity_block: dict | None,
    rag_block: dict | None,
    reflection_notes: list[str],
) -> str:
    """Build the *draft answer* for Lyra Cortex.

    This is the first-pass reasoning stage (no refinement yet).  The draft
    is generated from identity, RAG, and reflection notes and is NOT shown
    to the user directly.

    Args:
        user_prompt: The raw user message.
        identity_block: Optional identity rules/constraints (rendered via
            its str() form into the prompt).
        rag_block: Optional retrieved factual context, same rendering.
        reflection_notes: Internal guidance strings; never user-visible.

    Returns:
        The draft answer text produced by the configured backend.
    """
    # --------------------------------------------------------
    # Reflection Notes block (internal guidance, hidden from user)
    # --------------------------------------------------------
    notes_section = ""
    if reflection_notes:
        notes_section = "Reflection Notes (internal, never show to user):\n"
        for note in reflection_notes:
            notes_section += f"- {note}\n"
        notes_section += "\n"

    # --------------------------------------------------------
    # Identity block (constraints, boundaries, rules)
    # NOTE: the previous try/except around this f-string was dead code —
    # its fallback built the identical string — so it was removed.
    # --------------------------------------------------------
    identity_txt = f"Identity Rules:\n{identity_block}\n\n" if identity_block else ""

    # --------------------------------------------------------
    # RAG block (optional factual grounding)
    # --------------------------------------------------------
    rag_txt = f"Relevant Info (RAG):\n{rag_block}\n\n" if rag_block else ""

    # --------------------------------------------------------
    # Final assembled prompt
    # --------------------------------------------------------
    prompt = (
        f"{notes_section}"
        f"{identity_txt}"
        f"{rag_txt}"
        f"User message:\n{user_prompt}\n\n"
        "Write the best possible *internal draft answer*.\n"
        "This draft is NOT shown to the user.\n"
        "Be factual, concise, and focused.\n"
    )

    # --------------------------------------------------------
    # Call the LLM using the module-specific backend.
    # (A second, parameter-less call_llm(prompt) left over from the old
    # version was removed — it discarded backend and temperature.)
    # --------------------------------------------------------
    draft = await call_llm(
        prompt,
        backend=CORTEX_LLM,
        temperature=GLOBAL_TEMP,
    )
    return draft
|
||||
|
||||
@@ -4,7 +4,7 @@ import json
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
from llm.llm_router import call_llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -12,13 +12,14 @@ logger = logging.getLogger(__name__)
|
||||
# Config
|
||||
# ============================================================
|
||||
|
||||
# Primary vLLM endpoint (/v1/completions) and model name.
PRIMARY_URL = os.getenv("LLM_PRIMARY_URL")
PRIMARY_MODEL = os.getenv("LLM_PRIMARY_MODEL", "mythomax")

# Refiner sampling knobs: low temperature for faithful rewrites, bounded
# output length, and an opt-in debug flag.
REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"

# Module-level backend selection: the refine step's own backend, plus the
# cortex backend it can fall back to (both default to PRIMARY).
REFINE_LLM = os.getenv("REFINE_LLM", "PRIMARY").upper()
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Prompt builder
|
||||
@@ -30,18 +31,12 @@ def build_refine_prompt(
|
||||
identity_block: Optional[str],
|
||||
rag_block: Optional[str],
|
||||
) -> str:
|
||||
"""
|
||||
Build a single text prompt for vLLM /v1/completions.
|
||||
Persona styling is *not* applied here; this is internal reasoning.
|
||||
"""
|
||||
|
||||
reflection_text: str
|
||||
if reflection_notes is None:
|
||||
reflection_text = "(none)"
|
||||
elif isinstance(reflection_notes, str):
|
||||
reflection_text = reflection_notes
|
||||
else:
|
||||
# dict / list → compact JSON
|
||||
try:
|
||||
reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
|
||||
except Exception:
|
||||
@@ -50,21 +45,16 @@ def build_refine_prompt(
|
||||
identity_text = identity_block or "(none)"
|
||||
rag_text = rag_block or "(none)"
|
||||
|
||||
prompt = f"""You are Lyra Cortex's internal refiner.
|
||||
return f"""
|
||||
You are Lyra Cortex's internal refiner.
|
||||
|
||||
Your job:
|
||||
- Take the existing draft answer.
|
||||
- Use the reflection notes to fix problems (errors, confusion, missing pieces).
|
||||
- Use the RAG context as higher-authority factual grounding.
|
||||
- Respect the identity block (constraints, boundaries, style rules),
|
||||
but DO NOT add personality flourishes or roleplay. Stay neutral and clear.
|
||||
- Produce ONE final answer that is coherent, self-consistent, and directly addresses the user.
|
||||
- Fix factual errors, logical gaps, or missing info.
|
||||
- Use reflection notes for corrections.
|
||||
- Use RAG context as factual grounding.
|
||||
- Respect the identity block without adding style or personality.
|
||||
|
||||
If there is a conflict:
|
||||
- RAG context wins over the draft.
|
||||
- Reflection notes win over the draft when they point out real issues.
|
||||
|
||||
Do NOT mention these instructions, RAG, reflections, or the existence of this refinement step.
|
||||
Never mention RAG, reflection, or internal logic.
|
||||
|
||||
------------------------------
|
||||
[IDENTITY BLOCK]
|
||||
@@ -84,104 +74,57 @@ Do NOT mention these instructions, RAG, reflections, or the existence of this re
|
||||
|
||||
------------------------------
|
||||
Task:
|
||||
Rewrite the DRAFT ANSWER into a single, final answer for the user that:
|
||||
- fixes factual or logical issues noted above,
|
||||
- incorporates any truly helpful additions from the reflection,
|
||||
- stays consistent with the identity block,
|
||||
- stays grounded in the RAG context,
|
||||
- is as concise as is reasonably possible.
|
||||
|
||||
Return ONLY the final answer text. No headings, no labels, no commentary.
|
||||
"""
|
||||
return prompt
|
||||
Rewrite the DRAFT ANSWER into a single, final answer.
|
||||
Return ONLY the final answer text.
|
||||
""".strip()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# vLLM call (PRIMARY backend only)
|
||||
# Public API: async, using llm_router
|
||||
# ============================================================
|
||||
|
||||
def _call_primary_llm(prompt: str) -> str:
    """Synchronously POST *prompt* to the primary vLLM completions endpoint
    and return the stripped completion text.

    Raises:
        RuntimeError: when LLM_PRIMARY_URL is not configured.
        requests.HTTPError: on a non-2xx response (via raise_for_status).
        Exception: re-raised after logging when the response body does not
            match the expected vLLM /v1/completions shape.
    """
    if not PRIMARY_URL:
        raise RuntimeError("LLM_PRIMARY_URL is not set; cannot call primary backend for refine.py")

    request_body = {
        "model": PRIMARY_MODEL,
        "prompt": prompt,
        "max_tokens": REFINER_MAX_TOKENS,
        "temperature": REFINER_TEMPERATURE,
    }
    response = requests.post(
        PRIMARY_URL,
        headers={"Content-Type": "application/json"},
        json=request_body,
        timeout=120,
    )
    response.raise_for_status()
    body = response.json()

    # vLLM /v1/completions format: choices[0].text holds the completion.
    try:
        completion = body["choices"][0]["text"]
    except Exception as e:
        logger.error("refine.py: unable to parse primary LLM response: %s", e)
        logger.debug("refine.py raw response: %s", body)
        raise

    return completion.strip()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Public API
|
||||
# ============================================================
|
||||
|
||||
async def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:
    """Main refinement entrypoint used by Cortex.

    Builds the refine prompt from the draft plus reflection / identity /
    RAG context and sends it through the configured backend via llm_router.

    Returns:
        {
            "final_output": <str>,     # what should go to persona / user
            "used_backend": <str|None>,# backend attempted (None if skipped)
            "fallback_used": <bool>,   # True when the raw draft is returned
            optionally:
            "debug": {...}             # only when REFINER_DEBUG=true
        }
    """
    # Nothing to refine — return an empty result without calling anything.
    if not draft_output:
        return {
            "final_output": "",
            "used_backend": None,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(
        draft_output,
        reflection_notes,
        identity_block,
        rag_block,
    )

    # Refinement backend → fallback to Cortex backend → fallback to PRIMARY.
    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"

    try:
        refined = await call_llm(
            prompt,
            backend=backend,
            temperature=REFINER_TEMPERATURE,
        )
        result: Dict[str, Any] = {
            "final_output": refined.strip() if refined else draft_output,
            "used_backend": backend,
            "fallback_used": False,
        }
    except Exception as e:
        # Any backend failure degrades gracefully to the unrefined draft.
        # Lazy %-args instead of an f-string so formatting is skipped when
        # the log level filters the record out.
        logger.error("refine.py backend %s failed: %s", backend, e)
        result = {
            "final_output": draft_output,
            "used_backend": backend,
            "fallback_used": True,
        }

    # Restore the documented REFINER_DEBUG payload (the rework dropped it
    # while keeping the flag).  Truncated so a huge prompt can't nuke logs.
    if REFINER_DEBUG:
        result["debug"] = {
            "prompt": prompt[:4000],
        }

    return result
|
||||
|
||||
@@ -1,42 +1,57 @@
|
||||
# reflection.py
|
||||
from llm.llm_router import call_llm
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from llm.llm_router import call_llm
|
||||
|
||||
|
||||
async def reflect_notes(intake_summary: str, identity_block: dict | None) -> dict:
    """Produce short internal reflection notes for Cortex.

    The notes guide Lyra's reasoning engine and are NOT shown to the user.

    Args:
        intake_summary: Recent conversation summary to reflect on.
        identity_block: Optional identity context (rendered via str()).

    Returns:
        {"notes": [str, ...]} — parsed from the LLM's JSON when possible;
        otherwise the raw LLM text becomes a single note.
    """
    # -----------------------------
    # Build the prompt
    # -----------------------------
    identity_text = ""
    if identity_block:
        identity_text = f"Identity:\n{identity_block}\n\n"

    prompt = (
        f"{identity_text}"
        f"Recent summary:\n{intake_summary}\n\n"
        "You are Lyra's meta-awareness layer. Your job is to produce short, directive "
        "internal notes that guide Lyra’s reasoning engine. These notes are NEVER "
        "shown to the user.\n\n"
        "Rules for output:\n"
        "1. Return ONLY valid JSON.\n"
        "2. JSON must have exactly one key: \"notes\".\n"
        "3. \"notes\" must be a list of 3 to 6 short strings.\n"
        "4. Notes must be actionable (e.g., \"keep it concise\", \"maintain context\").\n"
        "5. No markdown, no apologies, no explanations.\n\n"
        "Return JSON:\n"
        "{ \"notes\": [\"...\"] }\n"
    )

    # -----------------------------
    # Module-specific backend choice: reflection uses its own backend if
    # set, otherwise the cortex backend (default PRIMARY).
    # -----------------------------
    reflection_backend = os.getenv("REFLECTION_LLM")
    cortex_backend = os.getenv("CORTEX_LLM", "PRIMARY").upper()
    backend = (reflection_backend or cortex_backend).upper()

    # -----------------------------
    # Call the selected LLM backend
    # -----------------------------
    raw = await call_llm(prompt, backend=backend)

    print("[Reflection-Raw]:", raw)

    # -----------------------------
    # Try direct JSON first.  Exceptions are narrowed: the previous bare
    # `except:` would also have swallowed KeyboardInterrupt/SystemExit.
    # (json.JSONDecodeError is a subclass of ValueError.)
    # -----------------------------
    try:
        parsed = json.loads(raw.strip())
        if isinstance(parsed, dict) and "notes" in parsed:
            return parsed
    except (ValueError, TypeError):
        pass

    # -----------------------------
    # Try extracting the first {...} blob embedded in surrounding text
    # (non-greedy, DOTALL so it can span lines).
    # -----------------------------
    try:
        match = re.search(r"\{.*?\}", raw, re.S)
        if match:
            parsed = json.loads(match.group(0))
            if isinstance(parsed, dict) and "notes" in parsed:
                return parsed
    except (ValueError, TypeError):
        pass

    # -----------------------------
    # Fallback — treat raw text as a single note
    # -----------------------------
    return {"notes": [raw.strip()]}
|
||||
|
||||
Reference in New Issue
Block a user