# Source: project-lyra/cortex/reasoning/refine.py
# refine.py
import os
import json
import logging
from typing import Any, Dict, Optional
from llm.llm_router import call_llm
# Module-level logger; inherits handlers/level from the host app's logging config.
logger = logging.getLogger(__name__)
# ===============================================
# Configuration (all overridable via environment)
# ===============================================
# Sampling temperature for the refinement pass; low by default to keep rewrites conservative.
REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
# Token budget for the refined answer.
# NOTE(review): defined here but not passed to call_llm below — confirm the router applies it.
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
# Extra debug output toggle (any casing of "true" enables it).
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"
# These come from root .env
# REFINE_LLM defaults to "" (falsy), which lets backend selection fall through to CORTEX_LLM.
REFINE_LLM = os.getenv("REFINE_LLM", "").upper()
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
# ===============================================
# Prompt builder
# ===============================================
def build_refine_prompt(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> str:
    """Assemble the internal prompt for the refinement LLM pass.

    Args:
        draft_output: The draft answer to be rewritten.
        reflection_notes: Arbitrary reflection data; JSON-serialized when possible.
        identity_block: Optional identity/persona constraints text.
        rag_block: Optional retrieved context treated as the truth source.

    Returns:
        The fully rendered prompt string (leading/trailing whitespace stripped).
    """
    if reflection_notes is None:
        # Match the "(none)" placeholder used for the other optional blocks
        # instead of serializing None to the JSON literal "null".
        reflection_text = "(none)"
    else:
        try:
            reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
        except (TypeError, ValueError):
            # Non-JSON-serializable notes (e.g. custom objects) fall back to str().
            reflection_text = str(reflection_notes)
    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"
    return f"""
You are Lyra Cortex's internal refiner.
Your job:
- Fix factual issues.
- Improve clarity.
- Apply reflection notes when helpful.
- Respect identity constraints.
- Apply RAG context as truth source.
Do NOT mention RAG, reflection, internal logic, or this refinement step.
------------------------------
[IDENTITY BLOCK]
{identity_text}
------------------------------
[RAG CONTEXT]
{rag_text}
------------------------------
[DRAFT ANSWER]
{draft_output}
------------------------------
[REFLECTION NOTES]
{reflection_text}
------------------------------
Task:
Rewrite the DRAFT into a single final answer for the user.
Return ONLY the final answer text.
""".strip()
# ===============================================
# Public API — now async & fully router-based
# ===============================================
async def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:
    """Refine a draft answer through the LLM router, degrading gracefully.

    Args:
        draft_output: The draft to refine; empty/falsy input short-circuits.
        reflection_notes: Reflection data forwarded to the prompt builder.
        identity_block: Optional identity constraints text.
        rag_block: Optional retrieved context text.

    Returns:
        Dict with keys:
            final_output (str): refined text, or the original draft on failure,
                or "" when there was nothing to refine.
            used_backend (str | None): backend attempted, or None when skipped.
            fallback_used (bool): True when the LLM call failed and the
                unrefined draft was returned.
    """
    # Nothing to refine: skip the LLM round-trip entirely.
    if not draft_output:
        return {
            "final_output": "",
            "used_backend": None,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(
        draft_output,
        reflection_notes,
        identity_block,
        rag_block,
    )

    # Backend priority: REFINE_LLM -> CORTEX_LLM -> "PRIMARY" ("" is falsy).
    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"
    try:
        # NOTE(review): REFINER_MAX_TOKENS is configured above but not passed
        # here — confirm whether call_llm applies its own token limit.
        refined = await call_llm(
            prompt,
            backend=backend,
            temperature=REFINER_TEMPERATURE,
        )
    except Exception:
        # Any backend failure degrades to the unrefined draft rather than
        # surfacing an error to the caller. logger.exception records the
        # traceback; %-style args defer formatting until a handler needs it.
        logger.exception("refine.py backend %s failed", backend)
        return {
            "final_output": draft_output,
            "used_backend": backend,
            "fallback_used": True,
        }

    # An empty/None response from the router also falls back to the draft.
    return {
        "final_output": refined.strip() if refined else draft_output,
        "used_backend": backend,
        "fallback_used": False,
    }