# Source: project-lyra/cortex/reasoning/refine.py
# refine.py
import os
import json
import logging
from typing import Any, Dict, Optional
from llm.llm_router import call_llm
# Module-level logger; inherits handlers/level from the host app's logging config.
logger = logging.getLogger(__name__)
# ===============================================
# Configuration (all overridable via environment)
# ===============================================
# Sampling temperature for the refinement pass; low by default to keep rewrites conservative.
REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
# Token budget for the refined answer.
# NOTE(review): defined here but not passed to call_llm below — confirm the router applies it.
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
# Extra debug output toggle (any casing of "true" enables it).
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"
# These come from root .env
# REFINE_LLM defaults to "" (falsy), which lets backend selection fall through to CORTEX_LLM.
REFINE_LLM = os.getenv("REFINE_LLM", "").upper()
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
# ===============================================
# Prompt builder
# ===============================================
def build_refine_prompt(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> str:
    """Assemble the internal prompt for the refinement LLM pass.

    Args:
        draft_output: The draft answer to be rewritten.
        reflection_notes: Arbitrary reflection data; JSON-serialized when possible.
        identity_block: Optional identity/persona constraints text.
        rag_block: Optional retrieved context treated as the truth source.

    Returns:
        The fully rendered prompt string (leading/trailing whitespace stripped).
    """
    if reflection_notes is None:
        # Match the "(none)" placeholder used for the other optional blocks
        # instead of serializing None to the JSON literal "null".
        reflection_text = "(none)"
    else:
        try:
            reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
        except (TypeError, ValueError):
            # Non-JSON-serializable notes (e.g. custom objects) fall back to str().
            reflection_text = str(reflection_notes)
    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"
    return f"""
You are Lyra Cortex's internal refiner.
Your job:
- Fix factual issues.
- Improve clarity.
- Apply reflection notes when helpful.
- Respect identity constraints.
- Apply RAG context as truth source.
Do NOT mention RAG, reflection, internal logic, or this refinement step.
------------------------------
[IDENTITY BLOCK]
{identity_text}
------------------------------
[RAG CONTEXT]
{rag_text}
------------------------------
[DRAFT ANSWER]
{draft_output}
------------------------------
[REFLECTION NOTES]
{reflection_text}
------------------------------
Task:
Rewrite the DRAFT into a single final answer for the user.
Return ONLY the final answer text.
""".strip()
# ===============================================
# Public API — now async & fully router-based
# ===============================================
async def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:
    """Refine a draft answer through the LLM router, degrading gracefully.

    Args:
        draft_output: The draft to refine; empty/falsy input short-circuits.
        reflection_notes: Reflection data forwarded to the prompt builder.
        identity_block: Optional identity constraints text.
        rag_block: Optional retrieved context text.

    Returns:
        Dict with keys:
            final_output (str): refined text, or the original draft on failure,
                or "" when there was nothing to refine.
            used_backend (str | None): backend attempted, or None when skipped.
            fallback_used (bool): True when the LLM call failed and the
                unrefined draft was returned.
    """
    # Nothing to refine: skip the LLM round-trip entirely.
    if not draft_output:
        return {
            "final_output": "",
            "used_backend": None,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(
        draft_output,
        reflection_notes,
        identity_block,
        rag_block,
    )

    # Backend priority: REFINE_LLM -> CORTEX_LLM -> "PRIMARY" ("" is falsy).
    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"
    try:
        # NOTE(review): REFINER_MAX_TOKENS is configured above but not passed
        # here — confirm whether call_llm applies its own token limit.
        refined = await call_llm(
            prompt,
            backend=backend,
            temperature=REFINER_TEMPERATURE,
        )
    except Exception:
        # Any backend failure degrades to the unrefined draft rather than
        # surfacing an error to the caller. logger.exception records the
        # traceback; %-style args defer formatting until a handler needs it.
        logger.exception("refine.py backend %s failed", backend)
        return {
            "final_output": draft_output,
            "used_backend": backend,
            "fallback_used": True,
        }

    # An empty/None response from the router also falls back to the draft.
    return {
        "final_output": refined.strip() if refined else draft_output,
        "used_backend": backend,
        "fallback_used": False,
    }