project-lyra/cortex/reasoning/refine.py

# refine.py
import os
import json
import logging
from typing import Any, Dict, Optional

from llm.llm_router import call_llm

logger = logging.getLogger(__name__)

# ===============================================
# Configuration
# ===============================================

REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"

# These come from root .env
REFINE_LLM = os.getenv("REFINE_LLM", "").upper()
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()

if VERBOSE_DEBUG:
    logger.setLevel(logging.DEBUG)

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter(
        '%(asctime)s [REFINE] %(levelname)s: %(message)s',
        datefmt='%H:%M:%S'
    ))
    logger.addHandler(console_handler)

    # File handler
    try:
        os.makedirs('/app/logs', exist_ok=True)
        file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s [REFINE] %(levelname)s: %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        ))
        logger.addHandler(file_handler)
        logger.debug("VERBOSE_DEBUG mode enabled for refine.py - logging to file")
    except Exception as e:
        logger.debug(f"VERBOSE_DEBUG mode enabled for refine.py - file logging failed: {e}")


# ===============================================
# Prompt builder
# ===============================================

def build_refine_prompt(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> str:

    try:
        reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
    except Exception:
        reflection_text = str(reflection_notes)

    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"

    return f"""
You are Lyra Cortex's internal refiner.

Your job:
- Fix factual issues.
- Improve clarity.
- Apply reflection notes when helpful.
- Respect identity constraints.
- Apply RAG context as truth source.

Do NOT mention RAG, reflection, internal logic, or this refinement step.

------------------------------
[IDENTITY BLOCK]
{identity_text}

------------------------------
[RAG CONTEXT]
{rag_text}

------------------------------
[DRAFT ANSWER]
{draft_output}

------------------------------
[REFLECTION NOTES]
{reflection_text}

------------------------------
Task:
Rewrite the DRAFT into a single final answer for the user.
Return ONLY the final answer text.
""".strip()


# ===============================================
# Public API — now async & fully router-based
# ===============================================

async def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:

    if not draft_output:
        return {
            "final_output": "",
            "used_backend": None,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(
        draft_output,
        reflection_notes,
        identity_block,
        rag_block,
    )

    # backend priority: REFINE_LLM → CORTEX_LLM → PRIMARY
    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"

    if VERBOSE_DEBUG:
        logger.debug(f"\n{'='*80}")
        logger.debug("[REFINE] Full prompt being sent to LLM:")
        logger.debug(f"{'='*80}")
        logger.debug(prompt)
        logger.debug(f"{'='*80}")
        logger.debug(f"Backend: {backend}, Temperature: {REFINER_TEMPERATURE}")
        logger.debug(f"{'='*80}\n")

    try:
        refined = await call_llm(
            prompt,
            backend=backend,
            temperature=REFINER_TEMPERATURE,
        )

        if VERBOSE_DEBUG:
            logger.debug(f"\n{'='*80}")
            logger.debug("[REFINE] LLM Response received:")
            logger.debug(f"{'='*80}")
            logger.debug(refined)
            logger.debug(f"{'='*80}\n")

        return {
            "final_output": refined.strip() if refined else draft_output,
            "used_backend": backend,
            "fallback_used": False,
        }

    except Exception as e:
        logger.error(f"refine.py backend {backend} failed: {e}")

        if VERBOSE_DEBUG:
            logger.debug("[REFINE] Falling back to draft output due to error")

        return {
            "final_output": draft_output,
            "used_backend": backend,
            "fallback_used": True,
        }