# refine.py
"""Refinement stage for Lyra Cortex.

Takes a draft answer plus optional reflection notes, identity constraints,
and RAG context, and asks an LLM (via the llm_router) to rewrite the draft
into a single polished final answer. Falls back to the unmodified draft if
the LLM call fails.
"""

import json
import logging
import os
from typing import Any, Dict, Optional

from llm.llm_router import call_llm

logger = logging.getLogger(__name__)

# ===============================================
# Configuration
# ===============================================
REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
# NOTE(review): REFINER_MAX_TOKENS is read here but never forwarded to
# call_llm() below — confirm whether the router applies its own token limit
# or whether this should be passed as a parameter.
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"

# These come from root .env
REFINE_LLM = os.getenv("REFINE_LLM", "").upper()
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()

if VERBOSE_DEBUG:
    logger.setLevel(logging.DEBUG)

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter(
        '%(asctime)s [REFINE] %(levelname)s: %(message)s',
        datefmt='%H:%M:%S'
    ))
    logger.addHandler(console_handler)

    # File handler — best-effort: the log directory may not exist or may not
    # be writable in every deployment, so failure only downgrades to console.
    try:
        os.makedirs('/app/logs', exist_ok=True)
        file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s [REFINE] %(levelname)s: %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        ))
        logger.addHandler(file_handler)
        logger.debug("VERBOSE_DEBUG mode enabled for refine.py - logging to file")
    except Exception as e:
        logger.debug(f"VERBOSE_DEBUG mode enabled for refine.py - file logging failed: {e}")


# ===============================================
# Prompt builder
# ===============================================
def build_refine_prompt(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> str:
    """Build the full refinement prompt sent to the LLM.

    Args:
        draft_output: The draft answer to be rewritten.
        reflection_notes: Arbitrary reflection data; JSON-serialized when
            possible, otherwise stringified.
        identity_block: Identity constraints text, or None.
        rag_block: RAG context text, or None.

    Returns:
        The complete prompt string, stripped of surrounding whitespace.
    """
    # Reflection notes may be any structure; fall back to str() for objects
    # json can't serialize.
    try:
        reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
    except Exception:
        reflection_text = str(reflection_notes)

    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"

    return f"""
You are Lyra Cortex's internal refiner.

Your job:
- Fix factual issues.
- Improve clarity.
- Apply reflection notes when helpful.
- Respect identity constraints.
- Apply RAG context as truth source.

Do NOT mention RAG, reflection, internal logic, or this refinement step.

------------------------------
[IDENTITY BLOCK]
{identity_text}

------------------------------
[RAG CONTEXT]
{rag_text}

------------------------------
[DRAFT ANSWER]
{draft_output}

------------------------------
[REFLECTION NOTES]
{reflection_text}

------------------------------

Task:
Rewrite the DRAFT into a single final answer for the user.

Return ONLY the final answer text.
""".strip()


def _debug_banner(title: str, body: str, footer: Optional[str] = None) -> None:
    """Emit a framed '=' banner dump to the debug log (VERBOSE_DEBUG only)."""
    logger.debug(f"\n{'='*80}")
    logger.debug(title)
    logger.debug(f"{'='*80}")
    logger.debug(body)
    if footer is not None:
        logger.debug(f"{'='*80}")
        logger.debug(footer)
    logger.debug(f"{'='*80}\n")


# ===============================================
# Public API — now async & fully router-based
# ===============================================
async def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:
    """Refine a draft answer through the configured LLM backend.

    Backend priority: REFINE_LLM → CORTEX_LLM → "PRIMARY".

    Args:
        draft_output: Draft text to refine; empty input short-circuits.
        reflection_notes: Optional reflection data folded into the prompt.
        identity_block: Optional identity-constraint text.
        rag_block: Optional RAG context text.

    Returns:
        Dict with keys:
            final_output: Refined text, or the original draft on failure.
            used_backend: Name of the backend attempted (None if no draft).
            fallback_used: True when the LLM call failed and the draft was
                returned unchanged.
    """
    # Nothing to refine — return an empty result rather than calling the LLM.
    if not draft_output:
        return {
            "final_output": "",
            "used_backend": None,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(
        draft_output,
        reflection_notes,
        identity_block,
        rag_block,
    )

    # backend priority: REFINE_LLM → CORTEX_LLM → PRIMARY
    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"

    if VERBOSE_DEBUG:
        _debug_banner(
            "[REFINE] Full prompt being sent to LLM:",
            prompt,
            footer=f"Backend: {backend}, Temperature: {REFINER_TEMPERATURE}",
        )

    try:
        refined = await call_llm(
            prompt,
            backend=backend,
            temperature=REFINER_TEMPERATURE,
        )

        if VERBOSE_DEBUG:
            _debug_banner("[REFINE] LLM Response received:", refined)

        return {
            # Empty/None responses fall back to the draft rather than
            # returning nothing to the user.
            "final_output": refined.strip() if refined else draft_output,
            "used_backend": backend,
            "fallback_used": False,
        }

    except Exception:
        # logger.exception preserves the traceback, which plain
        # logger.error(f"... {e}") discarded.
        logger.exception("refine.py backend %s failed", backend)
        if VERBOSE_DEBUG:
            logger.debug("[REFINE] Falling back to draft output due to error")
        return {
            "final_output": draft_output,
            "used_backend": backend,
            "fallback_used": True,
        }