# refine.py

import os
import json
import logging
from typing import Any, Dict, Optional

from llm.llm_router import call_llm

logger = logging.getLogger(__name__)

# ===============================================
# Configuration
# ===============================================

REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"
VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"

# These come from root .env
REFINE_LLM = os.getenv("REFINE_LLM", "").upper()
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()
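
# Illustrative .env snippet (values below are assumptions for
# demonstration, not shipped defaults):
#
#   REFINE_LLM=SECONDARY      # optional override for the refinement step
#   CORTEX_LLM=PRIMARY        # used when REFINE_LLM is unset
#   REFINER_TEMPERATURE=0.3
#   REFINER_MAX_TOKENS=768
#   VERBOSE_DEBUG=true        # turns on the DEBUG logging configured below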

if VERBOSE_DEBUG:
    logger.setLevel(logging.DEBUG)

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(logging.Formatter(
        '%(asctime)s [REFINE] %(levelname)s: %(message)s',
        datefmt='%H:%M:%S'
    ))
    logger.addHandler(console_handler)

    # File handler
    try:
        os.makedirs('/app/logs', exist_ok=True)
        file_handler = logging.FileHandler('/app/logs/cortex_verbose_debug.log', mode='a')
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s [REFINE] %(levelname)s: %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        ))
        logger.addHandler(file_handler)
        logger.debug("VERBOSE_DEBUG mode enabled for refine.py - logging to file")
    except Exception as e:
        logger.debug(f"VERBOSE_DEBUG mode enabled for refine.py - file logging failed: {e}")
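
# With the formatters above, a console record renders roughly as:
#   14:03:07 [REFINE] DEBUG: [REFINE] Full prompt being sent to LLM:
# (the timestamp is a made-up example; the file handler adds the date)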


# ===============================================
# Prompt builder
# ===============================================

def build_refine_prompt(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> str:
    """Assemble the refinement prompt from the draft answer, reflection
    notes, identity constraints, and RAG context."""
    try:
        reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
    except Exception:
        reflection_text = str(reflection_notes)

    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"

    return f"""
You are Lyra Cortex's internal refiner.

Your job:
- Fix factual issues.
- Improve clarity.
- Apply reflection notes when helpful.
- Respect identity constraints.
- Apply RAG context as truth source.

Do NOT mention RAG, reflection, internal logic, or this refinement step.

------------------------------
[IDENTITY BLOCK]
{identity_text}

------------------------------
[RAG CONTEXT]
{rag_text}

------------------------------
[DRAFT ANSWER]
{draft_output}

------------------------------
[REFLECTION NOTES]
{reflection_text}

------------------------------
Task:
Rewrite the DRAFT into a single final answer for the user.
Return ONLY the final answer text.
""".strip()
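
# Example call (illustrative; all argument values here are made up):
#
#   prompt = build_refine_prompt(
#       draft_output="Paris is the capitol of France.",
#       reflection_notes={"issues": ["fix spelling of 'capital'"]},
#       identity_block=None,
#       rag_block=None,
#   )
#   # -> one prompt string, with "(none)" standing in for the empty blocks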


# ===============================================
# Public API — now async & fully router-based
# ===============================================

async def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:
    """Refine a draft answer through the LLM router.

    Returns a dict with the final text, the backend that handled the
    call, and a flag marking whether the unrefined draft was returned
    because the backend failed.
    """
    if not draft_output:
        return {
            "final_output": "",
            "used_backend": None,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(
        draft_output,
        reflection_notes,
        identity_block,
        rag_block,
    )

    # Backend priority: REFINE_LLM → CORTEX_LLM → PRIMARY
    # (unset env values are empty strings, so `or` walks the chain)
    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"

    if VERBOSE_DEBUG:
        logger.debug(f"\n{'='*80}")
        logger.debug("[REFINE] Full prompt being sent to LLM:")
        logger.debug(f"{'='*80}")
        logger.debug(prompt)
        logger.debug(f"{'='*80}")
        logger.debug(f"Backend: {backend}, Temperature: {REFINER_TEMPERATURE}")
        logger.debug(f"{'='*80}\n")

    try:
        refined = await call_llm(
            prompt,
            backend=backend,
            temperature=REFINER_TEMPERATURE,
        )

        if VERBOSE_DEBUG:
            logger.debug(f"\n{'='*80}")
            logger.debug("[REFINE] LLM Response received:")
            logger.debug(f"{'='*80}")
            logger.debug(refined)
            logger.debug(f"{'='*80}\n")

        return {
            "final_output": refined.strip() if refined else draft_output,
            "used_backend": backend,
            "fallback_used": False,
        }

    except Exception as e:
        logger.error(f"refine.py backend {backend} failed: {e}")

        if VERBOSE_DEBUG:
            logger.debug("[REFINE] Falling back to draft output due to error")

        return {
            "final_output": draft_output,
            "used_backend": backend,
            "fallback_used": True,
        }
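

# ===============================================
# Manual smoke test (illustrative sketch; assumes a reachable
# backend behind llm.llm_router.call_llm and a populated .env)
# ===============================================

if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        # Hypothetical inputs, not part of the real pipeline.
        result = await refine_answer(
            draft_output="Paris is the capitol of France.",
            reflection_notes={"issues": ["fix spelling of 'capital'"]},
            identity_block=None,
            rag_block=None,
        )
        print(result["used_backend"], result["fallback_used"])
        print(result["final_output"])

    asyncio.run(_demo())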