# refine.py
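"""Refinement pass for Lyra Cortex.

Builds a refinement prompt from a draft answer plus optional reflection notes,
identity block, and RAG context, then asks the configured LLM backend (via
llm.llm_router.call_llm) for a single final answer. If the backend call fails,
the unmodified draft is returned.
"""
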
import os
import json
import logging
from typing import Any, Dict, Optional

from llm.llm_router import call_llm

logger = logging.getLogger(__name__)

# ============================================================
# Config
# ============================================================

REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"

# Module-level backend selection
REFINE_LLM = os.getenv("REFINE_LLM", "PRIMARY").upper()
CORTEX_LLM = os.getenv("CORTEX_LLM", "PRIMARY").upper()


# ============================================================
# Prompt builder
# ============================================================

def build_refine_prompt(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> str:
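    """Assemble the refinement prompt from the draft and its supporting context.

    Any missing block is rendered as "(none)". Non-string reflection notes are
    serialized to JSON when possible, otherwise stringified.
    """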
    if reflection_notes is None:
        reflection_text = "(none)"
    elif isinstance(reflection_notes, str):
        reflection_text = reflection_notes
    else:
        try:
            reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
        except Exception:
            reflection_text = str(reflection_notes)

    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"

    return f"""
You are Lyra Cortex's internal refiner.

Your job:
- Fix factual errors, logical gaps, or missing info.
- Use reflection notes for corrections.
- Use RAG context as factual grounding.
- Respect the identity block without adding style or personality.

Never mention RAG, reflection, or internal logic.

------------------------------
[IDENTITY BLOCK]
{identity_text}

------------------------------
[RAG CONTEXT]
{rag_text}

------------------------------
[DRAFT ANSWER]
{draft_output}

------------------------------
[REFLECTION NOTES]
{reflection_text}

------------------------------
Task:
Rewrite the DRAFT ANSWER into a single, final answer.
Return ONLY the final answer text.
""".strip()


# ============================================================
# Public API: async, using llm_router
# ============================================================

async def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:
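    """Refine ``draft_output`` into a final answer using the configured backend.

    Returns a dict with ``final_output``, ``used_backend``, and ``fallback_used``.
    On any backend error the original draft is returned unchanged and
    ``fallback_used`` is set to True.
    """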
    if not draft_output:
        return {
            "final_output": "",
            "used_backend": None,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(
        draft_output,
        reflection_notes,
        identity_block,
        rag_block,
    )
    # Backend preference: REFINE_LLM, falling back to CORTEX_LLM, then "PRIMARY".
    # Both env vars default to "PRIMARY", so the fallbacks only apply when a
    # variable is explicitly set to an empty string.
    backend = REFINE_LLM or CORTEX_LLM or "PRIMARY"
    try:
        refined = await call_llm(
            prompt,
            backend=backend,
            temperature=REFINER_TEMPERATURE,
        )

        return {
            "final_output": refined.strip() if refined else draft_output,
            "used_backend": backend,
            "fallback_used": False,
        }

    except Exception as e:
        logger.error(f"refine.py backend {backend} failed: {e}")

        return {
            "final_output": draft_output,
            "used_backend": backend,
            "fallback_used": True,
        }
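

# Example (sketch, not part of this module's API): how an async caller might
# invoke refine_answer. The surrounding pipeline objects (draft, notes,
# identity, rag) are assumed to exist in the caller's context.
#
#     result = await refine_answer(
#         draft_output=draft,
#         reflection_notes=notes,
#         identity_block=identity,
#         rag_block=rag,
#     )
#     if result["fallback_used"]:
#         logger.warning("refiner fell back to the draft answer")
#     final_text = result["final_output"]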