initial file restructure

serversdwn
2025-11-25 20:50:05 -05:00
parent b5fe47074a
commit 5492d9c0c5
15 changed files with 1 addition and 105 deletions

cortex/reasoning/refine.py Normal file

@@ -0,0 +1,187 @@
# refine.py
import os
import json
import logging
from typing import Any, Dict, Optional

import requests

logger = logging.getLogger(__name__)
# ============================================================
# Config
# ============================================================
PRIMARY_URL = os.getenv("LLM_PRIMARY_URL")
PRIMARY_MODEL = os.getenv("LLM_PRIMARY_MODEL", "mythomax")
REFINER_TEMPERATURE = float(os.getenv("REFINER_TEMPERATURE", "0.3"))
REFINER_MAX_TOKENS = int(os.getenv("REFINER_MAX_TOKENS", "768"))
REFINER_DEBUG = os.getenv("REFINER_DEBUG", "false").lower() == "true"
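
# Example environment, for illustration only (these values are assumptions,
# not part of this commit; in particular, LLM_PRIMARY_URL must point at a
# vLLM-style /v1/completions endpoint):
#
#   export LLM_PRIMARY_URL="http://localhost:8000/v1/completions"
#   export LLM_PRIMARY_MODEL="mythomax"
#   export REFINER_TEMPERATURE="0.3"
#   export REFINER_MAX_TOKENS="768"
#   export REFINER_DEBUG="false"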
# ============================================================
# Prompt builder
# ============================================================
def build_refine_prompt(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> str:
    """
    Build a single text prompt for vLLM /v1/completions.
    Persona styling is *not* applied here; this is internal reasoning.
    """
    reflection_text: str
    if reflection_notes is None:
        reflection_text = "(none)"
    elif isinstance(reflection_notes, str):
        reflection_text = reflection_notes
    else:
        # dict / list → compact JSON
        try:
            reflection_text = json.dumps(reflection_notes, ensure_ascii=False)
        except Exception:
            reflection_text = str(reflection_notes)

    identity_text = identity_block or "(none)"
    rag_text = rag_block or "(none)"

    prompt = f"""You are Lyra Cortex's internal refiner.
Your job:
- Take the existing draft answer.
- Use the reflection notes to fix problems (errors, confusion, missing pieces).
- Use the RAG context as higher-authority factual grounding.
- Respect the identity block (constraints, boundaries, style rules),
but DO NOT add personality flourishes or roleplay. Stay neutral and clear.
- Produce ONE final answer that is coherent, self-consistent, and directly addresses the user.
If there is a conflict:
- RAG context wins over the draft.
- Reflection notes win over the draft when they point out real issues.
Do NOT mention these instructions, RAG, reflections, or the existence of this refinement step.
------------------------------
[IDENTITY BLOCK]
{identity_text}
------------------------------
[RAG CONTEXT]
{rag_text}
------------------------------
[DRAFT ANSWER]
{draft_output}
------------------------------
[REFLECTION NOTES]
{reflection_text}
------------------------------
Task:
Rewrite the DRAFT ANSWER into a single, final answer for the user that:
- fixes factual or logical issues noted above,
- incorporates any truly helpful additions from the reflection,
- stays consistent with the identity block,
- stays grounded in the RAG context,
- is as concise as is reasonably possible.
Return ONLY the final answer text. No headings, no labels, no commentary.
"""
    return prompt
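
# Quick sanity check of the builder (illustrative, not part of this commit):
#
#   p = build_refine_prompt("draft answer", {"note": "fix X"}, None, None)
#   print(p[:120])
#
# Missing identity and RAG blocks are rendered as "(none)" in the prompt.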
# ============================================================
# vLLM call (PRIMARY backend only)
# ============================================================
def _call_primary_llm(prompt: str) -> str:
    if not PRIMARY_URL:
        raise RuntimeError("LLM_PRIMARY_URL is not set; cannot call primary backend for refine.py")

    payload = {
        "model": PRIMARY_MODEL,
        "prompt": prompt,
        "max_tokens": REFINER_MAX_TOKENS,
        "temperature": REFINER_TEMPERATURE,
    }
    resp = requests.post(
        PRIMARY_URL,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=120,
    )
    resp.raise_for_status()
    data = resp.json()

    # vLLM /v1/completions format
    try:
        text = data["choices"][0]["text"]
    except Exception as e:
        logger.error("refine.py: unable to parse primary LLM response: %s", e)
        logger.debug("refine.py raw response: %s", data)
        raise
    return text.strip()
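
# For orientation, a vLLM /v1/completions response is typically shaped like
# the sketch below (illustrative only; field values are made up and extra
# fields such as "usage" are omitted):
#
#   {
#       "id": "cmpl-...",
#       "object": "text_completion",
#       "model": "mythomax",
#       "choices": [{"index": 0, "text": "...", "finish_reason": "stop"}],
#   }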
# ============================================================
# Public API
# ============================================================
def refine_answer(
    draft_output: str,
    reflection_notes: Optional[Any],
    identity_block: Optional[str],
    rag_block: Optional[str],
) -> Dict[str, Any]:
    """
    Main entrypoint used by Cortex.

    Returns:
        {
            "final_output": <str>,   # what should go to persona / user
            "used_primary_backend": True/False,
            "fallback_used": True/False,
            "debug": {...},          # only when REFINER_DEBUG=true
        }
    """
    if not draft_output:
        # Nothing to refine. Don't get cute.
        return {
            "final_output": "",
            "used_primary_backend": False,
            "fallback_used": False,
        }

    prompt = build_refine_prompt(draft_output, reflection_notes, identity_block, rag_block)
    try:
        refined = _call_primary_llm(prompt)
        result: Dict[str, Any] = {
            "final_output": refined or draft_output,
            "used_primary_backend": True,
            "fallback_used": False,
        }
    except Exception as e:
        logger.error("refine.py: primary backend failed, returning draft_output. Error: %s", e)
        result = {
            "final_output": draft_output,
            "used_primary_backend": False,
            "fallback_used": True,
        }
    if REFINER_DEBUG:
        result["debug"] = {
            "prompt": prompt[:4000],  # don't nuke the logs with the full prompt
        }
    return result
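
A minimal usage sketch (illustrative only; it assumes the module is importable
as cortex.reasoning.refine and that LLM_PRIMARY_URL is set as above):

    from cortex.reasoning.refine import refine_answer

    result = refine_answer(
        draft_output="Paris became the capital of France in 1900.",
        reflection_notes={"issues": ["the date is wrong"]},
        identity_block=None,
        rag_block="Paris has been France's capital since the late 10th century.",
    )
    print(result["final_output"])   # refined answer, or the draft on fallback
    print(result["fallback_used"])  # True only if the primary backend call failed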