feat: Refactor LLM router and integrate health check endpoint
- Simplified LLM call logic in llm_router.py, removing tool adapter complexity and enhancing error handling.
- Added health check endpoint to main.py for system status verification.
- Cleaned up router.py by removing unused imports and commented-out code, streamlining the structure.
- Updated docker-compose.yml to unify services under a single Lyra container, enhancing deployment simplicity.
- Created Dockerfile for unified container setup, including both Relay and Cortex services.
- Added QUICKSTART.md for improved onboarding and usage instructions.
- Implemented start.sh script to manage service startup and health checks.
This commit is contained in:
+66
-28
@@ -33,8 +33,8 @@ INTAKE_LLM = os.getenv("INTAKE_LLM", "PRIMARY").upper()
|
||||
SUMMARY_MAX_TOKENS = int(os.getenv("SUMMARY_MAX_TOKENS", "200"))
|
||||
SUMMARY_TEMPERATURE = float(os.getenv("SUMMARY_TEMPERATURE", "0.3"))
|
||||
|
||||
NEOMEM_API = os.getenv("NEOMEM_API")
|
||||
NEOMEM_KEY = os.getenv("NEOMEM_KEY")
|
||||
NEBULA_API = os.getenv("NEBULA_API", "http://localhost:7090")
|
||||
NEBULA_KEY = os.getenv("NEBULA_KEY")
|
||||
|
||||
# ─────────────────────────────
|
||||
# Internal history for L10/L20/L30
|
||||
@@ -120,7 +120,7 @@ async def summarize_L5(buf: List[Dict[str, Any]]) -> str:
|
||||
|
||||
|
||||
async def summarize_L10(session_id: str, buf: List[Dict[str, Any]]) -> str:
|
||||
# “Reality Check” for last 10 exchanges
|
||||
# "Reality Check" for last 10 exchanges
|
||||
text = _format_exchanges(buf[-10:])
|
||||
|
||||
prompt = f"""
|
||||
@@ -138,6 +138,9 @@ Reality Check:
|
||||
L10_HISTORY.setdefault(session_id, [])
|
||||
L10_HISTORY[session_id].append(summary)
|
||||
|
||||
# Send to Nebula
|
||||
await send_to_nebula(summary, session_id, "L10")
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
@@ -165,6 +168,9 @@ Overview:
|
||||
L20_HISTORY.setdefault(session_id, [])
|
||||
L20_HISTORY[session_id].append(summary)
|
||||
|
||||
# Send to Nebula
|
||||
await send_to_nebula(summary, session_id, "L20")
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
@@ -187,45 +193,77 @@ noting major themes, persistent goals, and shifts.
|
||||
|
||||
Continuity Report:
|
||||
"""
|
||||
return await _llm(prompt)
|
||||
summary = await _llm(prompt)
|
||||
|
||||
# Send to Nebula
|
||||
await send_to_nebula(summary, session_id, "L30")
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
# ─────────────────────────────
|
||||
# NeoMem push
|
||||
# Nebula push
|
||||
# ─────────────────────────────
|
||||
|
||||
def push_to_neomem(summary: str, session_id: str, level: str) -> None:
|
||||
async def send_to_nebula(summary: str, session_id: str, level: str) -> None:
|
||||
"""
|
||||
Fire-and-forget push of a summary into NeoMem.
|
||||
Send summary to Nebula vector memory system.
|
||||
Falls back to disk storage if Nebula is not available.
|
||||
"""
|
||||
if not NEOMEM_API or not summary:
|
||||
if not summary:
|
||||
return
|
||||
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if NEOMEM_KEY:
|
||||
headers["Authorization"] = f"Bearer {NEOMEM_KEY}"
|
||||
|
||||
payload = {
|
||||
"messages": [{"role": "assistant", "content": summary}],
|
||||
"user_id": "brian",
|
||||
"metadata": {
|
||||
"source": "intake",
|
||||
"session_id": session_id,
|
||||
"level": level,
|
||||
},
|
||||
"summary": summary,
|
||||
"session_id": session_id,
|
||||
"level": level,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"source": "intake",
|
||||
}
|
||||
|
||||
# Try HTTP POST to Nebula first
|
||||
try:
|
||||
import requests
|
||||
requests.post(
|
||||
f"{NEOMEM_API}/memories",
|
||||
json=payload,
|
||||
headers=headers,
|
||||
timeout=20,
|
||||
).raise_for_status()
|
||||
print(f"🧠 NeoMem updated ({level}) for {session_id}")
|
||||
import httpx
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if NEBULA_KEY:
|
||||
headers["Authorization"] = f"Bearer {NEBULA_KEY}"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
f"{NEBULA_API}/summaries",
|
||||
json=payload,
|
||||
headers=headers,
|
||||
timeout=10.0,
|
||||
)
|
||||
response.raise_for_status()
|
||||
print(f"🌌 Nebula updated ({level}) for {session_id}")
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
print(f"NeoMem push failed ({level}, {session_id}): {e}")
|
||||
print(f"⚠️ Nebula unavailable, falling back to disk: {e}")
|
||||
|
||||
# Fallback: Write to disk
|
||||
try:
|
||||
fallback_dir = os.path.join(os.path.dirname(__file__), "../../.nebula_fallback")
|
||||
os.makedirs(fallback_dir, exist_ok=True)
|
||||
|
||||
# Create session directory
|
||||
session_dir = os.path.join(fallback_dir, session_id)
|
||||
os.makedirs(session_dir, exist_ok=True)
|
||||
|
||||
# Write summary to timestamped file
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"{level}_{timestamp}.json"
|
||||
filepath = os.path.join(session_dir, filename)
|
||||
|
||||
import json
|
||||
with open(filepath, "w") as f:
|
||||
json.dump(payload, f, indent=2)
|
||||
|
||||
print(f"💾 Saved to disk: {filepath}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Failed to save summary to disk: {e}")
|
||||
|
||||
|
||||
# ─────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user