feat: Refactor LLM router and integrate health check endpoint

- Simplified LLM call logic in llm_router.py, removing tool adapter complexity and enhancing error handling.
- Added health check endpoint to main.py for system status verification.
- Cleaned up router.py by removing unused imports and commented-out code, streamlining the structure.
- Updated docker-compose.yml to unify services under a single Lyra container, enhancing deployment simplicity.
- Created Dockerfile for unified container setup, including both Relay and Cortex services.
- Added QUICKSTART.md for improved onboarding and usage instructions.
- Implemented start.sh script to manage service startup and health checks.
This commit is contained in:
2026-05-29 18:20:56 -04:00
parent 376b8114ad
commit 5f53fb32a4
14 changed files with 802 additions and 1665 deletions
+66 -28
View File
@@ -33,8 +33,8 @@ INTAKE_LLM = os.getenv("INTAKE_LLM", "PRIMARY").upper()
# Read from env SUMMARY_MAX_TOKENS; parsed as int, defaults to 200.
SUMMARY_MAX_TOKENS = int(os.getenv("SUMMARY_MAX_TOKENS", "200"))
# Read from env SUMMARY_TEMPERATURE; parsed as float, defaults to 0.3.
SUMMARY_TEMPERATURE = float(os.getenv("SUMMARY_TEMPERATURE", "0.3"))
# No default is given, so both are None when the variable is unset.
NEOMEM_API = os.getenv("NEOMEM_API")
NEOMEM_KEY = os.getenv("NEOMEM_KEY")
# Base URL used to build the "/summaries" endpoint; defaults to a local
# service on port 7090.
NEBULA_API = os.getenv("NEBULA_API", "http://localhost:7090")
# Optional bearer token; None when unset, in which case no Authorization
# header is sent.
NEBULA_KEY = os.getenv("NEBULA_KEY")
# ─────────────────────────────
# Internal history for L10/L20/L30
@@ -120,7 +120,7 @@ async def summarize_L5(buf: List[Dict[str, Any]]) -> str:
async def summarize_L10(session_id: str, buf: List[Dict[str, Any]]) -> str:
# Reality Check for last 10 exchanges
# "Reality Check" for last 10 exchanges
text = _format_exchanges(buf[-10:])
prompt = f"""
@@ -138,6 +138,9 @@ Reality Check:
L10_HISTORY.setdefault(session_id, [])
L10_HISTORY[session_id].append(summary)
# Send to Nebula
await send_to_nebula(summary, session_id, "L10")
return summary
@@ -165,6 +168,9 @@ Overview:
L20_HISTORY.setdefault(session_id, [])
L20_HISTORY[session_id].append(summary)
# Send to Nebula
await send_to_nebula(summary, session_id, "L20")
return summary
@@ -187,45 +193,77 @@ noting major themes, persistent goals, and shifts.
Continuity Report:
"""
return await _llm(prompt)
summary = await _llm(prompt)
# Send to Nebula
await send_to_nebula(summary, session_id, "L30")
return summary
# ─────────────────────────────
# NeoMem push
# Nebula push
# ─────────────────────────────
def push_to_neomem(summary: str, session_id: str, level: str) -> None:
def _safe_path_component(value: str) -> str:
    """Return *value* reduced to a single, traversal-free path component."""
    # Keep alphanumerics plus "-", "_" and "."; every other character
    # (notably "/" and "\\") becomes "_" so the result cannot span directories.
    cleaned = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in str(value)
    )
    # Strip leading dots so "." and ".." can never be produced.
    return cleaned.lstrip(".") or "unknown"


def _write_summary_fallback(
    payload: dict, session_id: str, level: str, stamp: str
) -> str:
    """Write *payload* as JSON under the fallback directory; return the path."""
    import json

    fallback_dir = os.path.join(os.path.dirname(__file__), "../../.nebula_fallback")
    # One sub-directory per session; makedirs also creates fallback_dir itself.
    session_dir = os.path.join(fallback_dir, _safe_path_component(session_id))
    os.makedirs(session_dir, exist_ok=True)

    filename = f"{_safe_path_component(level)}_{stamp}.json"
    filepath = os.path.join(session_dir, filename)
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)
    return filepath


async def send_to_nebula(summary: str, session_id: str, level: str) -> None:
    """
    Send summary to Nebula vector memory system.
    Falls back to disk storage if Nebula is not available.

    Args:
        summary: Summary text to store. Nothing is sent or written when empty.
        session_id: Session the summary belongs to; included in the payload
            and used (sanitised) as the fallback sub-directory name.
        level: Summary level label (the callers pass "L10", "L20", "L30");
            included in the payload and in the fallback filename.

    This function is best-effort: any failure of the HTTP POST triggers the
    disk fallback, and a failure of the fallback is only reported via print.
    No exception is propagated to the caller.
    """
    if not summary:
        return

    # Capture the time once so the payload timestamp and the fallback
    # filename always refer to the same instant.
    now = datetime.now()
    payload = {
        "summary": summary,
        "session_id": session_id,
        "level": level,
        "timestamp": now.isoformat(),
        "source": "intake",
    }

    # Try HTTP POST to Nebula first
    try:
        # Imported lazily so a missing httpx install degrades to the disk
        # fallback instead of breaking module import.
        import httpx

        headers = {"Content-Type": "application/json"}
        if NEBULA_KEY:
            headers["Authorization"] = f"Bearer {NEBULA_KEY}"

        # Tolerate a trailing slash in the configured base URL.
        url = NEBULA_API.rstrip("/") + "/summaries"
        async with httpx.AsyncClient() as client:
            response = await client.post(
                url,
                json=payload,
                headers=headers,
                timeout=10.0,
            )
            response.raise_for_status()
        print(f"🌌 Nebula updated ({level}) for {session_id}")
        return
    except Exception as e:
        print(f"⚠️ Nebula unavailable, falling back to disk: {e}")

    # Fallback: Write to disk
    try:
        # Microsecond resolution keeps two summaries of the same level written
        # within one second from overwriting each other.
        stamp = now.strftime("%Y%m%d_%H%M%S_%f")
        filepath = _write_summary_fallback(payload, session_id, level, stamp)
        print(f"💾 Saved to disk: {filepath}")
    except Exception as e:
        print(f"❌ Failed to save summary to disk: {e}")
# ─────────────────────────────