Update to v0.9.1 #1

Merged
serversdown merged 44 commits from dev into main 2026-01-18 02:46:25 -05:00
4 changed files with 313 additions and 89 deletions
Showing only changes of commit 320bf4439b

View File

@@ -13,7 +13,6 @@ const PORT = Number(process.env.PORT || 7078);
 // core endpoints
 const CORTEX_REASON = process.env.CORTEX_REASON_URL || "http://cortex:7081/reason";
 const CORTEX_INGEST = process.env.CORTEX_INGEST_URL || "http://cortex:7081/ingest";
-const INTAKE_URL = process.env.INTAKE_URL || "http://intake:7080/add_exchange";
 // -----------------------------------------------------
 // Helper request wrapper
@@ -28,6 +27,7 @@ async function postJSON(url, data) {
   const raw = await resp.text();
   let json;
+  // Try to parse JSON safely
   try {
     json = raw ? JSON.parse(raw) : null;
   } catch (e) {
@@ -45,7 +45,7 @@ async function postJSON(url, data) {
 // Shared chat handler logic
 // -----------------------------------------------------
 async function handleChatRequest(session_id, user_msg) {
-  // 1. → Cortex.reason
+  // 1. → Cortex.reason: the main pipeline
   let reason;
   try {
     reason = await postJSON(CORTEX_REASON, {
@@ -60,20 +60,16 @@ async function handleChatRequest(session_id, user_msg) {
   const persona = reason.final_output || reason.persona || "(no persona text)";
   // 2. → Cortex.ingest (async, non-blocking)
+  // Cortex might still want this for separate ingestion pipeline.
   postJSON(CORTEX_INGEST, {
     session_id,
     user_msg,
     assistant_msg: persona
-  }).catch(e => console.warn("Relay → Cortex.ingest failed:", e.message));
-  // 3. → Intake summary (async, non-blocking)
-  postJSON(INTAKE_URL, {
-    session_id,
-    user_msg,
-    assistant_msg: persona
-  }).catch(e => console.warn("Relay → Intake failed:", e.message));
-  // 4. Return result
+  }).catch(e =>
+    console.warn("Relay → Cortex.ingest failed:", e.message)
+  );
+  // 3. Return corrected result
   return {
     session_id,
     reply: persona
@@ -88,11 +84,10 @@ app.get("/_health", (_, res) => {
 });
 // -----------------------------------------------------
-// OPENAI-COMPATIBLE ENDPOINT (for UI)
+// OPENAI-COMPATIBLE ENDPOINT (for UI & clients)
 // -----------------------------------------------------
 app.post("/v1/chat/completions", async (req, res) => {
   try {
-    // Extract from OpenAI format
     const session_id = req.body.session_id || req.body.user || "default";
     const messages = req.body.messages || [];
     const lastMessage = messages[messages.length - 1];
@@ -104,10 +99,8 @@ app.post("/v1/chat/completions", async (req, res) => {
     console.log(`Relay (v1) → received: "${user_msg}"`);
-    // Call the same logic as /chat
     const result = await handleChatRequest(session_id, user_msg);
-    // Return in OpenAI format
     return res.json({
       id: `chatcmpl-${Date.now()}`,
       object: "chat.completion",
@@ -129,7 +122,7 @@ app.post("/v1/chat/completions", async (req, res) => {
     });
   } catch (err) {
-    console.error("Relay v1 endpoint fatal:", err);
+    console.error("Relay v1 fatal:", err);
     res.status(500).json({
       error: {
         message: err.message || String(err),
@@ -141,7 +134,7 @@ app.post("/v1/chat/completions", async (req, res) => {
 });
 // -----------------------------------------------------
-// MAIN ENDPOINT (new canonical)
+// MAIN ENDPOINT (canonical Lyra UI entrance)
 // -----------------------------------------------------
 app.post("/chat", async (req, res) => {
   try {
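With the Intake hop removed, the relay now only talks to Cortex; clients are unaffected as long as they speak the OpenAI chat format. For reference, a minimal client sketch against the /v1/chat/completions endpoint above, assuming the default PORT of 7078; the truncated hunk does not show the choices array being built, so the response shape used here is an assumption based on the "chat.completion" object:

    import httpx  # sketch only; any HTTP client works

    resp = httpx.post(
        "http://localhost:7078/v1/chat/completions",
        json={
            # session_id is read first, then "user", then "default"
            "session_id": "demo",
            "messages": [{"role": "user", "content": "Hello, Lyra"}],
        },
        timeout=30.0,
    )
    resp.raise_for_status()
    data = resp.json()
    # Assumed OpenAI-compatible shape: choices[0].message.content
    print(data["choices"][0]["message"]["content"])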

View File

@@ -15,13 +15,14 @@ import logging
 from datetime import datetime
 from typing import Dict, Any, Optional, List
 import httpx
+from intake.intake import summarize_context
 from neomem_client import NeoMemClient
 # -----------------------------
 # Configuration
 # -----------------------------
-INTAKE_API_URL = os.getenv("INTAKE_API_URL", "http://intake:7080")
 NEOMEM_API = os.getenv("NEOMEM_API", "http://neomem-api:8000")
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.4"))
 VERBOSE_DEBUG = os.getenv("VERBOSE_DEBUG", "false").lower() == "true"
@@ -89,69 +90,27 @@ def _init_session(session_id: str) -> Dict[str, Any]:
 # -----------------------------
 # Intake context retrieval
 # -----------------------------
-async def _get_intake_context(session_id: str) -> Dict[str, Any]:
+async def _get_intake_context(session_id: str, messages: List[Dict[str, str]]):
     """
-    Retrieve multilevel summaries from Intake /context endpoint.
-    Returns L1-L30 summary hierarchy:
-    - L1: Last 5 exchanges
-    - L5: Last 10 exchanges (reality check)
-    - L10: Intermediate checkpoint
-    - L20: Session overview
-    - L30: Continuity report
-    Args:
-        session_id: Session identifier
-    Returns:
-        Dict with multilevel summaries or empty structure on failure
+    Internal Intake — Direct call to summarize_context()
+    No HTTP, no containers, no failures.
     """
-    url = f"{INTAKE_API_URL}/context"
-    params = {"session_id": session_id}
     try:
-        async with httpx.AsyncClient(timeout=5.0) as client:
-            response = await client.get(url, params=params)
-            response.raise_for_status()
-            data = response.json()
-            # Expected format from Intake:
-            # {
-            #     "session_id": "...",
-            #     "L1": [...],
-            #     "L5": [...],
-            #     "L10": {...},
-            #     "L20": {...},
-            #     "L30": {...}
-            # }
-            logger.info(f"Retrieved Intake context for session {session_id}")
-            return data
-    except httpx.HTTPError as e:
-        logger.warning(f"Failed to retrieve Intake context: {e}")
-        return {
-            "session_id": session_id,
-            "L1": [],
-            "L5": [],
-            "L10": None,
-            "L20": None,
-            "L30": None,
-            "error": str(e)
-        }
+        return await summarize_context(session_id, messages)
     except Exception as e:
-        logger.error(f"Unexpected error retrieving Intake context: {e}")
+        logger.error(f"Internal Intake summarization failed: {e}")
         return {
             "session_id": session_id,
-            "L1": [],
-            "L5": [],
-            "L10": None,
-            "L20": None,
-            "L30": None,
+            "L1": "",
+            "L5": "",
+            "L10": "",
+            "L20": "",
+            "L30": "",
             "error": str(e)
         }
 # -----------------------------
 # NeoMem semantic search
 # -----------------------------
@@ -279,7 +238,19 @@ async def collect_context(session_id: str, user_prompt: str) -> Dict[str, Any]:
     logger.debug(f"[COLLECT_CONTEXT] Time since last message: {minutes_since_last_msg:.2f} minutes")
     # C. Gather Intake context (multilevel summaries)
-    intake_data = await _get_intake_context(session_id)
+    # Build compact message buffer for Intake:
+    messages_for_intake = []
+    # You track messages inside SESSION_STATE — assemble it here:
+    if "message_history" in state:
+        for turn in state["message_history"]:
+            messages_for_intake.append({
+                "user_msg": turn.get("user", ""),
+                "assistant_msg": turn.get("assistant", "")
+            })
+    intake_data = await _get_intake_context(session_id, messages_for_intake)
     if VERBOSE_DEBUG:
         import json
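Note the shape change here: `intake_data` is now the dict returned by `summarize_context()` in the new module below, so the L1/L5/L10/L20/L30 values are strings rather than lists and objects, with empty strings plus an `error` key in the failure path. A hedged sketch of a consumer that tolerates both success and degraded results (the helper name is illustrative, not part of this diff):

    def render_intake_block(intake_data: dict) -> str:
        """Illustrative helper: fold non-empty summary tiers into prompt text."""
        lines = []
        for level in ("L1", "L5", "L10", "L20", "L30"):
            summary = intake_data.get(level) or ""
            if summary:
                lines.append(f"[{level}] {summary}")
        if intake_data.get("error"):
            # Degraded path: summarize_context failed and tiers are empty strings
            lines.append(f"(intake unavailable: {intake_data['error']})")
        return "\n".join(lines)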

cortex/intake/intake.py (new file, 260 lines)
View File

@@ -0,0 +1,260 @@
import os
from datetime import datetime
from typing import List, Dict, Any

from llm.llm_router import call_llm  # use Cortex's shared router

# ─────────────────────────────
# Config
# ─────────────────────────────
INTAKE_LLM = os.getenv("INTAKE_LLM", "PRIMARY").upper()
SUMMARY_MAX_TOKENS = int(os.getenv("SUMMARY_MAX_TOKENS", "200"))
SUMMARY_TEMPERATURE = float(os.getenv("SUMMARY_TEMPERATURE", "0.3"))
NEOMEM_API = os.getenv("NEOMEM_API")
NEOMEM_KEY = os.getenv("NEOMEM_KEY")

# ─────────────────────────────
# Internal history for L10/L20/L30
# ─────────────────────────────
L10_HISTORY: Dict[str, List[str]] = {}  # session_id → list of L10 blocks
L20_HISTORY: Dict[str, List[str]] = {}  # session_id → list of merged overviews

# ─────────────────────────────
# LLM helper (via Cortex router)
# ─────────────────────────────
async def _llm(prompt: str) -> str:
    """
    Use Cortex's llm_router to run a summary prompt.
    """
    try:
        text = await call_llm(
            prompt,
            backend=INTAKE_LLM,
            temperature=SUMMARY_TEMPERATURE,
            max_tokens=SUMMARY_MAX_TOKENS,
        )
        return (text or "").strip()
    except Exception as e:
        return f"[Error summarizing: {e}]"

# ─────────────────────────────
# Formatting helpers
# ─────────────────────────────
def _format_exchanges(exchanges: List[Dict[str, Any]]) -> str:
    """
    Expect each exchange to look like:
        { "user_msg": "...", "assistant_msg": "..." }
    """
    chunks = []
    for e in exchanges:
        user = e.get("user_msg", "")
        assistant = e.get("assistant_msg", "")
        chunks.append(f"User: {user}\nAssistant: {assistant}\n")
    return "\n".join(chunks)

# ─────────────────────────────
# Base factual summary
# ─────────────────────────────
async def summarize_simple(exchanges: List[Dict[str, Any]]) -> str:
    """
    Simple factual summary of recent exchanges.
    """
    if not exchanges:
        return ""
    text = _format_exchanges(exchanges)
    prompt = f"""
Summarize the following conversation between Brian (user) and Lyra (assistant).
Focus only on factual content. Avoid names, examples, story tone, or invented details.

{text}

Summary:
"""
    return await _llm(prompt)

# ─────────────────────────────
# Multilevel Summaries (L1, L5, L10, L20, L30)
# ─────────────────────────────
async def summarize_L1(buf: List[Dict[str, Any]]) -> str:
    # Last ~5 exchanges
    return await summarize_simple(buf[-5:])

async def summarize_L5(buf: List[Dict[str, Any]]) -> str:
    # Last ~10 exchanges
    return await summarize_simple(buf[-10:])

async def summarize_L10(session_id: str, buf: List[Dict[str, Any]]) -> str:
    # “Reality Check” for last 10 exchanges
    text = _format_exchanges(buf[-10:])
    prompt = f"""
You are Lyra Intake performing a short 'Reality Check'.
Summarize the last block of conversation (up to 10 exchanges)
in one clear paragraph focusing on tone, intent, and direction.

{text}

Reality Check:
"""
    summary = await _llm(prompt)
    # Track history for this session
    L10_HISTORY.setdefault(session_id, [])
    L10_HISTORY[session_id].append(summary)
    return summary

async def summarize_L20(session_id: str) -> str:
    """
    Merge all L10 Reality Checks into a 'Session Overview'.
    """
    history = L10_HISTORY.get(session_id, [])
    joined = "\n\n".join(history) if history else ""
    if not joined:
        return ""
    prompt = f"""
You are Lyra Intake creating a 'Session Overview'.
Merge the following Reality Check paragraphs into one short summary
capturing progress, themes, and the direction of the conversation.

{joined}

Overview:
"""
    summary = await _llm(prompt)
    L20_HISTORY.setdefault(session_id, [])
    L20_HISTORY[session_id].append(summary)
    return summary

async def summarize_L30(session_id: str) -> str:
    """
    Merge all L20 session overviews into a 'Continuity Report'.
    """
    history = L20_HISTORY.get(session_id, [])
    joined = "\n\n".join(history) if history else ""
    if not joined:
        return ""
    prompt = f"""
You are Lyra Intake generating a 'Continuity Report'.
Condense these session overviews into one high-level reflection,
noting major themes, persistent goals, and shifts.

{joined}

Continuity Report:
"""
    return await _llm(prompt)

# ─────────────────────────────
# NeoMem push
# ─────────────────────────────
def push_to_neomem(summary: str, session_id: str, level: str) -> None:
    """
    Fire-and-forget push of a summary into NeoMem.
    """
    if not NEOMEM_API or not summary:
        return
    headers = {"Content-Type": "application/json"}
    if NEOMEM_KEY:
        headers["Authorization"] = f"Bearer {NEOMEM_KEY}"
    payload = {
        "messages": [{"role": "assistant", "content": summary}],
        "user_id": "brian",
        "metadata": {
            "source": "intake",
            "session_id": session_id,
            "level": level,
        },
    }
    try:
        import requests
        requests.post(
            f"{NEOMEM_API}/memories",
            json=payload,
            headers=headers,
            timeout=20,
        ).raise_for_status()
        print(f"🧠 NeoMem updated ({level}) for {session_id}")
    except Exception as e:
        print(f"NeoMem push failed ({level}, {session_id}): {e}")

# ─────────────────────────────
# Main entrypoint for Cortex
# ─────────────────────────────
async def summarize_context(
    session_id: str,
    exchanges: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """
    Main API used by Cortex:

        summaries = await summarize_context(session_id, exchanges)

    `exchanges` should be the recent conversation buffer for that session.
    """
    buf = list(exchanges)
    if not buf:
        return {
            "session_id": session_id,
            "exchange_count": 0,
            "L1": "",
            "L5": "",
            "L10": "",
            "L20": "",
            "L30": "",
            "last_updated": None,
        }

    # Base levels
    L1 = await summarize_L1(buf)
    L5 = await summarize_L5(buf)
    L10 = await summarize_L10(session_id, buf)
    L20 = await summarize_L20(session_id)
    L30 = await summarize_L30(session_id)

    # Push the "interesting" tiers into NeoMem
    push_to_neomem(L10, session_id, "L10")
    push_to_neomem(L20, session_id, "L20")
    push_to_neomem(L30, session_id, "L30")

    return {
        "session_id": session_id,
        "exchange_count": len(buf),
        "L1": L1,
        "L5": L5,
        "L10": L10,
        "L20": L20,
        "L30": L30,
        "last_updated": datetime.now().isoformat(),
    }
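The docstring on summarize_context gives the call contract; a minimal standalone usage sketch follows. The exchange contents are invented for illustration, and a real call will hit the configured LLM backend and, if NEOMEM_API is set, push the L10/L20/L30 tiers to NeoMem:

    import asyncio

    from intake.intake import summarize_context

    async def demo():
        exchanges = [
            {"user_msg": "Let's cut the intake container.",
             "assistant_msg": "Agreed, fold it into Cortex."},
            {"user_msg": "What changes for callers?",
             "assistant_msg": "They import summarize_context directly."},
        ]
        summaries = await summarize_context("demo-session", exchanges)
        print(summaries["exchange_count"], summaries["L1"])

    asyncio.run(demo())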

View File

@@ -118,23 +118,23 @@ services:
   # ============================================================
   # Intake
   # ============================================================
-  intake:
-    build:
-      context: ./intake
-    container_name: intake
-    restart: unless-stopped
-    env_file:
-      - ./intake/.env
-      - ./.env
-    ports:
-      - "7080:7080"
-    volumes:
-      - ./intake:/app
-      - ./intake-logs:/app/logs
-    depends_on:
-      - cortex
-    networks:
-      - lyra_net
+  # intake:
+  #   build:
+  #     context: ./intake
+  #   container_name: intake
+  #   restart: unless-stopped
+  #   env_file:
+  #     - ./intake/.env
+  #     - ./.env
+  #   ports:
+  #     - "7080:7080"
+  #   volumes:
+  #     - ./intake:/app
+  #     - ./intake-logs:/app/logs
+  #   depends_on:
+  #     - cortex
+  #   networks:
+  #     - lyra_net
   # ============================================================
   # RAG Service