Major rewire, all modules connected. Intake still wonky

2025-11-28 15:14:47 -05:00
parent 734999e8bb
commit a83405beb1
19 changed files with 10109 additions and 4072 deletions
--- a/core/relay-backup/lib/cortex.js
+++ b/core/relay-backup/lib/cortex.js
@@ -0,0 +1,73 @@
+// relay/lib/cortex.js
+import fetch from "node-fetch";
+
+const REFLECT_URL = process.env.CORTEX_URL || "http://localhost:7081/reflect"; // reflect endpoint: CORTEX_URL env var when set and non-empty, else the local default
+const INGEST_URL  = process.env.CORTEX_URL_INGEST || "http://localhost:7081/ingest"; // ingest endpoint: CORTEX_URL_INGEST env var when set and non-empty, else the local default
+
+export async function reflectWithCortex(userInput, memories = []) {
+  const body = { prompt: userInput, memories };
+  try {
+    const res = await fetch(REFLECT_URL, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(body),
+      timeout: 120000,
+    });
+
+    const rawText = await res.text();
+	console.log("🔎 [Cortex-Debug] rawText from /reflect →", rawText.slice(0, 300));
+    if (!res.ok) {
+      throw new Error(`HTTP ${res.status} — ${rawText.slice(0, 200)}`);
+    }
+
+    let data;
+    try {
+      data = JSON.parse(rawText);
+    } catch (err) {
+      // Fallback ① try to grab a JSON-looking block
+      const match = rawText.match(/\{[\s\S]*\}/);
+      if (match) {
+        try {
+          data = JSON.parse(match[0]);
+        } catch {
+          data = { reflection_raw: rawText.trim(), notes: "partial parse" };
+        }
+      } else {
+        // Fallback ② if it’s already an object (stringified Python dict)
+        try {
+          const normalized = rawText
+            .replace(/'/g, '"')        // convert single quotes
+            .replace(/None/g, 'null'); // convert Python None
+          data = JSON.parse(normalized);
+        } catch {
+          data = { reflection_raw: rawText.trim(), notes: "no JSON found" };
+        }
+      }
+    }
+
+    if (typeof data !== "object") {
+      data = { reflection_raw: rawText.trim(), notes: "non-object response" };
+    }
+
+    console.log("🧠 Cortex reflection normalized:", data);
+    return data;
+  } catch (e) {
+    console.warn("⚠️ Cortex reflect failed:", e.message);
+    return { error: e.message, reflection_raw: "" };
+  }
+}
+
+export async function ingestToCortex(user, assistant, reflection = {}, sessionId = "default") {
+  const body = { turn: { user, assistant }, reflection, session_id: sessionId };
+  try {
+    const res = await fetch(INGEST_URL, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(body),
+      timeout: 120000,
+    });
+    console.log(`📤 Sent exchange to Cortex ingest (${res.status})`);
+  } catch (e) {
+    console.warn("⚠️ Cortex ingest failed:", e.message);
+  }
+}
--- a/core/relay-backup/lib/llm.js
+++ b/core/relay-backup/lib/llm.js
@@ -0,0 +1,93 @@
+/**
+ * Send the conversation to a single LLM backend and normalize its reply to
+ * plain text.
+ *
+ * The request path and payload depend on `backend.type`:
+ *   - "ollama"           → `/api/chat`, chat payload
+ *   - "vllm"/"llamacpp"  → `/v1/completions`, message contents joined by
+ *                          newlines into a single `prompt`
+ *   - "openai"           → `/v1/chat/completions`, chat payload, with a Bearer
+ *                          token taken from the OPENAI_API_KEY env var
+ *   - anything else      → `backend.url` used as given, chat payload
+ *
+ * @param {{key: string, type: string, url: string, model: string}} backend
+ *   Backend descriptor.
+ * @param {Array<{content: string}>} messages - Chat messages; only `content`
+ *   is read here, the objects are otherwise forwarded unchanged.
+ * @returns {Promise<{reply: string, raw: string, backend: string}>} The
+ *   extracted reply text, the raw response body and the backend key. A body
+ *   that cannot be parsed yields a reply of the form `[parse error: ...]`
+ *   rather than a rejection.
+ * @throws {Error} When url/model are missing, the request fails or times out,
+ *   or the backend answers with a non-2xx status.
+ */
+async function tryBackend(backend, messages) {
+  if (!backend.url || !backend.model) throw new Error("missing url/model");
+
+  const isOllama = backend.type === "ollama";
+  const isOpenAI = backend.type === "openai";
+  const usesCompletionsApi = backend.type === "vllm" || backend.type === "llamacpp";
+
+  const headers = { "Content-Type": "application/json" };
+  // Read the key from the environment; this file declares no OPENAI_API_KEY
+  // binding, so a bare identifier would throw a ReferenceError here.
+  const apiKey = process.env.OPENAI_API_KEY;
+  if (isOpenAI && apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
+
+  // Choose the correct endpoint automatically.
+  let suffix = "";
+  if (isOllama) suffix = "/api/chat";
+  else if (usesCompletionsApi) suffix = "/v1/completions";
+  else if (isOpenAI) suffix = "/v1/chat/completions";
+
+  let endpoint = backend.url;
+  if (suffix !== "") {
+    // Drop trailing slashes first so "http://host/" does not become
+    // "http://host//api/chat".
+    const base = backend.url.replace(/\/+$/, "");
+    endpoint = base.endsWith(suffix) ? base : `${base}${suffix}`;
+  }
+
+  // Build the payload based on backend style.
+  const body = usesCompletionsApi
+    ? {
+        model: backend.model,
+        prompt: messages.map((m) => m.content).join("\n"),
+        max_tokens: 400,
+        temperature: 0.3,
+      }
+    : { model: backend.model, messages, stream: false };
+
+  const resp = await fetch(endpoint, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(body),
+    // `timeout` is only understood by node-fetch v2; the standard fetch API
+    // ignores it, so the abort signal is what enforces the limit there.
+    timeout: 120000,
+    signal: AbortSignal.timeout(120000),
+  });
+  if (!resp.ok) throw new Error(`${backend.key} HTTP ${resp.status}`);
+  const raw = await resp.text();
+
+  // 🧩 Normalize replies
+  let reply = "";
+  try {
+    if (isOllama) {
+      // Ollama sometimes returns NDJSON lines; merge them.
+      reply = raw
+        .split("\n")
+        .filter((line) => line.trim().startsWith("{"))
+        .map((line) => JSON.parse(line))
+        .map((obj) => obj.message?.content || obj.response || "")
+        .join("")
+        .trim();
+    } else {
+      const data = JSON.parse(raw);
+      console.log("🔍 RAW LLM RESPONSE:", JSON.stringify(data, null, 2));
+      // Completions-style text first, then the chat-style message shapes.
+      reply =
+        data?.choices?.[0]?.text?.trim() ||
+        data?.choices?.[0]?.message?.content?.trim() ||
+        data?.message?.content?.trim() ||
+        "";
+    }
+  } catch (err) {
+    reply = `[parse error: ${err.message}]`;
+  }
+
+  return { reply, raw, backend: backend.key };
+}
+
+// ------------------------------------
+// Export the main call helper
+// ------------------------------------
+export async function callSpeechLLM(messages) {
+  const backends = [
+    { key: "primary",  type: "vllm",     url: process.env.LLM_PRIMARY_URL,  model: process.env.LLM_PRIMARY_MODEL },
+    { key: "secondary",type: "ollama",   url: process.env.LLM_SECONDARY_URL,model: process.env.LLM_SECONDARY_MODEL },
+    { key: "cloud",    type: "openai",   url: process.env.LLM_CLOUD_URL,    model: process.env.LLM_CLOUD_MODEL },
+    { key: "fallback", type: "llamacpp", url: process.env.LLM_FALLBACK_URL, model: process.env.LLM_FALLBACK_MODEL },
+  ];
+
+  for (const b of backends) {
+    if (!b.url || !b.model) continue;
+    try {
+      console.log(`🧠 Trying backend: ${b.key.toUpperCase()} (${b.url})`);
+      const out = await tryBackend(b, messages);
+      console.log(`✅ Success via ${b.key.toUpperCase()}`);
+      return out;
+    } catch (err) {
+      console.warn(`⚠️ ${b.key.toUpperCase()} failed: ${err.message}`);
+    }
+  }
+
+  throw new Error("all_backends_failed");
+}