async function tryBackend(backend, messages) {
  if (!backend.url || !backend.model) throw new Error("missing url/model");

  const isOllama = backend.type === "ollama";
  const isOpenAI = backend.type === "openai";
  const isVllm = backend.type === "vllm";
  const isLlamaCpp = backend.type === "llamacpp";

  let endpoint = backend.url;
  const headers = { "Content-Type": "application/json" };
  if (isOpenAI) headers["Authorization"] = `Bearer ${process.env.OPENAI_API_KEY}`;

  // Choose correct endpoint automatically
  if (isOllama && !endpoint.endsWith("/api/chat")) endpoint += "/api/chat";
  if ((isVllm || isLlamaCpp) && !endpoint.endsWith("/v1/completions")) endpoint += "/v1/completions";
  if (isOpenAI && !endpoint.endsWith("/v1/chat/completions")) endpoint += "/v1/chat/completions";

  // Build payload based on backend style: completion-style backends (vLLM / llama.cpp)
  // get a flattened prompt; chat-style backends (Ollama / OpenAI) get the messages array.
  const body = (isVllm || isLlamaCpp)
    ? {
        model: backend.model,
        prompt: messages.map(m => m.content).join("\n"),
        max_tokens: 400,
        temperature: 0.3,
      }
    : { model: backend.model, messages, stream: false };

  // fetch() has no `timeout` option; use AbortSignal.timeout (Node 17.3+) to abort slow backends.
  const resp = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(120000),
  });

  if (!resp.ok) throw new Error(`${backend.key} HTTP ${resp.status}`);
  const raw = await resp.text();

  // 🧩 Normalize replies
  let reply = "";
  let parsedData = null;
  try {
    if (isOllama) {
      // Ollama sometimes returns NDJSON lines; merge them
      const merged = raw
        .split("\n")
        .filter(line => line.trim().startsWith("{"))
        .map(line => JSON.parse(line))
        .map(obj => obj.message?.content || obj.response || "")
        .join("");
      reply = merged.trim();
    } else {
      parsedData = JSON.parse(raw);
      reply =
        parsedData?.choices?.[0]?.text?.trim() ||
        parsedData?.choices?.[0]?.message?.content?.trim() ||
        parsedData?.message?.content?.trim() ||
        "";
    }
  } catch (err) {
    reply = `[parse error: ${err.message}]`;
  }

  return { reply, raw, parsedData, backend: backend.key };
}

// ------------------------------------
// Structured logging helper
// ------------------------------------
const LOG_DETAIL = process.env.LOG_DETAIL_LEVEL || "summary"; // minimal | summary | detailed | verbose

function logLLMCall(backend, messages, result, error = null) {
  const timestamp = new Date().toISOString().split('T')[1].slice(0, -1);

  if (error) {
    // Always log errors
    console.warn(`āš ļø [LLM] ${backend.key.toUpperCase()} failed | ${timestamp} | ${error.message}`);
    return;
  }

  // Success: log based on detail level
  if (LOG_DETAIL === "minimal") {
    return; // Don't log successful calls in minimal mode
  }

  if (LOG_DETAIL === "summary") {
    console.log(`āœ… [LLM] ${backend.key.toUpperCase()} | ${timestamp} | Reply: ${result.reply.substring(0, 80)}...`);
    return;
  }

  // Detailed or verbose
  console.log(`\n${'─'.repeat(100)}`);
  console.log(`🧠 LLM CALL | Backend: ${backend.key.toUpperCase()} | ${timestamp}`);
  console.log(`${'─'.repeat(100)}`);

  // Show prompt preview (last message only)
  const lastMsg = messages[messages.length - 1];
  const promptPreview = (lastMsg?.content || '').substring(0, 150);
  console.log(`šŸ“ Prompt: ${promptPreview}...`);

  // Show parsed reply
  console.log(`šŸ’¬ Reply: ${result.reply.substring(0, 200)}...`);

  // Show raw response only in verbose mode
  if (LOG_DETAIL === "verbose" && result.parsedData) {
    console.log(`\n╭─ RAW RESPONSE ────────────────────────────────────────────────────────────────────────────`);
    const jsonStr = JSON.stringify(result.parsedData, null, 2);
    const lines = jsonStr.split('\n');
    const maxLines = 50;
    lines.slice(0, maxLines).forEach(line => {
      console.log(`│ ${line}`);
    });
    if (lines.length > maxLines) {
      console.log(`│ ... (${lines.length - maxLines} more lines - check raw field for full response)`);
    }
    console.log(`ā•°${'─'.repeat(95)}`);
  }

  console.log(`${'─'.repeat(100)}\n`);
}

// ------------------------------------
// Export the main call helper
// ------------------------------------
export async function callSpeechLLM(messages) {
  const backends = [
    { key: "primary",   type: "vllm",     url: process.env.LLM_PRIMARY_URL,   model: process.env.LLM_PRIMARY_MODEL },
    { key: "secondary", type: "ollama",   url: process.env.LLM_SECONDARY_URL, model: process.env.LLM_SECONDARY_MODEL },
    { key: "cloud",     type: "openai",   url: process.env.LLM_CLOUD_URL,     model: process.env.LLM_CLOUD_MODEL },
    { key: "fallback",  type: "llamacpp", url: process.env.LLM_FALLBACK_URL,  model: process.env.LLM_FALLBACK_MODEL },
  ];

  const failedBackends = [];

  // Try each configured backend in order; return on the first success.
  for (const b of backends) {
    if (!b.url || !b.model) continue;
    try {
      const out = await tryBackend(b, messages);
      logLLMCall(b, messages, out);
      return out;
    } catch (err) {
      logLLMCall(b, messages, null, err);
      failedBackends.push({ backend: b.key, error: err.message });
    }
  }

  // All backends failed: log summary
  console.error(`\n${'='.repeat(100)}`);
  console.error(`šŸ”“ ALL LLM BACKENDS FAILED`);
  console.error(`${'='.repeat(100)}`);
  failedBackends.forEach(({ backend, error }) => {
    console.error(`   ${backend.toUpperCase()}: ${error}`);
  });
  console.error(`${'='.repeat(100)}\n`);

  throw new Error("all_backends_failed");
}
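// ------------------------------------
// Example usage (illustrative sketch only: the "./llm.js" import path, the
// env var values, and the message contents below are assumptions, not part
// of this module; adjust them to your project layout)
// ------------------------------------
// // LLM_PRIMARY_URL=http://localhost:8000 LLM_PRIMARY_MODEL=my-model LOG_DETAIL_LEVEL=detailed node app.js
// import { callSpeechLLM } from "./llm.js";
//
// const { reply, backend } = await callSpeechLLM([
//   { role: "system", content: "You are a concise voice assistant." },
//   { role: "user", content: "Summarize the last transcript in one sentence." },
// ]);
// console.log(`Answered by ${backend}: ${reply}`);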