Files
2025-12-20 02:49:20 -05:00

162 lines
5.7 KiB
JavaScript

/**
 * Join a backend base URL with the API path it must be called on.
 *
 * Trailing slashes on the base are dropped so "http://host/" does not become
 * "http://host//api/chat". If the base already ends with the full path it is
 * returned as-is; if it ends with the leading segment(s) of the path (for
 * example a base of ".../v1" and a path of "/v1/completions") only the
 * missing remainder is appended instead of duplicating the shared prefix.
 *
 * @param {string} baseUrl - Configured backend URL.
 * @param {string} apiPath - Absolute API path, starting with "/".
 * @returns {string} The endpoint to POST to.
 */
function joinBackendEndpoint(baseUrl, apiPath) {
  const base = baseUrl.replace(/\/+$/, "");
  if (base.endsWith(apiPath)) return base;

  const segments = apiPath.split("/").filter(Boolean);
  for (let keep = segments.length - 1; keep > 0; keep -= 1) {
    const head = `/${segments.slice(0, keep).join("/")}`;
    // The "//" guard stops a bare host such as "http://api" from being
    // mistaken for a base whose path already ends in "/api".
    if (base.endsWith(head) && !base.endsWith(`/${head}`)) {
      return `${base}/${segments.slice(keep).join("/")}`;
    }
  }
  return `${base}${apiPath}`;
}

/**
 * Send one chat request to a single LLM backend and normalise its reply.
 *
 * Endpoint and payload are chosen from `backend.type`:
 *   - "ollama"             -> POST <url>/api/chat with { model, messages, stream: false }
 *   - "vllm" / "llamacpp"  -> POST <url>/v1/completions with a newline-joined prompt
 *   - "openai"             -> POST <url>/v1/chat/completions with a Bearer token
 *   - anything else        -> POST to <url> unchanged with the chat-style payload
 *
 * @param {{key: string, type: string, url: string, model: string}} backend
 * @param {Array<{role?: string, content: string}>} messages - Chat messages.
 * @returns {Promise<{reply: string, raw: string, parsedData: (object|null), backend: string}>}
 *   `reply` is the extracted text ("" if none was found, or a
 *   "[parse error: ...]" marker if the body was not valid JSON); `raw` is the
 *   unparsed response body; `parsedData` is the parsed JSON for non-Ollama
 *   backends and null otherwise.
 * @throws {Error} When url/model is missing, the HTTP status is not OK, the
 *   request fails, or it exceeds the timeout.
 */
async function tryBackend(backend, messages) {
  if (!backend.url || !backend.model) throw new Error("missing url/model");

  const REQUEST_TIMEOUT_MS = 120000;
  const isOllama = backend.type === "ollama";
  const isOpenAI = backend.type === "openai";
  const usesCompletionsApi = backend.type === "vllm" || backend.type === "llamacpp";

  const headers = { "Content-Type": "application/json" };
  if (isOpenAI) headers["Authorization"] = `Bearer ${OPENAI_API_KEY}`;

  // Choose correct endpoint automatically
  let endpoint = backend.url;
  if (isOllama) {
    endpoint = joinBackendEndpoint(endpoint, "/api/chat");
  } else if (usesCompletionsApi) {
    endpoint = joinBackendEndpoint(endpoint, "/v1/completions");
  } else if (isOpenAI) {
    endpoint = joinBackendEndpoint(endpoint, "/v1/chat/completions");
  }

  // Build payload based on backend style
  const body = usesCompletionsApi
    ? {
        model: backend.model,
        prompt: messages.map((m) => m.content).join("\n"),
        max_tokens: 400,
        temperature: 0.3,
      }
    : { model: backend.model, messages, stream: false };

  // Spec-compliant fetch ignores a `timeout` option, so the limit is enforced
  // with an AbortController. `timeout` is still passed for fetch
  // implementations that honour it.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
  let raw;
  try {
    const resp = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      signal: controller.signal,
      timeout: REQUEST_TIMEOUT_MS,
    });
    if (!resp.ok) throw new Error(`${backend.key} HTTP ${resp.status}`);
    // Reading the body stays inside the timeout window as well.
    raw = await resp.text();
  } catch (err) {
    if (controller.signal.aborted) {
      throw new Error(`${backend.key} timed out after ${REQUEST_TIMEOUT_MS}ms`, { cause: err });
    }
    throw err;
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(timer);
  }

  // 🧩 Normalize replies
  let reply = "";
  let parsedData = null;
  try {
    if (isOllama) {
      // Ollama sometimes returns NDJSON lines; merge them
      reply = raw
        .split("\n")
        .filter((line) => line.trim().startsWith("{"))
        .map((line) => JSON.parse(line))
        .map((obj) => obj.message?.content || obj.response || "")
        .join("")
        .trim();
    } else {
      parsedData = JSON.parse(raw);
      reply =
        parsedData?.choices?.[0]?.text?.trim() ||
        parsedData?.choices?.[0]?.message?.content?.trim() ||
        parsedData?.message?.content?.trim() ||
        "";
    }
  } catch (err) {
    // Best effort: surface the parse failure in the reply instead of throwing.
    reply = `[parse error: ${err.message}]`;
  }

  return { reply, raw, parsedData, backend: backend.key };
}
// ------------------------------------
// Structured logging helper
// ------------------------------------
// Verbosity of success logs: minimal | summary | detailed | verbose.
// The value is trimmed and lower-cased so "Verbose" or " summary " select the
// intended mode; an empty value falls back to "summary". Any other
// unrecognised value behaves like "detailed".
const LOG_DETAIL = (process.env.LOG_DETAIL_LEVEL ?? "").trim().toLowerCase() || "summary";

/**
 * Shorten text for a log line, appending "..." only when text was cut.
 *
 * @param {*} text - Value to preview; null/undefined become "".
 * @param {number} maxLength - Maximum number of characters to keep.
 * @returns {string} The (possibly truncated) preview.
 */
function truncateForLog(text, maxLength) {
  const value = String(text ?? "");
  return value.length > maxLength ? `${value.substring(0, maxLength)}...` : value;
}

/**
 * Log the outcome of one LLM backend call.
 *
 * Failures are always written with console.warn. Successes are written with
 * console.log according to LOG_DETAIL: nothing for "minimal", a one-line
 * reply preview for "summary", and a prompt/reply block otherwise (with up
 * to 50 lines of the parsed response added in "verbose" mode).
 *
 * @param {{key: string}} backend - Backend descriptor; `key` labels the line.
 * @param {Array<{content?: *}>} messages - Messages sent; only the last one is previewed.
 * @param {?{reply: string, parsedData: ?object}} result - Call result; unused when `error` is set.
 * @param {?Error} [error=null] - Failure to report instead of a result.
 * @returns {void}
 */
function logLLMCall(backend, messages, result, error = null) {
  // Time-of-day portion of the ISO timestamp, without the trailing "Z".
  const timestamp = new Date().toISOString().split("T")[1].slice(0, -1);
  const label = backend.key.toUpperCase();

  if (error) {
    // Always log errors
    console.warn(`⚠️ [LLM] ${label} failed | ${timestamp} | ${error.message ?? String(error)}`);
    return;
  }

  // Success - log based on detail level
  if (LOG_DETAIL === "minimal") {
    return; // Don't log successful calls in minimal mode
  }
  if (LOG_DETAIL === "summary") {
    console.log(`✅ [LLM] ${label} | ${timestamp} | Reply: ${truncateForLog(result?.reply, 80)}`);
    return;
  }

  // Detailed or verbose
  const rule = "─".repeat(100);
  console.log(`\n${rule}`);
  console.log(`🧠 LLM CALL | Backend: ${label} | ${timestamp}`);
  console.log(rule);

  // Show prompt preview
  const lastMsg = messages?.[messages.length - 1];
  console.log(`📝 Prompt: ${truncateForLog(lastMsg?.content, 150)}`);

  // Show parsed reply
  console.log(`💬 Reply: ${truncateForLog(result?.reply, 200)}`);

  // Show raw response only in verbose mode
  if (LOG_DETAIL === "verbose" && result?.parsedData) {
    console.log(`\n╭─ RAW RESPONSE ────────────────────────────────────────────────────────────────────────────`);
    const lines = JSON.stringify(result.parsedData, null, 2).split("\n");
    const maxLines = 50;
    for (const line of lines.slice(0, maxLines)) {
      console.log(`${line}`);
    }
    if (lines.length > maxLines) {
      console.log(`│ ... (${lines.length - maxLines} more lines - check raw field for full response)`);
    }
    console.log(`${"─".repeat(95)}`);
  }
  console.log(`${rule}\n`);
}
// ------------------------------------
// Export the main call helper
// ------------------------------------
/**
 * Send `messages` to the first LLM backend that answers.
 *
 * Backends are tried in a fixed order (primary, secondary, cloud, fallback),
 * each configured through the LLM_<NAME>_URL / LLM_<NAME>_MODEL environment
 * variables, which are read on every call. A backend missing either variable
 * is skipped without a request. The first successful result is logged and
 * returned; every failure is logged and the next backend is tried.
 *
 * @param {Array<{role?: string, content: string}>} messages - Chat messages to send.
 * @returns {Promise<object>} The result object produced by `tryBackend`.
 * @throws {Error} With message "all_backends_failed" when no backend
 *   succeeded. The error's `failures` property lists `{ backend, error }` for
 *   every backend, including those that were skipped as unconfigured.
 */
export async function callSpeechLLM(messages) {
  const backends = [
    { key: "primary", type: "vllm", url: process.env.LLM_PRIMARY_URL, model: process.env.LLM_PRIMARY_MODEL },
    { key: "secondary", type: "ollama", url: process.env.LLM_SECONDARY_URL, model: process.env.LLM_SECONDARY_MODEL },
    { key: "cloud", type: "openai", url: process.env.LLM_CLOUD_URL, model: process.env.LLM_CLOUD_MODEL },
    { key: "fallback", type: "llamacpp", url: process.env.LLM_FALLBACK_URL, model: process.env.LLM_FALLBACK_MODEL },
  ];

  const failures = [];
  for (const backend of backends) {
    if (!backend.url || !backend.model) {
      // Recorded (not warned about on every call) so the final report
      // explains why this backend was never attempted.
      failures.push({ backend: backend.key, error: "not configured (missing url/model)" });
      continue;
    }
    try {
      const out = await tryBackend(backend, messages);
      logLLMCall(backend, messages, out);
      return out;
    } catch (err) {
      logLLMCall(backend, messages, null, err);
      failures.push({ backend: backend.key, error: err?.message ?? String(err) });
    }
  }

  // All backends failed - log summary
  const divider = "=".repeat(100);
  console.error(`\n${divider}`);
  console.error(`🔴 ALL LLM BACKENDS FAILED`);
  console.error(divider);
  for (const { backend, error } of failures) {
    console.error(` ${backend.toUpperCase()}: ${error}`);
  }
  console.error(`${divider}\n`);

  // Message is kept stable for callers that match on it; details ride along.
  const failure = new Error("all_backends_failed");
  failure.failures = failures;
  throw failure;
}