// Send the conversation to a single backend and normalize its reply.
// Throws on missing config, network failures, or non-2xx responses so the
// caller can fall back to the next backend.
async function tryBackend(backend, messages) {
  if (!backend.url || !backend.model) throw new Error("missing url/model");

  const isOllama = backend.type === "ollama";
  const isOpenAI = backend.type === "openai";
  const isVllm = backend.type === "vllm";
  const isLlamaCpp = backend.type === "llamacpp";

  let endpoint = backend.url;
  const headers = { "Content-Type": "application/json" };
  if (isOpenAI) headers["Authorization"] = `Bearer ${OPENAI_API_KEY}`;

  // Choose the correct endpoint automatically
  if (isOllama && !endpoint.endsWith("/api/chat")) endpoint += "/api/chat";
  if ((isVllm || isLlamaCpp) && !endpoint.endsWith("/v1/completions")) endpoint += "/v1/completions";
  if (isOpenAI && !endpoint.endsWith("/v1/chat/completions")) endpoint += "/v1/chat/completions";

  // Build the payload based on backend style: vLLM and llama.cpp use the
  // completions API (flat prompt string); Ollama's /api/chat and OpenAI's
  // /v1/chat/completions both accept the same chat-style body.
  const body = (isVllm || isLlamaCpp)
    ? {
        model: backend.model,
        prompt: messages.map(m => m.content).join("\n"),
        max_tokens: 400,
        temperature: 0.3,
      }
    : { model: backend.model, messages, stream: false };

  const resp = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(body),
    // Native fetch has no `timeout` option; abort via an AbortSignal instead.
    signal: AbortSignal.timeout(120000),
  });
  if (!resp.ok) throw new Error(`${backend.key} HTTP ${resp.status}`);
  const raw = await resp.text();

  // 🧩 Normalize replies
  let reply = "";
  let parsedData = null;

  try {
    if (isOllama) {
      // Ollama sometimes returns NDJSON lines; merge them
      const merged = raw
        .split("\n")
        .filter(line => line.trim().startsWith("{"))
        .map(line => JSON.parse(line))
        .map(obj => obj.message?.content || obj.response || "")
        .join("");
      reply = merged.trim();
    } else {
      parsedData = JSON.parse(raw);
      reply =
        parsedData?.choices?.[0]?.text?.trim() ||
        parsedData?.choices?.[0]?.message?.content?.trim() ||
        parsedData?.message?.content?.trim() ||
        "";
    }
  } catch (err) {
    reply = `[parse error: ${err.message}]`;
  }

  return { reply, raw, parsedData, backend: backend.key };
}
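
// Shape of a successful result (illustrative, mirroring the return statement above):
//   { reply: "<normalized text>", raw: "<unparsed body>", parsedData: <object or null>, backend: "primary" }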

// ------------------------------------
// Structured logging helper
// ------------------------------------
const LOG_DETAIL = process.env.LOG_DETAIL_LEVEL || "summary"; // minimal | summary | detailed | verbose

function logLLMCall(backend, messages, result, error = null) {
  // Time-of-day portion of the ISO timestamp (HH:MM:SS.mmm, trailing "Z" dropped)
  const timestamp = new Date().toISOString().split('T')[1].slice(0, -1);

  if (error) {
    // Always log errors
    console.warn(`⚠️ [LLM] ${backend.key.toUpperCase()} failed | ${timestamp} | ${error.message}`);
    return;
  }

  // Success: log based on detail level
  if (LOG_DETAIL === "minimal") {
    return; // Don't log successful calls in minimal mode
  }

  if (LOG_DETAIL === "summary") {
    console.log(`✅ [LLM] ${backend.key.toUpperCase()} | ${timestamp} | Reply: ${result.reply.substring(0, 80)}...`);
    return;
  }

  // Detailed or verbose
  console.log(`\n${'─'.repeat(100)}`);
  console.log(`🧠 LLM CALL | Backend: ${backend.key.toUpperCase()} | ${timestamp}`);
  console.log(`${'─'.repeat(100)}`);

  // Show prompt preview
  const lastMsg = messages[messages.length - 1];
  const promptPreview = (lastMsg?.content || '').substring(0, 150);
  console.log(`📝 Prompt: ${promptPreview}...`);

  // Show parsed reply
  console.log(`💬 Reply: ${result.reply.substring(0, 200)}...`);

  // Show raw response only in verbose mode
  if (LOG_DETAIL === "verbose" && result.parsedData) {
    console.log(`\n╭─ RAW RESPONSE ────────────────────────────────────────────────────────────────────────────`);
    const jsonStr = JSON.stringify(result.parsedData, null, 2);
    const lines = jsonStr.split('\n');
    const maxLines = 50;

    lines.slice(0, maxLines).forEach(line => {
      console.log(`│ ${line}`);
    });

    if (lines.length > maxLines) {
      console.log(`│ ... (${lines.length - maxLines} more lines - check raw field for full response)`);
    }
    console.log(`╰${'─'.repeat(95)}`);
  }

  console.log(`${'─'.repeat(100)}\n`);
}
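
// Example "summary"-level line (illustrative, following the template above):
//   ✅ [LLM] PRIMARY | 12:34:56.789 | Reply: Here's a quick rundown of today's agenda...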

// ------------------------------------
// Export the main call helper
// ------------------------------------
// Try each configured backend in order (primary → secondary → cloud → fallback)
// and return the first successful result.
export async function callSpeechLLM(messages) {
  const backends = [
    { key: "primary",   type: "vllm",     url: process.env.LLM_PRIMARY_URL,   model: process.env.LLM_PRIMARY_MODEL },
    { key: "secondary", type: "ollama",   url: process.env.LLM_SECONDARY_URL, model: process.env.LLM_SECONDARY_MODEL },
    { key: "cloud",     type: "openai",   url: process.env.LLM_CLOUD_URL,     model: process.env.LLM_CLOUD_MODEL },
    { key: "fallback",  type: "llamacpp", url: process.env.LLM_FALLBACK_URL,  model: process.env.LLM_FALLBACK_MODEL },
  ];

  const failedBackends = [];

  for (const b of backends) {
    if (!b.url || !b.model) continue;

    try {
      const out = await tryBackend(b, messages);
      logLLMCall(b, messages, out);
      return out;
    } catch (err) {
      logLLMCall(b, messages, null, err);
      failedBackends.push({ backend: b.key, error: err.message });
    }
  }

  // All backends failed: log a summary
  console.error(`\n${'='.repeat(100)}`);
  console.error(`🔴 ALL LLM BACKENDS FAILED`);
  console.error(`${'='.repeat(100)}`);
  failedBackends.forEach(({ backend, error }) => {
    console.error(`  ${backend.toUpperCase()}: ${error}`);
  });
  console.error(`${'='.repeat(100)}\n`);

  throw new Error("all_backends_failed");
}
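
// Example usage (hypothetical module path and messages; the backend URLs and
// models are read from the LLM_*_URL / LLM_*_MODEL environment variables above):
//
//   import { callSpeechLLM } from "./llmClient.js";
//
//   const { reply, backend } = await callSpeechLLM([
//     { role: "system", content: "You are a concise voice assistant." },
//     { role: "user", content: "Summarize today's agenda in two sentences." },
//   ]);
//   console.log(`[${backend}] ${reply}`);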