94 lines
3.3 KiB
JavaScript
94 lines
3.3 KiB
JavaScript
/**
 * Send one chat request to a single LLM backend and normalise its reply.
 *
 * The backend `type` selects the API path appended to `backend.url` (unless the
 * URL already ends with it) and the payload shape:
 *   - "ollama"            -> /api/chat, chat-style payload
 *   - "vllm" / "llamacpp" -> /v1/completions, plain-text prompt payload
 *   - "openai"            -> /v1/chat/completions, chat-style payload + bearer token
 * Any other type posts the chat-style payload to `backend.url` unchanged.
 *
 * @param {{ key: string, type: string, url: string, model: string }} backend
 *   Backend descriptor; `url` and `model` are required.
 * @param {Array<{ content: string }>} messages
 *   Chat messages. For completion-style backends only each `content` is used,
 *   joined with newlines into a single prompt.
 * @returns {Promise<{ reply: string, raw: string, backend: string }>}
 *   `reply` is the extracted text (or a `[parse error: ...]` marker when the
 *   body could not be parsed), `raw` is the untouched response body and
 *   `backend` echoes `backend.key`.
 * @throws {Error} When url/model are missing, the HTTP status is not ok, the
 *   request exceeds the timeout, or the transport itself fails.
 */
async function tryBackend(backend, messages) {
  if (!backend.url || !backend.model) throw new Error("missing url/model");

  const REQUEST_TIMEOUT_MS = 120000;

  const isOllama = backend.type === "ollama";
  const isOpenAI = backend.type === "openai";
  // vLLM and llama.cpp are both addressed through the text-completions API.
  const isCompletionStyle = backend.type === "vllm" || backend.type === "llamacpp";

  const headers = { "Content-Type": "application/json" };
  if (isOpenAI) headers["Authorization"] = `Bearer ${OPENAI_API_KEY}`;

  // Choose correct endpoint automatically
  let apiPath = "";
  if (isOllama) apiPath = "/api/chat";
  else if (isCompletionStyle) apiPath = "/v1/completions";
  else if (isOpenAI) apiPath = "/v1/chat/completions";

  let endpoint = backend.url;
  if (apiPath) {
    // Drop trailing slashes first so "http://host/" does not become
    // "http://host//v1/..." and "…/api/chat/" is recognised as already complete.
    endpoint = endpoint.replace(/\/+$/, "");
    if (!endpoint.endsWith(apiPath)) endpoint += apiPath;
  }

  // Build payload based on backend style
  const body = isCompletionStyle
    ? {
        model: backend.model,
        prompt: messages.map((m) => m.content).join("\n"),
        max_tokens: 400,
        temperature: 0.3,
      }
    : { model: backend.model, messages, stream: false };

  // Standard fetch ignores a `timeout` option, so enforce the deadline with an
  // abort signal. `timeout` is still passed for fetch implementations that
  // honour it.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);

  let raw;
  try {
    const resp = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      timeout: REQUEST_TIMEOUT_MS,
      signal: controller.signal,
    });
    if (!resp.ok) throw new Error(`${backend.key} HTTP ${resp.status}`);
    // Read the body inside the guarded section so the deadline also covers a
    // stalled response stream.
    raw = await resp.text();
  } catch (err) {
    if (controller.signal.aborted) {
      throw new Error(`${backend.key} timed out after ${REQUEST_TIMEOUT_MS}ms`, { cause: err });
    }
    throw err;
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(timer);
  }

  // 🧩 Normalize replies
  let reply = "";
  try {
    if (isOllama) {
      // Ollama sometimes returns NDJSON lines; merge them
      const merged = raw
        .split("\n")
        .filter((line) => line.trim().startsWith("{"))
        .map((line) => JSON.parse(line))
        .map((obj) => obj.message?.content || obj.response || "")
        .join("");
      reply = merged.trim();
    } else {
      const data = JSON.parse(raw);
      console.log("🔍 RAW LLM RESPONSE:", JSON.stringify(data, null, 2));
      // Try completion-style text first, then the chat-style shapes.
      reply =
        data?.choices?.[0]?.text?.trim() ||
        data?.choices?.[0]?.message?.content?.trim() ||
        data?.message?.content?.trim() ||
        "";
    }
  } catch (err) {
    // Best effort: a malformed body is reported inside the reply, not thrown.
    reply = `[parse error: ${err.message}]`;
  }

  return { reply, raw, backend: backend.key };
}
|
|
|
|
// ------------------------------------
// Export the main call helper
// ------------------------------------
|
|
/**
 * Ask the configured LLM backends, in priority order, for a reply to `messages`
 * and return the first successful result.
 *
 * Backends are described by environment variables (`LLM_<NAME>_URL` and
 * `LLM_<NAME>_MODEL` for PRIMARY, SECONDARY, CLOUD and FALLBACK). A backend
 * with a missing URL or model is skipped without being attempted. A backend
 * that throws is logged as a warning and the next one is tried.
 *
 * @param {Array<object>} messages Chat messages, passed unchanged to `tryBackend`.
 * @returns {Promise<object>} Whatever `tryBackend` resolved with for the first
 *   backend that succeeded.
 * @throws {Error} With message "all_backends_failed" when no backend produced a
 *   result. If at least one backend was attempted, `cause` is an
 *   `AggregateError` whose `errors` lists the individual failures in attempt
 *   order.
 */
export async function callSpeechLLM(messages) {
  const backends = [
    { key: "primary", type: "vllm", url: process.env.LLM_PRIMARY_URL, model: process.env.LLM_PRIMARY_MODEL },
    { key: "secondary", type: "ollama", url: process.env.LLM_SECONDARY_URL, model: process.env.LLM_SECONDARY_MODEL },
    { key: "cloud", type: "openai", url: process.env.LLM_CLOUD_URL, model: process.env.LLM_CLOUD_MODEL },
    { key: "fallback", type: "llamacpp", url: process.env.LLM_FALLBACK_URL, model: process.env.LLM_FALLBACK_MODEL },
  ];

  // Remember every failure so the final error can explain what went wrong.
  const failures = [];

  for (const b of backends) {
    // Unconfigured backends are not an error; they are simply not part of the chain.
    if (!b.url || !b.model) continue;
    try {
      console.log(`🧠 Trying backend: ${b.key.toUpperCase()} (${b.url})`);
      const out = await tryBackend(b, messages);
      console.log(`✅ Success via ${b.key.toUpperCase()}`);
      return out;
    } catch (err) {
      console.warn(`⚠️ ${b.key.toUpperCase()} failed: ${err.message}`);
      failures.push(err);
    }
  }

  // Keep the established message for callers that match on it; attach the
  // underlying errors only when there are some to report.
  const options =
    failures.length > 0
      ? { cause: new AggregateError(failures, "every attempted backend failed") }
      : undefined;
  throw new Error("all_backends_failed", options);
}
|