diff --git a/Dockerfile b/Dockerfile index 136366b..5d2491c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,6 +22,7 @@ RUN mkdir -p /app/data COPY --from=builder /app/public ./public COPY --from=builder /app/.next/static ./.next/static COPY --from=builder /app/.next/standalone ./ +COPY --from=builder /app/open-sse ./open-sse EXPOSE 20128 diff --git a/open-sse/config/providerModels.js b/open-sse/config/providerModels.js index a830e05..bbafb3f 100644 --- a/open-sse/config/providerModels.js +++ b/open-sse/config/providerModels.js @@ -135,6 +135,10 @@ export const PROVIDER_MODELS = { { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" }, { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" }, { id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" }, + // Embedding models + { id: "gemini-embedding-001", name: "Gemini Embedding 001", type: "embedding" }, + { id: "text-embedding-005", name: "Text Embedding 005", type: "embedding" }, + { id: "text-embedding-004", name: "Text Embedding 004 (Legacy)", type: "embedding" }, ], openrouter: [ { id: "auto", name: "Auto (Best Available)" }, diff --git a/open-sse/handlers/chatCore.js b/open-sse/handlers/chatCore.js index dee1c18..8c713d5 100644 --- a/open-sse/handlers/chatCore.js +++ b/open-sse/handlers/chatCore.js @@ -760,6 +760,22 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred ? 
// Provider identifiers that are routed to the Google AI (Gemini) embeddings API.
const GEMINI_PROVIDERS = new Set(["gemini", "google_ai_studio"]);

/**
 * Report whether `provider` is one of the Google AI (Gemini) identifiers.
 *
 * The lookup is an exact, case-sensitive match against GEMINI_PROVIDERS.
 *
 * @param {string} provider - Provider identifier to look up.
 * @returns {boolean} `true` when the identifier is listed in GEMINI_PROVIDERS.
 */
function isGeminiProvider(provider) {
  const listed = GEMINI_PROVIDERS.has(provider);
  return listed;
}
/**
 * Assemble the JSON payload for an embeddings request.
 *
 * Google AI (Gemini) providers receive the Gemini shape:
 *   - array input  → { requests: [{ model, content: { parts: [{ text }] } }, ...] }
 *   - scalar input → { model, content: { parts: [{ text }] } }
 * Every other provider receives the OpenAI shape { model, input }, with
 * `encoding_format` added only when a truthy encodingFormat is supplied.
 *
 * @param {string} provider - Provider identifier; selects the payload shape.
 * @param {string} model - Model id; on the Gemini path it is prefixed with
 *   "models/" unless it already starts with that prefix.
 * @param {string|string[]} input - Text (or list of texts) to embed. On the
 *   Gemini path each value is coerced with String().
 * @param {string} [encodingFormat] - Only used on the OpenAI path.
 * @returns {object} Request body ready to be JSON-serialized.
 */
function buildEmbeddingsBody(provider, model, input, encodingFormat) {
  // OpenAI-style providers: pass model and input through untouched.
  if (!isGeminiProvider(provider)) {
    const openAiBody = { model, input };
    if (encodingFormat) {
      openAiBody.encoding_format = encodingFormat;
    }
    return openAiBody;
  }

  const qualifiedModel = model.startsWith("models/") ? model : `models/${model}`;

  // One Gemini embed request; the same shape is used standalone and per batch entry.
  const toEmbedRequest = (value) => ({
    model: qualifiedModel,
    content: { parts: [{ text: String(value) }] }
  });

  return Array.isArray(input)
    ? { requests: input.map((value) => toEmbedRequest(value)) }
    : toEmbedRequest(input);
}
model : `models/${model}`; + + if (Array.isArray(input)) { + // batchEmbedContents for array input (keeps response format consistent even for length=1) + return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:batchEmbedContents?key=${encodeURIComponent(apiKey)}`; + } + return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:embedContent?key=${encodeURIComponent(apiKey)}`; + } + switch (provider) { case "openai": return "https://api.openai.com/v1/embeddings"; @@ -46,6 +95,11 @@ function buildEmbeddingsUrl(provider, credentials) { function buildEmbeddingsHeaders(provider, credentials) { const headers = { "Content-Type": "application/json" }; + if (isGeminiProvider(provider)) { + // Gemini API uses API key as query param — no Authorization header needed + return headers; + } + switch (provider) { case "openai": case "openrouter": @@ -56,11 +110,7 @@ function buildEmbeddingsHeaders(provider, credentials) { } break; default: - if (provider?.startsWith?.("openai-compatible-")) { - headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`; - } else { - headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`; - } + headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`; } return headers; @@ -68,14 +118,52 @@ function buildEmbeddingsHeaders(provider, credentials) { /** * Normalize the embeddings response to OpenAI format. - * Most OpenAI-compatible providers already return this format. + * + * Gemini single response: + * { embedding: { values: [0.1, 0.2, ...] } } + * + * Gemini batch response: + * { embeddings: [{ values: [...] }, ...] } + * + * Target OpenAI format: + * { object: "list", data: [{ object: "embedding", index: 0, embedding: [...] 
/**
 * Convert a provider's embeddings response into the OpenAI list shape:
 *   { object: "list", data: [{ object: "embedding", index, embedding }], model, usage }
 *
 * - A body that already has object "list" and an array `data` is returned as-is.
 * - For Google AI (Gemini) providers:
 *     batch  { embeddings: [{ values }, ...] } → one entry per element
 *                                                (missing `values` becomes [])
 *     single { embedding: { values } }          → one entry at index 0
 *     neither shape                             → empty `data`
 *   Usage counters are set to 0 on this path; no token counts are read from
 *   the Gemini body.
 * - Any other body is returned untouched.
 *
 * @param {object} responseBody - Parsed JSON body from the provider.
 * @param {string} model - Model id echoed into the normalized result.
 * @param {string} provider - Provider identifier; selects Gemini handling.
 * @returns {object} OpenAI-format embeddings response, or the original body.
 */
function normalizeEmbeddingsResponse(responseBody, model, provider) {
  const alreadyOpenAiList =
    responseBody.object === "list" && Array.isArray(responseBody.data);

  // Nothing to translate: either it is already OpenAI-shaped, or we have no
  // provider-specific mapping for it.
  if (alreadyOpenAiList || !isGeminiProvider(provider)) {
    return responseBody;
  }

  const toEntry = (vector, index) => ({
    object: "embedding",
    index,
    embedding: vector
  });

  let data = [];
  if (Array.isArray(responseBody.embeddings)) {
    // batchEmbedContents response
    data = responseBody.embeddings.map((item, position) => toEntry(item.values || [], position));
  } else if (responseBody.embedding?.values) {
    // embedContent response
    data = [toEntry(responseBody.embedding.values, 0)];
  }

  return {
    object: "list",
    data,
    model,
    usage: { prompt_tokens: 0, total_tokens: 0 }
  };
}
`array[${input.length}]` : "string"}`); @@ -162,7 +250,12 @@ export async function handleEmbeddingsCore({ // Retry with refreshed credentials try { const retryHeaders = buildEmbeddingsHeaders(provider, credentials); - providerResponse = await fetch(url, { + // Rebuild URL for Gemini since API key is embedded in query param + const retryUrl = isGeminiProvider(provider) + ? buildEmbeddingsUrl(provider, model, credentials, input) + : url; + + providerResponse = await fetch(retryUrl, { method: "POST", headers: retryHeaders, body: JSON.stringify(requestBody) @@ -193,7 +286,7 @@ export async function handleEmbeddingsCore({ await onRequestSuccess(); } - const normalized = normalizeEmbeddingsResponse(responseBody, model); + const normalized = normalizeEmbeddingsResponse(responseBody, model, provider); log?.debug?.("EMBEDDINGS", `Success | usage=${JSON.stringify(normalized.usage || {})}`); diff --git a/open-sse/utils/stream.js b/open-sse/utils/stream.js index 83f4871..0e54a6a 100644 --- a/open-sse/utils/stream.js +++ b/open-sse/utils/stream.js @@ -82,6 +82,25 @@ export function createSSEStream(options = {}) { const idFixed = fixInvalidId(parsed); + // Ensure OpenAI-required fields are present on streaming chunks (Letta compat) + let fieldsInjected = false; + if (!parsed.object) { parsed.object = "chat.completion.chunk"; fieldsInjected = true; } + if (!parsed.created) { parsed.created = Math.floor(Date.now() / 1000); fieldsInjected = true; } + + // Strip Azure-specific non-standard fields from streaming chunks + if (parsed.prompt_filter_results !== undefined) { + delete parsed.prompt_filter_results; + fieldsInjected = true; + } + if (parsed?.choices) { + for (const choice of parsed.choices) { + if (choice.content_filter_results !== undefined) { + delete choice.content_filter_results; + fieldsInjected = true; + } + } + } + if (!hasValuableContent(parsed, FORMATS.OPENAI)) { continue; } @@ -115,7 +134,7 @@ export function createSSEStream(options = {}) { parsed.usage = 
filterUsageForFormat(buffered, FORMATS.OPENAI); output = `data: ${JSON.stringify(parsed)}\n`; injectedUsage = true; - } else if (idFixed) { + } else if (idFixed || fieldsInjected) { output = `data: ${JSON.stringify(parsed)}\n`; injectedUsage = true; }