From 0c832c9c594ff133dd702168ff27d6169e041c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=92=D0=BB=D0=B0=D0=B4=D0=B8=D0=BC=D0=B8=D1=80=20=D0=90?= =?UTF-8?q?=D0=BA=D0=B8=D0=BC=D0=BE=D0=B2?= Date: Mon, 2 Mar 2026 05:36:08 +0300 Subject: [PATCH] feat(gemini): convert OpenAI SSE to Gemini SSE format in /v1beta/models route (#225) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(translator): filter nameless hosted tools when converting Responses API to Chat format Codex CLI sends "hosted" tools (e.g. `request_user_input`) via the OpenAI Responses API. These tools have no explicit `name` field. The previous `body.tools.map()` pass propagated `name: undefined` into the resulting Chat Completions function declarations, which then became anonymous `functionDeclarations` after the OpenAI→Gemini translation step. Gemini strictly requires every function declaration to have a valid name and rejects the entire request with: GenerateContentRequest.tools[0].function_declarations[4].name: Invalid function name. Must start with a letter or an underscore. Fix: filter out any Responses API tool that lacks a non-empty `name` string before converting to `{ type: "function", function: { name, ... } }`. Named function tools are unaffected; only unnamed hosted tools are skipped. Fixes: Gemini 400 error when Codex CLI is routed through 9router. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude * feat(gemini): convert OpenAI SSE to Gemini SSE format in /v1beta/models route The @google/genai SDK always uses :streamGenerateContent?alt=sse for chat and expects Gemini SSE chunk format. The upstream handleChat returns OpenAI SSE format, causing the SDK to crash on the [DONE] sentinel. Changes: - Add transformOpenAISSEToGeminiSSE() using TransformStream that converts each OpenAI SSE chunk (choices[0].delta) to Gemini SSE format (candidates[0].content.parts) on the fly - Drop the OpenAI [DONE] sentinel (Gemini SSE ends by stream close) - Map finish_reason -> finishReason, attach usageMetadata on final chunk - Support reasoning_content -> thought: true parts for thinking models - Refactor finishReasonMap to shared FINISH_REASON_MAP constant - Fix streaming dispatch: stream=true now calls transformOpenAISSEToGeminiSSE instead of passing OpenAI SSE through raw Fixes: SyntaxError: "[DONE]" is not valid JSON in Gemini CLI 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --------- Co-authored-by: Claude --- src/app/api/v1beta/models/[...path]/route.js | 250 +++++++++++++++++-- 1 file changed, 233 insertions(+), 17 deletions(-) diff --git a/src/app/api/v1beta/models/[...path]/route.js b/src/app/api/v1beta/models/[...path]/route.js index cb9a63c..5de99f1 100644 --- a/src/app/api/v1beta/models/[...path]/route.js +++ b/src/app/api/v1beta/models/[...path]/route.js @@ -28,34 +28,59 @@ export async function OPTIONS() { } /** - * POST /v1beta/models/{model}:generateContent - Gemini compatible endpoint - * Converts Gemini format to internal format and handles via handleChat + * POST /v1beta/models/{model}:generateContent — non-streaming + * POST /v1beta/models/{model}:streamGenerateContent — streaming (SSE) + * + * Streaming intent is determined by the URL action suffix (canonical Gemini API + * convention), NOT by a body field. generationConfig.stream is not a real + * Gemini API field and Gemini CLI never sets it. + * + * The @google/genai SDK always uses :streamGenerateContent?alt=sse for chat. + * The upstream handleChat returns OpenAI SSE format; we transform it to + * Gemini SSE format on the fly via transformOpenAISSEToGeminiSSE(). */ export async function POST(request, { params }) { await ensureInitialized(); try { const { path } = await params; - // path = ["provider", "model:generateContent"] or ["model:generateContent"] - + // path = ["provider", "model:action"] or ["model:action"] + let model; + let action; // ":generateContent" | ":streamGenerateContent" + if (path.length >= 2) { // Format: /v1beta/models/provider/model:generateContent const provider = path[0]; const modelAction = path[1]; - const modelName = modelAction.replace(":generateContent", "").replace(":streamGenerateContent", ""); - model = `${provider}/${modelName}`; + action = modelAction.includes(":streamGenerateContent") + ? ":streamGenerateContent" + : ":generateContent"; + const modelName = modelAction + .replace(":streamGenerateContent", "") + .replace(":generateContent", ""); + model = provider + "/" + modelName; } else { // Format: /v1beta/models/model:generateContent const modelAction = path[0]; - model = modelAction.replace(":generateContent", "").replace(":streamGenerateContent", ""); + action = modelAction.includes(":streamGenerateContent") + ? ":streamGenerateContent" + : ":generateContent"; + model = modelAction + .replace(":streamGenerateContent", "") + .replace(":generateContent", ""); } const body = await request.json(); - - // Convert Gemini format to OpenAI/internal format - const convertedBody = convertGeminiToInternal(body, model); - + + // Streaming is determined by URL action suffix: + // :streamGenerateContent => stream: true (SSE) + // :generateContent => stream: false (plain JSON) + const stream = action === ":streamGenerateContent"; + + // Convert Gemini request format to OpenAI/internal format + const convertedBody = convertGeminiToInternal(body, model, stream); + // Create new request with converted body const newRequest = new Request(request.url, { method: "POST", @@ -63,7 +88,17 @@ export async function POST(request, { params }) { body: JSON.stringify(convertedBody), }); - return await handleChat(newRequest); + const response = await handleChat(newRequest); + + if (stream) { + // Transform OpenAI SSE => Gemini SSE on the fly. + // The @google/genai SDK always uses :streamGenerateContent?alt=sse and + // expects Gemini SSE chunks (no [DONE] sentinel — stream just closes). + return transformOpenAISSEToGeminiSSE(response, model); + } else { + // Convert OpenAI JSON response => Gemini GenerateContentResponse + return await convertOpenAIResponseToGemini(response, model); + } } catch (error) { console.log("Error handling Gemini request:", error); return Response.json( @@ -74,9 +109,13 @@ export async function POST(request, { params }) { } /** - * Convert Gemini request format to internal format + * Convert Gemini request format to OpenAI/internal format. + * + * @param {object} geminiBody - parsed Gemini request body + * @param {string} model - resolved model string (e.g. "gemini-pro-high") + * @param {boolean} stream - whether to stream (from URL action) */ -function convertGeminiToInternal(geminiBody, model) { +function convertGeminiToInternal(geminiBody, model, stream) { const messages = []; // Convert system instruction @@ -98,9 +137,6 @@ function convertGeminiToInternal(geminiBody, model) { } } - // Determine if streaming - const stream = geminiBody.generationConfig?.stream !== false; - return { model, messages, @@ -111,3 +147,183 @@ function convertGeminiToInternal(geminiBody, model) { }; } +/** Map OpenAI finish_reason => Gemini finishReason */ +const FINISH_REASON_MAP = { + stop: "STOP", + length: "MAX_TOKENS", + tool_calls: "STOP", + content_filter: "SAFETY", +}; + +/** + * Transform an OpenAI SSE stream into a Gemini SSE stream. + * + * OpenAI SSE format (what handleChat returns): + * data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":null}]} + * data: {"choices":[{"delta":{},"finish_reason":"stop"}],"usage":{...}} + * data: [DONE] + * + * Gemini SSE format (what @google/genai SDK expects): + * data: {"candidates":[{"content":{"role":"model","parts":[{"text":"Hi"}]},"index":0}]} + * data: {"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP","index":0}],"usageMetadata":{...}} + * (stream closes — no [DONE]) + */ +function transformOpenAISSEToGeminiSSE(upstreamResponse, model) { + if (!upstreamResponse.ok || !upstreamResponse.body) { + return upstreamResponse; + } + + const decoder = new TextDecoder(); + const encoder = new TextEncoder(); + + const transformStream = new TransformStream({ + transform(chunk, controller) { + const text = decoder.decode(chunk, { stream: true }); + const lines = text.split("\n"); + + for (const line of lines) { + if (!line.startsWith("data:")) continue; + + const data = line.slice(5).trim(); + + // Drop empty lines and the OpenAI [DONE] sentinel. + // Gemini SSE ends by stream close, no sentinel needed. + if (!data || data === "[DONE]") continue; + + let parsed; + try { + parsed = JSON.parse(data); + } catch { + continue; + } + + const choice = parsed.choices?.[0]; + if (!choice) continue; + + const delta = choice.delta || {}; + + const parts = []; + if (delta.reasoning_content) { + parts.push({ text: delta.reasoning_content, thought: true }); + } + if (delta.content) { + parts.push({ text: delta.content }); + } + + // Skip pure role-only deltas with no content and no finish signal + if (parts.length === 0 && !choice.finish_reason) continue; + + const candidate = { + content: { + role: "model", + parts: parts.length > 0 ? parts : [{ text: "" }], + }, + index: 0, + }; + + if (choice.finish_reason) { + candidate.finishReason = FINISH_REASON_MAP[choice.finish_reason] || "STOP"; + } + + const geminiChunk = { candidates: [candidate] }; + + // Attach usage + modelVersion on the final chunk (when finish_reason is set) + if (choice.finish_reason && parsed.usage) { + geminiChunk.usageMetadata = { + promptTokenCount: parsed.usage.prompt_tokens || 0, + candidatesTokenCount: parsed.usage.completion_tokens || 0, + totalTokenCount: parsed.usage.total_tokens || 0, + }; + const reasoningTokens = + parsed.usage.completion_tokens_details?.reasoning_tokens; + if (reasoningTokens) { + geminiChunk.usageMetadata.thoughtsTokenCount = reasoningTokens; + } + geminiChunk.modelVersion = parsed.model || model; + } + + controller.enqueue( + encoder.encode("data: " + JSON.stringify(geminiChunk) + "\r\n\r\n") + ); + } + }, + // No flush() needed: Gemini SSE ends by stream close, not a sentinel + }); + + return new Response(upstreamResponse.body.pipeThrough(transformStream), { + status: 200, + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Access-Control-Allow-Origin": "*", + }, + }); +} + +/** + * Convert an OpenAI chat.completion JSON response into a Gemini + * GenerateContentResponse so that Gemini CLI can parse it. + */ +async function convertOpenAIResponseToGemini(response, model) { + if (!response.ok) return response; + + let body; + try { + body = await response.json(); + } catch { + return response; + } + + if (body.candidates) return Response.json(body, { + headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" } + }); + + if (body.error) return Response.json(body, { + status: response.status, + headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" } + }); + + const choice = body.choices?.[0]; + if (!choice) { + return Response.json(body, { + headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" } + }); + } + + const { message, finish_reason } = choice; + + const parts = []; + if (message.reasoning_content) { + parts.push({ text: message.reasoning_content, thought: true }); + } + parts.push({ text: message.content || "" }); + + const finishReason = FINISH_REASON_MAP[finish_reason] || "STOP"; + + const geminiResponse = { + candidates: [ + { + content: { role: "model", parts }, + finishReason, + index: 0, + }, + ], + modelVersion: body.model || model, + }; + + if (body.usage) { + geminiResponse.usageMetadata = { + promptTokenCount: body.usage.prompt_tokens || 0, + candidatesTokenCount: body.usage.completion_tokens || 0, + totalTokenCount: body.usage.total_tokens || 0, + }; + const reasoningTokens = body.usage.completion_tokens_details?.reasoning_tokens; + if (reasoningTokens) { + geminiResponse.usageMetadata.thoughtsTokenCount = reasoningTokens; + } + } + + return Response.json(geminiResponse, { + headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" } + }); +}