diff --git a/src/app/api/v1beta/models/[...path]/route.js b/src/app/api/v1beta/models/[...path]/route.js index cb9a63c..5de99f1 100644 --- a/src/app/api/v1beta/models/[...path]/route.js +++ b/src/app/api/v1beta/models/[...path]/route.js @@ -28,34 +28,59 @@ export async function OPTIONS() { } /** - * POST /v1beta/models/{model}:generateContent - Gemini compatible endpoint - * Converts Gemini format to internal format and handles via handleChat + * POST /v1beta/models/{model}:generateContent — non-streaming + * POST /v1beta/models/{model}:streamGenerateContent — streaming (SSE) + * + * Streaming intent is determined by the URL action suffix (canonical Gemini API + * convention), NOT by a body field. generationConfig.stream is not a real + * Gemini API field and Gemini CLI never sets it. + * + * The @google/genai SDK always uses :streamGenerateContent?alt=sse for chat. + * The upstream handleChat returns OpenAI SSE format; we transform it to + * Gemini SSE format on the fly via transformOpenAISSEToGeminiSSE(). */ export async function POST(request, { params }) { await ensureInitialized(); try { const { path } = await params; - // path = ["provider", "model:generateContent"] or ["model:generateContent"] - + // path = ["provider", "model:action"] or ["model:action"] + let model; + let action; // ":generateContent" | ":streamGenerateContent" + if (path.length >= 2) { // Format: /v1beta/models/provider/model:generateContent const provider = path[0]; const modelAction = path[1]; - const modelName = modelAction.replace(":generateContent", "").replace(":streamGenerateContent", ""); - model = `${provider}/${modelName}`; + action = modelAction.includes(":streamGenerateContent") + ? 
":streamGenerateContent" + : ":generateContent"; + const modelName = modelAction + .replace(":streamGenerateContent", "") + .replace(":generateContent", ""); + model = provider + "/" + modelName; } else { // Format: /v1beta/models/model:generateContent const modelAction = path[0]; - model = modelAction.replace(":generateContent", "").replace(":streamGenerateContent", ""); + action = modelAction.includes(":streamGenerateContent") + ? ":streamGenerateContent" + : ":generateContent"; + model = modelAction + .replace(":streamGenerateContent", "") + .replace(":generateContent", ""); } const body = await request.json(); - - // Convert Gemini format to OpenAI/internal format - const convertedBody = convertGeminiToInternal(body, model); - + + // Streaming is determined by URL action suffix: + // :streamGenerateContent => stream: true (SSE) + // :generateContent => stream: false (plain JSON) + const stream = action === ":streamGenerateContent"; + + // Convert Gemini request format to OpenAI/internal format + const convertedBody = convertGeminiToInternal(body, model, stream); + // Create new request with converted body const newRequest = new Request(request.url, { method: "POST", @@ -63,7 +88,17 @@ export async function POST(request, { params }) { body: JSON.stringify(convertedBody), }); - return await handleChat(newRequest); + const response = await handleChat(newRequest); + + if (stream) { + // Transform OpenAI SSE => Gemini SSE on the fly. + // The @google/genai SDK always uses :streamGenerateContent?alt=sse and + // expects Gemini SSE chunks (no [DONE] sentinel — stream just closes). 
+ return transformOpenAISSEToGeminiSSE(response, model); + } else { + // Convert OpenAI JSON response => Gemini GenerateContentResponse + return await convertOpenAIResponseToGemini(response, model); + } } catch (error) { console.log("Error handling Gemini request:", error); return Response.json( @@ -74,9 +109,13 @@ export async function POST(request, { params }) { } /** - * Convert Gemini request format to internal format + * Convert Gemini request format to OpenAI/internal format. + * + * @param {object} geminiBody - parsed Gemini request body + * @param {string} model - resolved model string (e.g. "gemini-pro-high") + * @param {boolean} stream - whether to stream (from URL action) */ -function convertGeminiToInternal(geminiBody, model) { +function convertGeminiToInternal(geminiBody, model, stream) { const messages = []; // Convert system instruction @@ -98,9 +137,6 @@ function convertGeminiToInternal(geminiBody, model) { } } - // Determine if streaming - const stream = geminiBody.generationConfig?.stream !== false; - return { model, messages, @@ -111,3 +147,183 @@ function convertGeminiToInternal(geminiBody, model) { }; } +/** Map OpenAI finish_reason => Gemini finishReason */ +const FINISH_REASON_MAP = { + stop: "STOP", + length: "MAX_TOKENS", + tool_calls: "STOP", + content_filter: "SAFETY", +}; + +/** + * Transform an OpenAI SSE stream into a Gemini SSE stream. 
/**
 * Transform an OpenAI SSE stream into a Gemini SSE stream.
 *
 * OpenAI SSE format (input):
 *   data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":null}]}
 *   data: {"choices":[{"delta":{},"finish_reason":"stop"}],"usage":{...}}
 *   data: [DONE]
 *
 * Gemini SSE format (output):
 *   data: {"candidates":[{"content":{"role":"model","parts":[{"text":"Hi"}]},"index":0}]}
 *   data: {"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP","index":0}],"usageMetadata":{...}}
 *   (stream closes; the OpenAI [DONE] sentinel is dropped)
 *
 * Network chunks do not have to end on a line boundary, so the trailing
 * partial line of every chunk is carried over and prepended to the next one.
 * Without that carry-over an event split across two chunks would fail to
 * parse on both sides and be silently lost.
 *
 * @param {Response} upstreamResponse - response carrying the OpenAI SSE body
 * @param {string} model - fallback for modelVersion when the chunk has no model
 * @returns {Response} the upstream response itself when it is not ok or has no
 *   body; otherwise a new 200 text/event-stream response with Gemini chunks
 */
function transformOpenAISSEToGeminiSSE(upstreamResponse, model) {
  // Error or body-less responses are passed through untouched.
  if (!upstreamResponse.ok || !upstreamResponse.body) {
    return upstreamResponse;
  }

  const decoder = new TextDecoder();
  const encoder = new TextEncoder();

  // Text received so far that is not yet terminated by "\n".
  let pending = "";

  // Convert one complete SSE line into a Gemini chunk object, or null when the
  // line carries nothing to forward.
  const toGeminiChunk = (line) => {
    if (!line.startsWith("data:")) return null;

    const data = line.slice(5).trim();

    // Drop empty payloads and the OpenAI [DONE] sentinel.
    if (!data || data === "[DONE]") return null;

    let parsed;
    try {
      parsed = JSON.parse(data);
    } catch {
      // Unparseable payload: skipped, same as any other non-event line.
      return null;
    }

    // `parsed` may legally be null ("data: null"); optional chaining keeps that
    // from throwing inside the transformer and erroring the whole stream.
    const choice = parsed?.choices?.[0];
    if (!choice) return null;

    const delta = choice.delta || {};

    const parts = [];
    if (delta.reasoning_content) {
      parts.push({ text: delta.reasoning_content, thought: true });
    }
    if (delta.content) {
      parts.push({ text: delta.content });
    }

    // Skip deltas that carry neither text nor a finish signal (e.g. role-only).
    if (parts.length === 0 && !choice.finish_reason) return null;

    const candidate = {
      content: {
        role: "model",
        parts: parts.length > 0 ? parts : [{ text: "" }],
      },
      index: 0,
    };

    if (choice.finish_reason) {
      candidate.finishReason = FINISH_REASON_MAP[choice.finish_reason] || "STOP";
    }

    const geminiChunk = { candidates: [candidate] };

    // Usage and modelVersion are attached only to a finishing chunk that
    // also carries a usage object.
    if (choice.finish_reason && parsed.usage) {
      geminiChunk.usageMetadata = {
        promptTokenCount: parsed.usage.prompt_tokens ?? 0,
        candidatesTokenCount: parsed.usage.completion_tokens ?? 0,
        totalTokenCount: parsed.usage.total_tokens ?? 0,
      };
      const reasoningTokens =
        parsed.usage.completion_tokens_details?.reasoning_tokens;
      if (reasoningTokens) {
        geminiChunk.usageMetadata.thoughtsTokenCount = reasoningTokens;
      }
      geminiChunk.modelVersion = parsed.model || model;
    }

    return geminiChunk;
  };

  const emitLine = (line, controller) => {
    const geminiChunk = toGeminiChunk(line);
    if (geminiChunk) {
      controller.enqueue(
        encoder.encode(`data: ${JSON.stringify(geminiChunk)}\r\n\r\n`)
      );
    }
  };

  const transformStream = new TransformStream({
    transform(chunk, controller) {
      pending += decoder.decode(chunk, { stream: true });

      const lines = pending.split("\n");
      // The last element is whatever follows the final "\n": an incomplete
      // line (or "" when the chunk ended exactly on a boundary). Keep it.
      pending = lines.pop();

      for (const line of lines) {
        emitLine(line, controller);
      }
    },
    flush(controller) {
      // Drain bytes still held by the decoder plus a final line that was never
      // newline-terminated. No sentinel is written; the stream just closes.
      pending += decoder.decode();
      if (pending) {
        emitLine(pending, controller);
      }
      pending = "";
    },
  });

  return new Response(upstreamResponse.body.pipeThrough(transformStream), {
    status: 200,
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      "Access-Control-Allow-Origin": "*",
    },
  });
}
/**
 * Convert an OpenAI chat.completion JSON response into a Gemini
 * GenerateContentResponse.
 *
 * Pass-through cases (no conversion):
 *   - a non-ok response is returned as-is;
 *   - a body that is not valid JSON is re-wrapped unchanged with the original
 *     status and headers;
 *   - JSON that already has `candidates`, carries an `error`, or has no
 *     `choices[0]` is re-emitted as JSON with CORS headers.
 *
 * @param {Response} response - response to convert
 * @param {string} model - fallback for modelVersion when the body has no model
 * @returns {Promise<Response>} JSON response in Gemini shape, or a pass-through
 */
async function convertOpenAIResponseToGemini(response, model) {
  if (!response.ok) return response;

  const jsonHeaders = {
    "Content-Type": "application/json",
    "Access-Control-Allow-Origin": "*",
  };

  // A Response body can be consumed only once. Read it as text so that, if it
  // turns out not to be JSON, the exact payload can still be handed back;
  // returning the original response after a failed read would hand the caller
  // an already-consumed, unsendable body.
  const raw = await response.text();

  let body;
  try {
    body = JSON.parse(raw);
  } catch {
    const headers = new Headers(response.headers);
    // The payload is re-encoded from text, so length/encoding metadata from
    // the original response may no longer describe it.
    headers.delete("content-length");
    headers.delete("content-encoding");
    return new Response(raw, {
      status: response.status,
      statusText: response.statusText,
      headers,
    });
  }

  // JSON primitives and null have nothing to convert; re-emit them unchanged
  // instead of throwing on property access.
  if (body === null || typeof body !== "object") {
    return Response.json(body, { headers: jsonHeaders });
  }

  // Already in Gemini shape.
  if (body.candidates) {
    return Response.json(body, { headers: jsonHeaders });
  }

  if (body.error) {
    return Response.json(body, {
      status: response.status,
      headers: jsonHeaders,
    });
  }

  const choice = body.choices?.[0];
  if (!choice) {
    return Response.json(body, { headers: jsonHeaders });
  }

  // A choice without `message` is treated as an empty message.
  const message = choice.message ?? {};

  const parts = [];
  if (message.reasoning_content) {
    parts.push({ text: message.reasoning_content, thought: true });
  }
  // Always emit a text part, even when content is null or missing.
  parts.push({ text: message.content || "" });

  const geminiResponse = {
    candidates: [
      {
        content: { role: "model", parts },
        finishReason: FINISH_REASON_MAP[choice.finish_reason] || "STOP",
        index: 0,
      },
    ],
    modelVersion: body.model || model,
  };

  if (body.usage) {
    geminiResponse.usageMetadata = {
      promptTokenCount: body.usage.prompt_tokens ?? 0,
      candidatesTokenCount: body.usage.completion_tokens ?? 0,
      totalTokenCount: body.usage.total_tokens ?? 0,
    };
    const reasoningTokens =
      body.usage.completion_tokens_details?.reasoning_tokens;
    if (reasoningTokens) {
      geminiResponse.usageMetadata.thoughtsTokenCount = reasoningTokens;
    }
  }

  return Response.json(geminiResponse, { headers: jsonHeaders });
}