diff --git a/open-sse/executors/kiro.js b/open-sse/executors/kiro.js index 3a362ef..c288330 100644 --- a/open-sse/executors/kiro.js +++ b/open-sse/executors/kiro.js @@ -98,9 +98,16 @@ export class KiroExecutor extends BaseExecutor { if (!event) continue; const eventType = event.headers[":event-type"] || ""; + + // Track total content length for token estimation + if (!state.totalContentLength) state.totalContentLength = 0; + if (!state.contextUsagePercentage) state.contextUsagePercentage = 0; // Handle assistantResponseEvent if (eventType === "assistantResponseEvent" && event.payload?.content) { + const content = event.payload.content; + state.totalContentLength += content.length; + const chunk = { id: responseId, object: "chat.completion.chunk", @@ -109,8 +116,8 @@ export class KiroExecutor extends BaseExecutor { choices: [{ index: 0, delta: chunkIndex === 0 - ? { role: "assistant", content: event.payload.content } - : { content: event.payload.content }, + ? { role: "assistant", content } + : { content }, finish_reason: null }] }; @@ -233,25 +240,79 @@ export class KiroExecutor extends BaseExecutor { controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(chunk)}\n\n`)); } - // Detect end of stream - if ((eventType === "meteringEvent" || eventType === "contextUsageEvent") && !state.endDetected) { - state.endDetected = true; - if (!state.finishEmitted) { - state.finishEmitted = true; - const finishChunk = { - id: responseId, - object: "chat.completion.chunk", - created, - model, - choices: [{ - index: 0, - delta: {}, - finish_reason: state.hasToolCalls ? "tool_calls" : "stop" - }] - }; - controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finishChunk)}\n\n`)); + // Handle contextUsageEvent to extract contextUsagePercentage + if (eventType === "contextUsageEvent" && event.payload?.contextUsagePercentage) { + state.contextUsagePercentage = event.payload.contextUsagePercentage; + // Mark that we received context usage event + state.hasContextUsage = true; + } + + // Handle meteringEvent - mark that we received it + if (eventType === "meteringEvent") { + state.hasMeteringEvent = true; + } + + // Handle metricsEvent for token usage + if (eventType === "metricsEvent") { + // Extract usage data from metricsEvent payload + const metrics = event.payload?.metricsEvent || event.payload; + if (metrics && typeof metrics === 'object') { + const inputTokens = metrics.inputTokens || 0; + const outputTokens = metrics.outputTokens || 0; + + if (inputTokens > 0 || outputTokens > 0) { + state.usage = { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens + }; + } } } + + // Emit final chunk only after receiving BOTH meteringEvent AND contextUsageEvent + if (state.hasMeteringEvent && state.hasContextUsage && !state.finishEmitted) { + state.finishEmitted = true; + + // Estimate tokens if not available from events + if (!state.usage) { + // Estimate output tokens from content length + const estimatedOutputTokens = state.totalContentLength > 0 + ? Math.max(1, Math.floor(state.totalContentLength / 4)) + : 0; + + // Estimate input tokens from contextUsagePercentage + // Kiro models typically have 200k context window + const estimatedInputTokens = state.contextUsagePercentage > 0 + ? Math.floor(state.contextUsagePercentage * 200000 / 100) + : 0; + + state.usage = { + prompt_tokens: estimatedInputTokens, + completion_tokens: estimatedOutputTokens, + total_tokens: estimatedInputTokens + estimatedOutputTokens + }; + } + + const finishChunk = { + id: responseId, + object: "chat.completion.chunk", + created, + model, + choices: [{ + index: 0, + delta: {}, + finish_reason: state.hasToolCalls ? "tool_calls" : "stop" + }] + }; + + // Include usage in final chunk if available + if (state.usage) { + finishChunk.usage = state.usage; + } + + controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finishChunk)}\n\n`)); + } } if (iterations >= maxIterations) { diff --git a/open-sse/translator/helpers/maxTokensHelper.js b/open-sse/translator/helpers/maxTokensHelper.js index a32eb3f..57235a0 100644 --- a/open-sse/translator/helpers/maxTokensHelper.js +++ b/open-sse/translator/helpers/maxTokensHelper.js @@ -12,7 +12,6 @@ export function adjustMaxTokens(body) { // Tool calls with large content (like writing files) need more tokens if (body.tools && Array.isArray(body.tools) && body.tools.length > 0) { if (maxTokens < DEFAULT_MIN_TOKENS) { - console.log(`[AUTO-ADJUST] max_tokens: ${maxTokens} → ${DEFAULT_MIN_TOKENS} (tool calling detected)`); maxTokens = DEFAULT_MIN_TOKENS; } } diff --git a/open-sse/translator/response/claude-to-openai.js b/open-sse/translator/response/claude-to-openai.js index 67a50f6..aa8ccd6 100644 --- a/open-sse/translator/response/claude-to-openai.js +++ b/open-sse/translator/response/claude-to-openai.js @@ -92,9 +92,34 @@ export function claudeToOpenAIResponse(chunk, state) { } case "message_delta": { + // Extract usage from message_delta event (Claude native format) + // Normalize to OpenAI format (prompt_tokens/completion_tokens) for consistent logging + if (chunk.usage && typeof chunk.usage === "object") { + const inputTokens = typeof chunk.usage.input_tokens === "number" ? chunk.usage.input_tokens : 0; + const outputTokens = typeof chunk.usage.output_tokens === "number" ? chunk.usage.output_tokens : 0; + const cacheReadTokens = typeof chunk.usage.cache_read_input_tokens === "number" ? chunk.usage.cache_read_input_tokens : 0; + const cacheCreationTokens = typeof chunk.usage.cache_creation_input_tokens === "number" ? chunk.usage.cache_creation_input_tokens : 0; + + // Use OpenAI format keys for consistent logging in stream.js + state.usage = { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + input_tokens: inputTokens, + output_tokens: outputTokens + }; + + // Store cache tokens if present + if (cacheReadTokens > 0) { + state.usage.cache_read_input_tokens = cacheReadTokens; + } + if (cacheCreationTokens > 0) { + state.usage.cache_creation_input_tokens = cacheCreationTokens; + } + } + if (chunk.delta?.stop_reason) { state.finishReason = convertStopReason(chunk.delta.stop_reason); - results.push({ + const finalChunk = { id: `chatcmpl-${state.messageId}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), @@ -104,7 +129,41 @@ export function claudeToOpenAIResponse(chunk, state) { delta: {}, finish_reason: state.finishReason }] - }); + }; + + // Include usage in final chunk if available + if (state.usage && typeof state.usage === "object") { + const inputTokens = state.usage.input_tokens || 0; + const outputTokens = state.usage.output_tokens || 0; + const cachedTokens = state.usage.cache_read_input_tokens || 0; + const cacheCreationTokens = state.usage.cache_creation_input_tokens || 0; + + // prompt_tokens = input_tokens + cache_read + cache_creation (all prompt-side tokens) + // completion_tokens = output_tokens + // total_tokens = prompt_tokens + completion_tokens + const promptTokens = inputTokens + cachedTokens + cacheCreationTokens; + const completionTokens = outputTokens; + const totalTokens = promptTokens + completionTokens; + + finalChunk.usage = { + prompt_tokens: promptTokens, + completion_tokens: completionTokens, + total_tokens: totalTokens + }; + + // Add prompt_tokens_details if cached tokens exist + if (cachedTokens > 0 || cacheCreationTokens > 0) { + finalChunk.usage.prompt_tokens_details = {}; + if (cachedTokens > 0) { + finalChunk.usage.prompt_tokens_details.cached_tokens = cachedTokens; + } + if (cacheCreationTokens > 0) { + finalChunk.usage.prompt_tokens_details.cache_creation_tokens = cacheCreationTokens; + } + } + } + + results.push(finalChunk); state.finishReasonSent = true; } break; diff --git a/open-sse/translator/response/gemini-to-openai.js b/open-sse/translator/response/gemini-to-openai.js index b236145..000cc1f 100644 --- a/open-sse/translator/response/gemini-to-openai.js +++ b/open-sse/translator/response/gemini-to-openai.js @@ -160,14 +160,56 @@ export function geminiToOpenAIResponse(chunk, state) { } } - // Finish reason + // Usage metadata - extract before finish reason so we can include it + const usageMeta = response.usageMetadata || chunk.usageMetadata; + if (usageMeta && typeof usageMeta === "object") { + const cachedTokens = typeof usageMeta.cachedContentTokenCount === "number" ? usageMeta.cachedContentTokenCount : 0; + const promptTokenCountRaw = typeof usageMeta.promptTokenCount === "number" ? usageMeta.promptTokenCount : 0; + const thoughtsTokens = typeof usageMeta.thoughtsTokenCount === "number" ? usageMeta.thoughtsTokenCount : 0; + let candidatesTokens = typeof usageMeta.candidatesTokenCount === "number" ? usageMeta.candidatesTokenCount : 0; + const totalTokens = typeof usageMeta.totalTokenCount === "number" ? usageMeta.totalTokenCount : 0; + + // prompt_tokens = promptTokenCount (includes cached tokens, matching claude-to-openai.js behavior) + const promptTokens = promptTokenCountRaw; + + // Fallback calculation if candidatesTokenCount is 0 but totalTokenCount exists + if (candidatesTokens === 0 && totalTokens > 0) { + candidatesTokens = totalTokens - promptTokenCountRaw - thoughtsTokens; + if (candidatesTokens < 0) candidatesTokens = 0; + } + + // completion_tokens = candidatesTokenCount + thoughtsTokenCount (match Go code) + const completionTokens = candidatesTokens + thoughtsTokens; + + state.usage = { + prompt_tokens: promptTokens, + completion_tokens: completionTokens, + total_tokens: totalTokens + }; + + // Add prompt_tokens_details if cached tokens exist + if (cachedTokens > 0) { + state.usage.prompt_tokens_details = { + cached_tokens: cachedTokens + }; + } + + // Add completion_tokens_details if reasoning tokens exist + if (thoughtsTokens > 0) { + state.usage.completion_tokens_details = { + reasoning_tokens: thoughtsTokens + }; + } + } + + // Finish reason - include usage in final chunk if (candidate.finishReason) { let finishReason = candidate.finishReason.toLowerCase(); if (finishReason === "stop" && state.toolCalls.size > 0) { finishReason = "tool_calls"; } - results.push({ + const finalChunk = { id: `chatcmpl-${state.messageId}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), @@ -177,24 +219,15 @@ export function geminiToOpenAIResponse(chunk, state) { delta: {}, finish_reason: finishReason }] - }); - state.finishReason = finishReason; - } - -// Usage metadata - const usage = response.usageMetadata || chunk.usageMetadata; - if (usage && typeof usage === 'object') { - const promptTokens = (usage.promptTokenCount || 0) + (usage.thoughtsTokenCount || 0); - state.usage = { - prompt_tokens: promptTokens, - completion_tokens: usage.candidatesTokenCount || 0, - total_tokens: usage.totalTokenCount || 0 }; - if (usage.thoughtsTokenCount > 0) { - state.usage.completion_tokens_details = { - reasoning_tokens: usage.thoughtsTokenCount - }; + + // Include usage in final chunk for downstream translators + if (state.usage) { + finalChunk.usage = state.usage; } + + results.push(finalChunk); + state.finishReason = finishReason; } return results.length > 0 ? results : null; diff --git a/open-sse/translator/response/kiro-to-openai.js b/open-sse/translator/response/kiro-to-openai.js index a0e4aeb..f374d00 100644 --- a/open-sse/translator/response/kiro-to-openai.js +++ b/open-sse/translator/response/kiro-to-openai.js @@ -17,9 +17,6 @@ export function convertKiroToOpenAI(chunk, state) { if (chunk.object === "chat.completion.chunk" && chunk.choices) { return chunk; } - - - console.log("chunk", chunk); // Handle string chunk (raw SSE data) let data = chunk; @@ -161,6 +158,11 @@ export function convertKiroToOpenAI(chunk, state) { }] }; + // Include usage in final chunk if available + if (state.usage && typeof state.usage === "object") { + openaiChunk.usage = state.usage; + } + return openaiChunk; } diff --git a/open-sse/translator/response/openai-responses.js b/open-sse/translator/response/openai-responses.js index 9789bbb..d812340 100644 --- a/open-sse/translator/response/openai-responses.js +++ b/open-sse/translator/response/openai-responses.js @@ -474,9 +474,38 @@ export function openaiResponsesToOpenAIResponse(chunk, state) { // Response completed if (eventType === "response.completed") { + // Extract usage from response.completed event + const responseUsage = data.response?.usage; + if (responseUsage && typeof responseUsage === "object") { + const inputTokens = responseUsage.input_tokens || responseUsage.prompt_tokens || 0; + const outputTokens = responseUsage.output_tokens || responseUsage.completion_tokens || 0; + const cacheReadTokens = responseUsage.cache_read_input_tokens || 0; + const cacheCreationTokens = responseUsage.cache_creation_input_tokens || 0; + + // prompt_tokens = input_tokens + cache_read + cache_creation (all prompt-side tokens) + const promptTokens = inputTokens + cacheReadTokens + cacheCreationTokens; + + state.usage = { + prompt_tokens: promptTokens, + completion_tokens: outputTokens, + total_tokens: promptTokens + outputTokens + }; + + // Add prompt_tokens_details if cache tokens exist + if (cacheReadTokens > 0 || cacheCreationTokens > 0) { + state.usage.prompt_tokens_details = {}; + if (cacheReadTokens > 0) { + state.usage.prompt_tokens_details.cached_tokens = cacheReadTokens; + } + if (cacheCreationTokens > 0) { + state.usage.prompt_tokens_details.cache_creation_tokens = cacheCreationTokens; + } + } + } + if (!state.finishReasonSent) { state.finishReasonSent = true; - return { + const finalChunk = { id: state.chatId, object: "chat.completion.chunk", created: state.created, @@ -487,6 +516,13 @@ export function openaiResponsesToOpenAIResponse(chunk, state) { finish_reason: "stop" }] }; + + // Include usage in final chunk if available + if (state.usage && typeof state.usage === "object") { + finalChunk.usage = state.usage; + } + + return finalChunk; } return null; } diff --git a/open-sse/translator/response/openai-to-claude.js b/open-sse/translator/response/openai-to-claude.js index ad625ed..f0b636c 100644 --- a/open-sse/translator/response/openai-to-claude.js +++ b/open-sse/translator/response/openai-to-claude.js @@ -33,6 +33,40 @@ export function openaiToClaudeResponse(chunk, state) { const choice = chunk.choices[0]; const delta = choice.delta; + // Track usage from OpenAI chunk if available + if (chunk.usage && typeof chunk.usage === "object") { + const promptTokens = typeof chunk.usage.prompt_tokens === "number" ? chunk.usage.prompt_tokens : 0; + const outputTokens = typeof chunk.usage.completion_tokens === "number" ? chunk.usage.completion_tokens : 0; + + // Extract cache tokens from prompt_tokens_details + const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens; + const cacheCreationTokens = chunk.usage.prompt_tokens_details?.cache_creation_tokens; + const cacheReadTokens = typeof cachedTokens === "number" ? cachedTokens : 0; + const cacheCreateTokens = typeof cacheCreationTokens === "number" ? cacheCreationTokens : 0; + + // input_tokens = prompt_tokens - cached_tokens - cache_creation_tokens + // Because OpenAI's prompt_tokens includes all prompt-side tokens + const inputTokens = promptTokens - cacheReadTokens - cacheCreateTokens; + + state.usage = { + input_tokens: inputTokens, + output_tokens: outputTokens + }; + + // Add cache_read_input_tokens if present + if (cacheReadTokens > 0) { + state.usage.cache_read_input_tokens = cacheReadTokens; + } + + // Add cache_creation_input_tokens if present + if (cacheCreateTokens > 0) { + state.usage.cache_creation_input_tokens = cacheCreateTokens; + } + + // Note: completion_tokens_details.reasoning_tokens is already included in output_tokens + // No need to add separately as Claude expects total output_tokens + } + // First chunk - ALWAYS send message_start first if (!state.messageStartSent) { state.messageStartSent = true; @@ -158,10 +192,12 @@ export function openaiToClaudeResponse(chunk, state) { }); } + // Use tracked usage or default to 0 + const finalUsage = state.usage || { input_tokens: 0, output_tokens: 0 }; results.push({ type: "message_delta", delta: { stop_reason: convertFinishReason(choice.finish_reason) }, - usage: { output_tokens: 0 } + usage: finalUsage }); results.push({ type: "message_stop" }); } diff --git a/open-sse/utils/stream.js b/open-sse/utils/stream.js index cebd863..811c182 100644 --- a/open-sse/utils/stream.js +++ b/open-sse/utils/stream.js @@ -43,12 +43,13 @@ function extractUsage(chunk) { reasoning_tokens: chunk.usage.completion_tokens_details?.reasoning_tokens }); } - // Gemini format + // Gemini format (Antigravity) if (chunk.usageMetadata && typeof chunk.usageMetadata === 'object') { return normalizeUsage({ - prompt_tokens: chunk.usageMetadata.promptTokenCount || 0, - completion_tokens: chunk.usageMetadata.candidatesTokenCount || 0, - reasoning_tokens: chunk.usageMetadata.thoughtsTokenCount + prompt_tokens: chunk.usageMetadata?.promptTokenCount || 0, + completion_tokens: chunk.usageMetadata?.candidatesTokenCount || 0, + cached_tokens: chunk.usageMetadata?.cachedContentTokenCount, + reasoning_tokens: chunk.usageMetadata?.thoughtsTokenCount }); } return null; @@ -90,21 +91,27 @@ function logUsage(provider, usage, model = null, connectionId = null) { if (!usage || typeof usage !== 'object') return; const p = provider?.toUpperCase() || "UNKNOWN"; - const inTokens = usage?.prompt_tokens || 0; - const outTokens = usage?.completion_tokens || 0; + + // Support both formats: + // - OpenAI: prompt_tokens, completion_tokens + // - Claude: input_tokens, output_tokens + const inTokens = usage?.prompt_tokens || usage?.input_tokens || 0; + const outTokens = usage?.completion_tokens || usage?.output_tokens || 0; let msg = `[${getTimeString()}] 📊 [USAGE] ${p} | in=${inTokens} | out=${outTokens}`; if (connectionId) msg += ` | account=${connectionId.slice(0, 8)}...`; + // Support both formats: cache_read_input_tokens (Claude) and cached_tokens (OpenAI/Gemini) + const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens; + if (cacheRead) msg += ` | cache_read=${cacheRead}`; + if (usage.cache_creation_input_tokens) msg += ` | cache_write=${usage.cache_creation_input_tokens}`; - if (usage.cache_read_input_tokens) msg += ` | cache_read=${usage.cache_read_input_tokens}`; - if (usage.cached_tokens) msg += ` | cached=${usage.cached_tokens}`; if (usage.reasoning_tokens) msg += ` | reasoning=${usage.reasoning_tokens}`; console.log(`${COLORS.green}${msg}${COLORS.reset}`); // Log to log.txt - appendRequestLog({ model, provider, connectionId, tokens: usage, status: "200 OK" }).catch(() => {}); + appendRequestLog({ model, provider, connectionId, tokens: usage, status: "200 OK" }).catch(() => { }); // Save to DB saveRequestUsage({ @@ -147,7 +154,7 @@ export function formatSSE(data, sourceFormat) { if (data === null || data === undefined) { return "data: null\n\n"; } - + if (data && data.done) return "data: [DONE]\n\n"; // OpenAI Responses API format: has event field @@ -230,7 +237,7 @@ export function createSSEStream(options = {}) { const parsed = JSON.parse(trimmed.slice(5).trim()); const extracted = extractUsage(parsed); if (extracted) usage = extracted; - } catch {} + } catch { } } // Normalize: ensure "data: " has space let output; @@ -263,7 +270,7 @@ export function createSSEStream(options = {}) { // Translate: targetFormat -> openai -> sourceFormat const translated = translateResponse(targetFormat, sourceFormat, parsed, state); - + // Log OpenAI intermediate chunks (if available) if (translated?._openaiIntermediate) { for (const item of translated._openaiIntermediate) { @@ -271,7 +278,7 @@ export function createSSEStream(options = {}) { reqLogger?.appendOpenAIChunk?.(openaiOutput); } } - + if (translated?.length > 0) { for (const item of translated) { const output = formatSSE(item, sourceFormat); @@ -297,11 +304,11 @@ export function createSSEStream(options = {}) { reqLogger?.appendConvertedChunk?.(output); controller.enqueue(sharedEncoder.encode(output)); } -if (usage && typeof usage === 'object') { + if (usage && typeof usage === 'object') { logUsage(provider, usage, model, connectionId); } else { // No usage data available - still mark request as completed - appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => {}); + appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => { }); } return; } @@ -311,7 +318,7 @@ if (usage && typeof usage === 'object') { const parsed = parseSSELine(buffer.trim()); if (parsed && !parsed.done) { const translated = translateResponse(targetFormat, sourceFormat, parsed, state); - + // Log OpenAI intermediate chunks if (translated?._openaiIntermediate) { for (const item of translated._openaiIntermediate) { @@ -319,7 +326,7 @@ if (usage && typeof usage === 'object') { reqLogger?.appendOpenAIChunk?.(openaiOutput); } } - + if (translated?.length > 0) { for (const item of translated) { const output = formatSSE(item, sourceFormat); @@ -332,7 +339,7 @@ if (usage && typeof usage === 'object') { // Flush remaining events (only once at stream end) const flushed = translateResponse(targetFormat, sourceFormat, null, state); - + // Log OpenAI intermediate chunks for flushed events if (flushed?._openaiIntermediate) { for (const item of flushed._openaiIntermediate) { @@ -340,7 +347,7 @@ if (usage && typeof usage === 'object') { reqLogger?.appendOpenAIChunk?.(openaiOutput); } } - + if (flushed?.length > 0) { for (const item of flushed) { const output = formatSSE(item, sourceFormat); @@ -354,11 +361,11 @@ if (usage && typeof usage === 'object') { reqLogger?.appendConvertedChunk?.(doneOutput); controller.enqueue(sharedEncoder.encode(doneOutput)); -if (state?.usage && typeof state.usage === 'object') { + if (state?.usage && typeof state.usage === 'object') { logUsage(state.provider || targetFormat, state.usage, model, connectionId); } else { // No usage data available - still mark request as completed - appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => {}); + appendRequestLog({ model, provider, connectionId, tokens: null, status: "200 OK" }).catch(() => { }); } } catch (error) { console.log("Error in flush:", error);