From 4a575f21a2ef315ddd2b0b6969b07d6190657a24 Mon Sep 17 00:00:00 2001 From: decolua Date: Sat, 16 May 2026 11:20:08 +0700 Subject: [PATCH] https://github.com/decolua/9router/pull/1167 https://github.com/decolua/9router/pull/1166 --- open-sse/config/kiroConstants.js | 262 ++++++++++++++ open-sse/config/providerModels.js | 11 + open-sse/executors/kiro.js | 37 ++ open-sse/services/kiroModels.js | 332 ++++++++++++++++++ open-sse/translator/request/openai-to-kiro.js | 52 ++- .../translator/response/kiro-to-openai.js | 14 +- src/app/api/providers/[id]/models/route.js | 72 ++-- src/app/api/v1/models/route.js | 1 + tests/unit/openai-to-kiro.test.js | 146 ++++++++ 9 files changed, 884 insertions(+), 43 deletions(-) create mode 100644 open-sse/config/kiroConstants.js create mode 100644 open-sse/services/kiroModels.js create mode 100644 tests/unit/openai-to-kiro.test.js diff --git a/open-sse/config/kiroConstants.js b/open-sse/config/kiroConstants.js new file mode 100644 index 0000000..03e64f3 --- /dev/null +++ b/open-sse/config/kiroConstants.js @@ -0,0 +1,262 @@ +/** + * Kiro-specific constants and helpers. + * + * Mirrors the behaviour of `internal/translator/kiro/common/constants.go` and + * `internal/translator/kiro/claude/kiro_claude_request.go` from the + * CLIProxyAPIPlus reference implementation, scoped down to what 9router needs: + * + * - `-agentic` model suffix detection + chunked-write system prompt + * - reasoning / thinking trigger detection (Anthropic-Beta header, + * Claude `thinking`, OpenAI `reasoning_effort`, AMP/Cursor magic tag) + * - the `enabled` system-prompt injection + * that turns Kiro reasoning on + * + * Kiro upstream does not advertise `-agentic` model IDs; they are a 9router + * fiction. The suffix is stripped before the request leaves this process. 
+ */ + +export const KIRO_AGENTIC_SUFFIX = "-agentic"; +export const KIRO_THINKING_SUFFIX = "-thinking"; + +export const KIRO_THINKING_BUDGET_DEFAULT = 16000; + +export const KIRO_AGENTIC_SYSTEM_PROMPT = ` +# CRITICAL: CHUNKED WRITE PROTOCOL (MANDATORY) + +You MUST follow these rules for ALL file operations. Violation causes server timeouts and task failure. + +## ABSOLUTE LIMITS +- **MAXIMUM 350 LINES** per single write/edit operation - NO EXCEPTIONS +- **RECOMMENDED 300 LINES** or less for optimal performance +- **NEVER** write entire files in one operation if >300 lines + +## MANDATORY CHUNKED WRITE STRATEGY + +### For NEW FILES (>300 lines total): +1. FIRST: Write initial chunk (first 250-300 lines) using write_to_file/fsWrite +2. THEN: Append remaining content in 250-300 line chunks using file append operations +3. REPEAT: Continue appending until complete + +### For EDITING EXISTING FILES: +1. Use surgical edits (apply_diff/targeted edits) - change ONLY what's needed +2. NEVER rewrite entire files - use incremental modifications +3. Split large refactors into multiple small, focused edits + +### For LARGE CODE GENERATION: +1. Generate in logical sections (imports, types, functions separately) +2. Write each section as a separate operation +3. 
Use append operations for subsequent sections + +## EXAMPLES OF CORRECT BEHAVIOR + +CORRECT: Writing a 600-line file +- Operation 1: Write lines 1-300 (initial file creation) +- Operation 2: Append lines 301-600 + +CORRECT: Editing multiple functions +- Operation 1: Edit function A +- Operation 2: Edit function B +- Operation 3: Edit function C + +WRONG: Writing 500 lines in single operation -> TIMEOUT +WRONG: Rewriting entire file to change 5 lines -> TIMEOUT +WRONG: Generating massive code blocks without chunking -> TIMEOUT + +## WHY THIS MATTERS +- Server has 2-3 minute timeout for operations +- Large writes exceed timeout and FAIL completely +- Chunked writes are FASTER and more RELIABLE +- Failed writes waste time and require retry + +REMEMBER: When in doubt, write LESS per operation. Multiple small operations > one large operation. +`.trim(); + +/** + * Detect whether an inbound request is asking for reasoning / thinking output. + * + * Sources of intent (any one is enough): + * - HTTP header `Anthropic-Beta: ...interleaved-thinking...` + * - JSON `thinking.type === "enabled"` (Claude Messages API) + * - JSON `reasoning_effort` in {low, medium, high, auto} (OpenAI o1/o3) + * - JSON `reasoning.effort` in {low, medium, high, auto} (OpenAI Responses) + * - System prompt contains `enabled` or + * `interleaved` (AMP / Cursor) + * - Model name contains `thinking` or `-reason` + * + * @param {object} body OpenAI-shaped request body (post-translation) + * @param {object} [headers] Original inbound HTTP headers (case-insensitive) + * @param {string} [model] Model id the caller asked for (post-strip ok) + * @returns {boolean} + */ +export function isThinkingEnabled(body, headers, model) { + if (headers) { + const beta = pickHeader(headers, "anthropic-beta"); + if (typeof beta === "string" && beta.toLowerCase().includes("interleaved-thinking")) { + return true; + } + } + + if (body && typeof body === "object") { + const thinking = body.thinking; + if (thinking && typeof 
thinking === "object" && thinking.type === "enabled") { + const budget = Number(thinking.budget_tokens); + if (!Number.isFinite(budget) || budget > 0) { + return true; + } + } + + const effort = body.reasoning_effort + ?? (body.reasoning && typeof body.reasoning === "object" ? body.reasoning.effort : null); + if (typeof effort === "string") { + const v = effort.toLowerCase(); + if (v && v !== "none" && (v === "low" || v === "medium" || v === "high" || v === "auto")) { + return true; + } + } + + if (containsThinkingModeTag(body)) { + return true; + } + } + + if (typeof model === "string" && model) { + const m = model.toLowerCase(); + if (m.includes("thinking") || m.includes("-reason")) { + return true; + } + } + + return false; +} + +/** + * Detect whether a model id refers to a 9router synthetic agentic variant. + * Agentic variants share the same upstream model as the base; the only + * difference is the chunked-write system prompt this module injects. + * + * @param {string} model + * @returns {boolean} + */ +export function isAgenticModel(model) { + return typeof model === "string" && model.endsWith(KIRO_AGENTIC_SUFFIX); +} + +/** + * Strip the `-agentic` suffix from a model id, leaving the upstream-real id. + * + * @param {string} model + * @returns {string} + */ +export function stripAgenticSuffix(model) { + if (!isAgenticModel(model)) return model; + return model.slice(0, -KIRO_AGENTIC_SUFFIX.length); +} + +/** + * Detect whether a model id is a 9router synthetic thinking variant + * (e.g. `claude-sonnet-4.5-thinking`). Same upstream model as the base; the + * only difference is `enabled` injection. + * + * Note: real Kiro thinking-capable variants exist (e.g. `kimi-k2-thinking` in + * other providers), but for the `kr/` namespace there is no `-thinking` + * model on Kiro upstream. Treat the suffix as a synthetic alias. 
+ * + * @param {string} model Model id with `-agentic` already stripped + * @returns {boolean} + */ +export function isThinkingModel(model) { + return typeof model === "string" && model.endsWith(KIRO_THINKING_SUFFIX); +} + +/** + * Strip the `-thinking` suffix from a model id. + * + * @param {string} model + * @returns {string} + */ +export function stripThinkingSuffix(model) { + if (!isThinkingModel(model)) return model; + return model.slice(0, -KIRO_THINKING_SUFFIX.length); +} + +/** + * Resolve a 9router model id to the real upstream Kiro model id, plus flags + * describing which behaviours the suffixes implied. + * + * resolveKiroModel("claude-sonnet-4.5-thinking-agentic") + * => { upstream: "claude-sonnet-4.5", agentic: true, thinking: true } + * resolveKiroModel("claude-sonnet-4.5-thinking") + * => { upstream: "claude-sonnet-4.5", agentic: false, thinking: true } + * resolveKiroModel("claude-sonnet-4.5-agentic") + * => { upstream: "claude-sonnet-4.5", agentic: true, thinking: false } + * resolveKiroModel("claude-sonnet-4.5") + * => { upstream: "claude-sonnet-4.5", agentic: false, thinking: false } + * + * @param {string} model + * @returns {{ upstream: string, agentic: boolean, thinking: boolean }} + */ +export function resolveKiroModel(model) { + let upstream = model; + let agentic = false; + let thinking = false; + if (isAgenticModel(upstream)) { + agentic = true; + upstream = stripAgenticSuffix(upstream); + } + if (isThinkingModel(upstream)) { + thinking = true; + upstream = stripThinkingSuffix(upstream); + } + return { upstream, agentic, thinking }; +} + +/** + * Build the magic system-prompt prefix that turns Kiro reasoning on. + * Same shape as CLIProxyAPIPlus. 
+ * + * @param {number} [budget=KIRO_THINKING_BUDGET_DEFAULT] + */ +export function buildThinkingSystemPrefix(budget = KIRO_THINKING_BUDGET_DEFAULT) { + const safeBudget = Math.max(1, Math.min(32000, Number(budget) || KIRO_THINKING_BUDGET_DEFAULT)); + return `enabled\n${safeBudget}`; +} + +function pickHeader(headers, name) { + if (!headers) return undefined; + if (typeof headers.get === "function") { + return headers.get(name); + } + const lower = name.toLowerCase(); + for (const key of Object.keys(headers)) { + if (key.toLowerCase() === lower) { + return headers[key]; + } + } + return undefined; +} + +function containsThinkingModeTag(body) { + const messages = Array.isArray(body?.messages) ? body.messages : []; + for (const msg of messages) { + if (!msg) continue; + if (msg.role !== "system" && msg.role !== "user") continue; + const content = msg.content; + if (typeof content === "string") { + if (containsTagInText(content)) return true; + } else if (Array.isArray(content)) { + for (const part of content) { + const text = part?.text; + if (typeof text === "string" && containsTagInText(text)) return true; + } + } + } + if (typeof body?.system === "string" && containsTagInText(body.system)) return true; + return false; +} + +function containsTagInText(text) { + if (!text) return false; + if (!text.includes("")) return false; + return text.includes("enabled") + || text.includes("interleaved"); +} diff --git a/open-sse/config/providerModels.js b/open-sse/config/providerModels.js index a3e76d3..8259eb6 100644 --- a/open-sse/config/providerModels.js +++ b/open-sse/config/providerModels.js @@ -132,6 +132,7 @@ export const PROVIDER_MODELS = { { id: "text-embedding-3-large", name: "Text Embedding 3 Large (GitHub)", type: "embedding" }, ], kr: [ // Kiro AI + // --- Base Claude variants --- // { id: "claude-opus-4.5", name: "Claude Opus 4.5" }, { id: "claude-sonnet-4.5", name: "Claude Sonnet 4.5" }, { id: "claude-haiku-4.5", name: "Claude Haiku 4.5" }, @@ -139,6 +140,16 @@ 
export const PROVIDER_MODELS = { { id: "qwen3-coder-next", name: "Qwen3 Coder Next", strip: ["image", "audio"] }, { id: "glm-5", name: "GLM 5" }, { id: "MiniMax-M2.5", name: "MiniMax M2.5" }, + // --- Thinking variants (alias to base; thinking is enabled at request time + // via enabled system-prompt injection) --- + { id: "claude-sonnet-4.5-thinking", name: "Claude Sonnet 4.5 (Thinking)" }, + { id: "claude-haiku-4.5-thinking", name: "Claude Haiku 4.5 (Thinking)" }, + // --- Agentic variants (synthetic; same upstream model + chunked-write + // system prompt to dodge Kiro's 2-3 min server timeout on big writes) --- + { id: "claude-sonnet-4.5-agentic", name: "Claude Sonnet 4.5 (Agentic)" }, + { id: "claude-haiku-4.5-agentic", name: "Claude Haiku 4.5 (Agentic)" }, + { id: "claude-sonnet-4.5-thinking-agentic", name: "Claude Sonnet 4.5 (Thinking + Agentic)" }, + { id: "claude-haiku-4.5-thinking-agentic", name: "Claude Haiku 4.5 (Thinking + Agentic)" }, ], cu: [ // Cursor IDE { id: "default", name: "Auto (Server Picks)" }, diff --git a/open-sse/executors/kiro.js b/open-sse/executors/kiro.js index 120f440..7af3f3f 100644 --- a/open-sse/executors/kiro.js +++ b/open-sse/executors/kiro.js @@ -87,6 +87,8 @@ export class KiroExecutor extends BaseExecutor { endDetected: false, finishEmitted: false, hasToolCalls: false, + hasReasoningContent: false, + reasoningChunkCount: 0, toolCallIndex: 0, seenToolIds: new Map() }; @@ -143,6 +145,41 @@ export class KiroExecutor extends BaseExecutor { controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(chunk)}\n\n`)); } + // Handle reasoningContentEvent (Kiro thinking / reasoning) + // Kiro returns reasoning as a separate event when the request system + // prompt contains enabled. Surface it + // as OpenAI delta.reasoning_content so downstream translators can map + // it back to Claude thinking blocks / Anthropic reasoning, etc. 
+ if (eventType === "reasoningContentEvent") { + const reasoning = event.payload?.reasoningContentEvent || event.payload || {}; + const reasoningText = (typeof reasoning === "string") + ? reasoning + : (reasoning.text || reasoning.content || ""); + if (reasoningText) { + state.hasReasoningContent = true; + state.totalContentLength += reasoningText.length; + + const reasoningDelta = state.reasoningChunkCount === 0 && chunkIndex === 0 + ? { role: "assistant", reasoning_content: reasoningText } + : { reasoning_content: reasoningText }; + + const chunk = { + id: responseId, + object: "chat.completion.chunk", + created, + model, + choices: [{ + index: 0, + delta: reasoningDelta, + finish_reason: null + }] + }; + chunkIndex++; + state.reasoningChunkCount++; + controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(chunk)}\n\n`)); + } + } + // Handle codeEvent if (eventType === "codeEvent" && event.payload?.content) { const chunk = { diff --git a/open-sse/services/kiroModels.js b/open-sse/services/kiroModels.js new file mode 100644 index 0000000..6b52413 --- /dev/null +++ b/open-sse/services/kiroModels.js @@ -0,0 +1,332 @@ +/** + * Kiro model catalog fetcher. + * + * Calls AWS CodeWhisperer's `ListAvailableModels` endpoint to get the live + * catalog for an authenticated Kiro account, then expands each upstream model + * into 9router-shaped variants: + * + * {upstream} - base model + * {upstream}-thinking - same model, thinking on at request time + * {upstream}-agentic - same model, chunked-write prompt prepended + * {upstream}-thinking-agentic - both + * + * The `-thinking` and `-agentic` suffixes do not exist on the Kiro upstream + * API. They are 9router fictions and the `openai-to-kiro` translator strips + * them before the request leaves this process. + * + * The runtime UA is built to match what Kiro IDE itself sends, because the + * upstream rejects requests with malformed `User-Agent` headers (returns 400 + * "format of value 'os/win/10 lang/js ...' 
is invalid"). + */ + +import { v4 as uuidv4 } from "uuid"; +import { createHash } from "crypto"; +import { refreshKiroToken } from "./tokenRefresh.js"; + +const KIRO_RUNTIME_SDK_VERSION = "1.0.0"; +const KIRO_AGENT_OS = "windows"; +const KIRO_AGENT_OS_VERSION = "10.0.26200"; +const KIRO_NODE_VERSION = "22.21.1"; +const KIRO_VERSION = "0.10.32"; + +const DEFAULT_REGION = "us-east-1"; +const FETCH_TIMEOUT_MS = 30_000; +const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes per credential + +/** @type {Map} */ +const catalogCache = new Map(); + +/** + * Strip the `-agentic` and/or `-thinking` suffixes from a synthetic id, if + * any. Used only for display naming when a Kiro upstream id happens to look + * synthetic (defensive). + */ +function stripSyntheticSuffixes(id) { + let out = id; + if (out.endsWith("-agentic")) out = out.slice(0, -"-agentic".length); + if (out.endsWith("-thinking")) out = out.slice(0, -"-thinking".length); + return out; +} + +/** + * Extract region from a profileArn like + * arn:aws:codewhisperer:us-east-1:123456789012:profile/ABC + */ +function regionFromProfileArn(profileArn) { + if (!profileArn || typeof profileArn !== "string") return DEFAULT_REGION; + const parts = profileArn.split(":"); + if (parts.length >= 4 && parts[3]) return parts[3]; + return DEFAULT_REGION; +} + +/** + * Build the per-account fingerprint headers Kiro upstream validates. + * Keyed off whatever stable identifier we have for this credential, so the + * same account always presents the same machineId. 
+ */ +function buildKiroFingerprintHeaders(credentials) { + const seed = + credentials?.providerSpecificData?.clientId + || credentials?.refreshToken + || credentials?.providerSpecificData?.profileArn + || credentials?.accessToken + || "kiro-anonymous"; + const machineId = createHash("sha256").update(String(seed)).digest("hex"); + + const userAgent = + `aws-sdk-js/${KIRO_RUNTIME_SDK_VERSION} ua/2.1 ` + + `os/${KIRO_AGENT_OS}#${KIRO_AGENT_OS_VERSION} ` + + `lang/js md/nodejs#${KIRO_NODE_VERSION} ` + + `api/codewhispererruntime#${KIRO_RUNTIME_SDK_VERSION} m/N,E ` + + `KiroIDE-${KIRO_VERSION}-${machineId}`; + const amzUserAgent = `aws-sdk-js/${KIRO_RUNTIME_SDK_VERSION} KiroIDE-${KIRO_VERSION}-${machineId}`; + + return { + "User-Agent": userAgent, + "x-amz-user-agent": amzUserAgent, + "x-amzn-kiro-agent-mode": "vibe", + "x-amzn-codewhisperer-optout": "true", + "amz-sdk-request": "attempt=1; max=1", + "amz-sdk-invocation-id": uuidv4(), + "Accept": "application/json" + }; +} + +/** + * Build the synthetic 9router variant set for a single upstream Kiro model. + * + * Returns objects shaped for `PROVIDER_MODELS` (`{ id, name }`) so they can + * be slotted directly into the existing model registry. + * + * The `auto` model is special: Kiro picks the underlying model server-side, + * so the chunked-write `-agentic` prompt is not meaningful (the prompt + * targets coding-agent file writes). Match CLIProxyAPIPlus and skip + * `-agentic` / `-thinking-agentic` for `auto`. 
+ */ +function buildVariants(upstream, displayName) { + const safeUpstream = stripSyntheticSuffixes(upstream); + const display = displayName || `Kiro ${safeUpstream}`; + const isAuto = safeUpstream === "auto"; + + const variants = [ + { + id: safeUpstream, + name: display, + capabilities: { thinking: false, agentic: false } + }, + { + id: `${safeUpstream}-thinking`, + name: `${display} (Thinking)`, + capabilities: { thinking: true, agentic: false } + } + ]; + + if (!isAuto) { + variants.push({ + id: `${safeUpstream}-agentic`, + name: `${display} (Agentic)`, + capabilities: { thinking: false, agentic: true } + }); + variants.push({ + id: `${safeUpstream}-thinking-agentic`, + name: `${display} (Thinking + Agentic)`, + capabilities: { thinking: true, agentic: true } + }); + } + + return variants; +} + +/** + * Format the human-friendly display name for a Kiro model, including the + * rate multiplier when it is something other than 1.0x. + */ +function formatDisplayName(modelName, modelId, rateMultiplier) { + const base = (modelName || modelId || "Kiro").trim(); + const rate = Number(rateMultiplier); + if (!Number.isFinite(rate) || Math.abs(rate - 1.0) < 1e-9 || rate <= 0) { + return `Kiro ${base}`; + } + // Locale-independent decimal formatting. + const rateStr = rate.toFixed(1).replace(",", "."); + return `Kiro ${base} (${rateStr}x credit)`; +} + +/** + * Fetch the raw model catalog from Kiro. Returns the array under `.models` + * from the API response, or throws on network/HTTP error. 
+ */ +async function fetchKiroCatalogRaw(credentials, signal) { + const profileArn = credentials?.providerSpecificData?.profileArn || ""; + const region = regionFromProfileArn(profileArn); + const params = new URLSearchParams(); + params.set("origin", "AI_EDITOR"); + if (profileArn) params.set("profileArn", profileArn); + const url = `https://q.${region}.amazonaws.com/ListAvailableModels?${params.toString()}`; + + const headers = { + ...buildKiroFingerprintHeaders(credentials), + "Authorization": `Bearer ${credentials?.accessToken || ""}` + }; + + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort("timeout"), FETCH_TIMEOUT_MS); + // Forward outer cancellation if any. + if (signal && typeof signal.addEventListener === "function") { + signal.addEventListener("abort", () => controller.abort(signal.reason)); + } + + let response; + try { + response = await fetch(url, { + method: "GET", + headers, + signal: controller.signal + }); + } finally { + clearTimeout(timer); + } + + if (!response.ok) { + const text = await response.text().catch(() => ""); + const err = new Error(`Kiro ListAvailableModels ${response.status}: ${text || response.statusText}`); + err.status = response.status; + err.body = text; + throw err; + } + + const data = await response.json(); + const models = Array.isArray(data?.models) ? data.models : []; + return models; +} + +/** + * Build a stable cache key for a Kiro credential. Uses the most stable id we + * have available so different login sessions for the same account share a + * cache entry. 
+ */ +function cacheKey(credentials) { + const psd = credentials?.providerSpecificData || {}; + const seed = + psd.profileArn + || psd.clientId + || credentials?.refreshToken + || credentials?.accessToken + || "anonymous"; + return createHash("sha256").update(`kiro:${seed}`).digest("hex"); +} + +/** + * Resolve the live Kiro model catalog for a credential and expand each entry + * into 9router variants (`-thinking`, `-agentic`, `-thinking-agentic`). + * + * On any error (network, 4xx, 5xx), returns `null` so callers can fall back + * to the static catalog without taking down the dashboard or `/v1/models`. + * + * @param {object} credentials Connection record (accessToken, refreshToken, + * providerSpecificData {profileArn, authMethod, clientId, clientSecret, region}) + * @param {object} [options] + * @param {boolean} [options.forceRefresh] Bypass the per-credential cache. + * @param {object} [options.log] Logger. + * @param {function} [options.onCredentialsRefreshed] Persist refreshed token + * back to your credential store. Called with `{ accessToken, refreshToken, + * expiresIn }` whenever a 401 triggers a token refresh. 
+ * @returns {Promise<{ models: object[], rawModels: object[] } | null>} + */ +export async function resolveKiroModels(credentials, options = {}) { + if (!credentials || !credentials.accessToken) { + options.log?.debug?.("KIRO_MODELS", "No accessToken; skipping live fetch"); + return null; + } + + const key = cacheKey(credentials); + const now = Date.now(); + if (!options.forceRefresh) { + const cached = catalogCache.get(key); + if (cached && cached.expiresAt > now) { + return { models: cached.models, rawModels: cached.rawModels }; + } + } + + let raw; + try { + raw = await fetchKiroCatalogRaw(credentials, options.signal); + } catch (err) { + if (err && err.status === 401 && credentials.refreshToken) { + options.log?.info?.("KIRO_MODELS", "Got 401 from Kiro; refreshing token"); + const refreshed = await refreshKiroToken( + credentials.refreshToken, + credentials.providerSpecificData, + options.log + ); + if (refreshed?.accessToken) { + const next = { ...credentials, ...refreshed }; + if (typeof options.onCredentialsRefreshed === "function") { + try { await options.onCredentialsRefreshed(refreshed); } catch (e) { + options.log?.warn?.("KIRO_MODELS", `onCredentialsRefreshed failed: ${e?.message || e}`); + } + } + try { + raw = await fetchKiroCatalogRaw(next, options.signal); + // Update the in-memory credential reference too so retry logic uses + // the fresh token consistently. 
+ credentials.accessToken = next.accessToken; + if (next.refreshToken) credentials.refreshToken = next.refreshToken; + } catch (err2) { + options.log?.warn?.("KIRO_MODELS", `Retry after refresh failed: ${err2?.message || err2}`); + return null; + } + } else { + options.log?.warn?.("KIRO_MODELS", "Token refresh did not return accessToken"); + return null; + } + } else { + options.log?.warn?.("KIRO_MODELS", `ListAvailableModels failed: ${err?.message || err}`); + return null; + } + } + + const expanded = []; + for (const m of raw) { + if (!m || typeof m !== "object") continue; + const upstreamId = m.modelId || m.id; + if (!upstreamId) continue; + const display = formatDisplayName(m.modelName, upstreamId, m.rateMultiplier); + const ctx = Number(m?.tokenLimits?.maxInputTokens) || 200_000; + for (const v of buildVariants(upstreamId, display)) { + expanded.push({ + ...v, + // Carry over context window + raw upstream metadata so the caller + // (e.g. the dashboard models endpoint) can render it. + contextLength: ctx, + rateMultiplier: Number.isFinite(Number(m.rateMultiplier)) ? Number(m.rateMultiplier) : 1.0, + upstreamModelId: upstreamId, + description: m.description || "" + }); + } + } + + catalogCache.set(key, { + expiresAt: now + CACHE_TTL_MS, + models: expanded, + rawModels: raw + }); + + return { models: expanded, rawModels: raw }; +} + +/** + * Drop any cached catalog for this credential. Call this after rotating / + * importing tokens so the next fetch is fresh. + */ +export function invalidateKiroModelCache(credentials) { + if (!credentials) return; + catalogCache.delete(cacheKey(credentials)); +} + +/** + * Drop the entire in-memory cache. Mostly for tests / manual debug. 
+ */ +export function clearKiroModelCache() { + catalogCache.clear(); +} diff --git a/open-sse/translator/request/openai-to-kiro.js b/open-sse/translator/request/openai-to-kiro.js index b16af83..716ec86 100644 --- a/open-sse/translator/request/openai-to-kiro.js +++ b/open-sse/translator/request/openai-to-kiro.js @@ -5,6 +5,12 @@ import { register } from "../index.js"; import { FORMATS } from "../formats.js"; import { v4 as uuidv4 } from "uuid"; +import { + resolveKiroModel, + isThinkingEnabled, + buildThinkingSystemPrefix, + KIRO_AGENTIC_SYSTEM_PROMPT +} from "../../config/kiroConstants.js"; /** * Convert OpenAI messages to Kiro format @@ -282,6 +288,20 @@ function convertMessages(messages, tools, model) { /** * Build Kiro payload from OpenAI format + * + * Two 9router-specific behaviours implemented here: + * + * 1. `-agentic` model suffix. Synthetic variant — same upstream model, but we + * inject a chunked-write system prompt to keep large file writes under + * Kiro's 2-3 minute server timeout. The suffix is stripped before being + * sent upstream. + * + * 2. Thinking / reasoning. Kiro does not accept `thinking.type` or + * `reasoning_effort` natively. The only way to enable reasoning is to + * inject `<thinking_mode>enabled</thinking_mode>` into the user content + * sent upstream. Detection covers Anthropic-Beta header, Claude API + * `thinking`, OpenAI `reasoning_effort`, AMP/Cursor magic tags, and model + * name hints. 
*/ export function buildKiroPayload(model, body, stream, credentials) { const messages = body.messages || []; @@ -290,14 +310,29 @@ export function buildKiroPayload(model, body, stream, credentials) { const temperature = body.temperature; const topP = body.top_p; - const { history, currentMessage } = convertMessages(messages, tools, model); + const { upstream: upstreamModel, agentic, thinking: modelImpliesThinking } = resolveKiroModel(model); + const thinkingEnabled = modelImpliesThinking || isThinkingEnabled(body, null, model); + + const { history, currentMessage } = convertMessages(messages, tools, upstreamModel); const profileArn = credentials?.providerSpecificData?.profileArn || ""; let finalContent = currentMessage?.userInputMessage?.content || ""; const timestamp = new Date().toISOString(); - finalContent = `[Context: Current time is ${timestamp}]\n\n${finalContent}`; - + + // Build the system-prompt prefix that goes ABOVE the user message body. + // Order: thinking_mode tag first (so Kiro sees it before any user text), + // then context/timestamp marker, then optional agentic chunked-write prompt. 
+ const prefixParts = []; + if (thinkingEnabled) { + prefixParts.push(buildThinkingSystemPrefix()); + } + prefixParts.push(`[Context: Current time is ${timestamp}]`); + if (agentic) { + prefixParts.push(KIRO_AGENTIC_SYSTEM_PROMPT); + } + finalContent = `${prefixParts.join("\n\n")}\n\n${finalContent}`; + const payload = { conversationState: { chatTriggerType: "MANUAL", @@ -305,8 +340,11 @@ export function buildKiroPayload(model, body, stream, credentials) { currentMessage: { userInputMessage: { content: finalContent, - modelId: model, + modelId: upstreamModel, origin: "AI_EDITOR", + ...(currentMessage?.userInputMessage?.images?.length > 0 && { + images: currentMessage.userInputMessage.images + }), ...(currentMessage?.userInputMessage?.userInputMessageContext && { userInputMessageContext: currentMessage.userInputMessage.userInputMessageContext }) @@ -327,6 +365,12 @@ export function buildKiroPayload(model, body, stream, credentials) { if (topP !== undefined) payload.inferenceConfig.topP = topP; } + // Tag payload so the executor can route the upstream model id correctly. + Object.defineProperty(payload, "_kiroUpstreamModel", { + value: upstreamModel, + enumerable: false + }); + return payload; } diff --git a/open-sse/translator/response/kiro-to-openai.js b/open-sse/translator/response/kiro-to-openai.js index 9454c57..a1a15b6 100644 --- a/open-sse/translator/response/kiro-to-openai.js +++ b/open-sse/translator/response/kiro-to-openai.js @@ -85,12 +85,18 @@ export function convertKiroToOpenAI(chunk, state) { return openaiChunk; } - // Handle reasoning/thinking events + // Handle reasoning/thinking events. + // Kiro emits reasoningContentEvent when the request enabled thinking via + // the <thinking_mode>enabled</thinking_mode> system-prompt tag. We surface + // this as OpenAI delta.reasoning_content so downstream translators can map + // it to Claude thinking blocks / Anthropic reasoning / etc. 
if (eventType === "reasoningContentEvent" || data.reasoningContentEvent) { - const content = data.reasoningContentEvent?.content || data.content || ""; + const reasoning = data.reasoningContentEvent || data; + const content = (typeof reasoning === "string") + ? reasoning + : (reasoning.text || reasoning.content || data.content || ""); if (!content) return null; - // Convert to thinking block format (Claude-style) const openaiChunk = { id: state.responseId, object: "chat.completion.chunk", @@ -100,7 +106,7 @@ export function convertKiroToOpenAI(chunk, state) { index: 0, delta: { ...(state.chunkIndex === 0 ? { role: "assistant" } : {}), - content: `${content}` + reasoning_content: content }, finish_reason: null }] diff --git a/src/app/api/providers/[id]/models/route.js b/src/app/api/providers/[id]/models/route.js index 59ff4ea..2942352 100644 --- a/src/app/api/providers/[id]/models/route.js +++ b/src/app/api/providers/[id]/models/route.js @@ -5,6 +5,7 @@ import { KiroService } from "@/lib/oauth/services/kiro"; import { GEMINI_CONFIG } from "@/lib/oauth/constants/oauth"; import { refreshGoogleToken, updateProviderCredentials, refreshKiroToken } from "@/sse/services/tokenRefresh"; import { resolveOllamaLocalHost } from "open-sse/config/providers.js"; +import { resolveKiroModels } from "open-sse/services/kiroModels.js"; const GEMINI_CLI_MODELS_URL = "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels"; @@ -288,52 +289,53 @@ export async function GET(request, { params }) { }); } - // Kiro: Try dynamic model fetching first + // Kiro: Use resolveKiroModels to fetch live catalog + expand variants if (connection.provider === "kiro") { + const credentials = { + accessToken: connection.accessToken, + refreshToken: connection.refreshToken, + providerSpecificData: connection.providerSpecificData || {} + }; let warning; try { - const kiroService = new KiroService(); - const profileArn = connection.providerSpecificData?.profileArn; - const accessToken = 
connection.accessToken; - const refreshToken = connection.refreshToken; - - if (accessToken && profileArn) { - try { - const models = await kiroService.listAvailableModels(accessToken, profileArn); - return NextResponse.json({ - provider: connection.provider, - connectionId: connection.id, - models - }); - } catch (error) { - if (error.message.includes("AccessDeniedException") && refreshToken) { - console.log("Kiro token invalid/expired. Attempting refresh..."); - const refreshed = await refreshKiroToken(refreshToken, connection.providerSpecificData); - - if (refreshed?.accessToken) { - await updateProviderCredentials(connection.id, { - accessToken: refreshed.accessToken, - refreshToken: refreshed.refreshToken || refreshToken, - expiresIn: refreshed.expiresIn, - }); - - const models = await kiroService.listAvailableModels(refreshed.accessToken, profileArn); - return NextResponse.json({ - provider: connection.provider, - connectionId: connection.id, - models - }); - } + const result = await resolveKiroModels(credentials, { + log: console, + onCredentialsRefreshed: async (refreshed) => { + if (refreshed?.accessToken) { + await updateProviderCredentials(connection.id, { + accessToken: refreshed.accessToken, + refreshToken: refreshed.refreshToken || connection.refreshToken, + expiresIn: refreshed.expiresIn, + }); + connection.accessToken = refreshed.accessToken; + if (refreshed.refreshToken) connection.refreshToken = refreshed.refreshToken; } - throw error; // Let outer catch handle it } + }); + + if (result?.models?.length) { + const models = result.models.map((m) => ({ + id: m.id, + name: m.name, + upstreamModelId: m.upstreamModelId, + contextLength: m.contextLength, + rateMultiplier: m.rateMultiplier, + capabilities: m.capabilities, + description: m.description + })); + return NextResponse.json({ + provider: connection.provider, + connectionId: connection.id, + models + }); } + warning = "Kiro returned no models; falling back to static catalog."; } catch (error) { 
warning = `Failed to fetch Kiro models: ${error.message}`; console.log("Failed to fetch Kiro models dynamically, falling back to static:", error.message); } - // Return empty dynamic list so UI falls back to static provider models. + // Empty dynamic list → UI falls back to static provider models. return NextResponse.json({ provider: connection.provider, connectionId: connection.id, diff --git a/src/app/api/v1/models/route.js b/src/app/api/v1/models/route.js index 89b0329..ab7abc9 100644 --- a/src/app/api/v1/models/route.js +++ b/src/app/api/v1/models/route.js @@ -7,6 +7,7 @@ import { } from "@/shared/constants/providers"; import { getProviderConnections, getCombos, getCustomModels, getModelAliases } from "@/lib/localDb"; import { getDisabledModels } from "@/lib/disabledModelsDb"; +import { resolveKiroModels } from "open-sse/services/kiroModels.js"; const parseOpenAIStyleModels = (data) => { if (Array.isArray(data)) return data; diff --git a/tests/unit/openai-to-kiro.test.js b/tests/unit/openai-to-kiro.test.js new file mode 100644 index 0000000..3e82ccb --- /dev/null +++ b/tests/unit/openai-to-kiro.test.js @@ -0,0 +1,146 @@ +/** + * Unit tests for open-sse/translator/request/openai-to-kiro.js + * + * Tests cover: + * - buildKiroPayload() - basic message conversion + * - Image forwarding fix: images in currentMessage must be included in payload + */ + +import { describe, it, expect } from "vitest"; +import { buildKiroPayload } from "../../open-sse/translator/request/openai-to-kiro.js"; + +describe("buildKiroPayload", () => { + describe("basic message conversion", () => { + it("should convert a simple text message", () => { + const body = { + messages: [{ role: "user", content: "Hello" }] + }; + + const result = buildKiroPayload("claude-sonnet-4.6", body, true, {}); + + const currentMsg = result.conversationState.currentMessage; + expect(currentMsg.userInputMessage.content).toContain("Hello"); + expect(currentMsg.userInputMessage.modelId).toBe("claude-sonnet-4.6"); + 
expect(currentMsg.userInputMessage.origin).toBe("AI_EDITOR"); + }); + + it("should not include images field when no images are present", () => { + const body = { + messages: [{ role: "user", content: "No images here" }] + }; + + const result = buildKiroPayload("claude-sonnet-4.6", body, true, {}); + + const currentMsg = result.conversationState.currentMessage; + expect(currentMsg.userInputMessage.images).toBeUndefined(); + }); + }); + + describe("image forwarding", () => { + it("should forward base64 image from image_url content part", () => { + const fakeBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="; + const body = { + messages: [ + { + role: "user", + content: [ + { type: "text", text: "Describe this image" }, + { type: "image_url", image_url: { url: `data:image/png;base64,${fakeBase64}` } } + ] + } + ] + }; + + const result = buildKiroPayload("claude-sonnet-4.6", body, true, {}); + + const currentMsg = result.conversationState.currentMessage; + expect(currentMsg.userInputMessage.images).toBeDefined(); + expect(currentMsg.userInputMessage.images).toHaveLength(1); + expect(currentMsg.userInputMessage.images[0].format).toBe("png"); + expect(currentMsg.userInputMessage.images[0].source.bytes).toBe(fakeBase64); + }); + + it("should forward multiple base64 images", () => { + const fakeBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="; + const body = { + messages: [ + { + role: "user", + content: [ + { type: "text", text: "Compare these images" }, + { type: "image_url", image_url: { url: `data:image/jpeg;base64,${fakeBase64}` } }, + { type: "image_url", image_url: { url: `data:image/png;base64,${fakeBase64}` } } + ] + } + ] + }; + + const result = buildKiroPayload("claude-sonnet-4.6", body, true, {}); + + const currentMsg = result.conversationState.currentMessage; + expect(currentMsg.userInputMessage.images).toHaveLength(2); + 
expect(currentMsg.userInputMessage.images[0].format).toBe("jpeg"); + expect(currentMsg.userInputMessage.images[1].format).toBe("png"); + }); + + it("should not include images field when images array is empty", () => { + const body = { + messages: [ + { + role: "user", + content: [ + { type: "text", text: "Just text" } + ] + } + ] + }; + + const result = buildKiroPayload("claude-sonnet-4.6", body, true, {}); + + const currentMsg = result.conversationState.currentMessage; + expect(currentMsg.userInputMessage.images).toBeUndefined(); + }); + + it("should include both images and text content together", () => { + const fakeBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="; + const body = { + messages: [ + { + role: "user", + content: [ + { type: "text", text: "What is in this image?" }, + { type: "image_url", image_url: { url: `data:image/jpeg;base64,${fakeBase64}` } } + ] + } + ] + }; + + const result = buildKiroPayload("claude-sonnet-4.6", body, true, {}); + + const currentMsg = result.conversationState.currentMessage; + expect(currentMsg.userInputMessage.content).toContain("What is in this image?"); + expect(currentMsg.userInputMessage.images).toHaveLength(1); + }); + + it("should treat http image URLs as text fallback (Kiro only supports base64)", () => { + const body = { + messages: [ + { + role: "user", + content: [ + { type: "text", text: "Look at this" }, + { type: "image_url", image_url: { url: "https://example.com/photo.jpg" } } + ] + } + ] + }; + + const result = buildKiroPayload("claude-sonnet-4.6", body, true, {}); + + const currentMsg = result.conversationState.currentMessage; + // HTTP URLs are not supported by Kiro — converted to text placeholder + expect(currentMsg.userInputMessage.images).toBeUndefined(); + expect(currentMsg.userInputMessage.content).toContain("[Image: https://example.com/photo.jpg]"); + }); + }); +});