https://github.com/decolua/9router/pull/1167

https://github.com/decolua/9router/pull/1166
2026-05-16 11:20:08 +07:00 · 2026-05-16 11:20:08 +07:00 · 4a575f21a2
commit 4a575f21a2
parent 74043f59ab
9 changed files with 884 additions and 43 deletions
--- a/open-sse/config/kiroConstants.js
+++ b/open-sse/config/kiroConstants.js
@ -0,0 +1,262 @@
+/**
+ * Kiro-specific constants and helpers.
+ *
+ * Mirrors the behaviour of `internal/translator/kiro/common/constants.go` and
+ * `internal/translator/kiro/claude/kiro_claude_request.go` from the
+ * CLIProxyAPIPlus reference implementation, scoped down to what 9router needs:
+ *
+ *   - `-agentic` model suffix detection + chunked-write system prompt
+ *   - reasoning / thinking trigger detection (Anthropic-Beta header,
+ *     Claude `thinking`, OpenAI `reasoning_effort`, AMP/Cursor magic tag)
+ *   - the `<thinking_mode>enabled</thinking_mode>` system-prompt injection
+ *     that turns Kiro reasoning on
+ *
+ * Kiro upstream does not advertise `-agentic` model IDs; they are a 9router
+ * fiction. The suffix is stripped before the request leaves this process.
+ */
+
+/** Model-id suffix that marks a 9router synthetic "agentic" variant (tested by isAgenticModel). */
+export const KIRO_AGENTIC_SUFFIX = "-agentic";
+/** Model-id suffix that marks a 9router synthetic "thinking" variant (tested by isThinkingModel). */
+export const KIRO_THINKING_SUFFIX = "-thinking";
+
+/** Default `<max_thinking_length>` value used by buildThinkingSystemPrefix when no budget is given. */
+export const KIRO_THINKING_BUDGET_DEFAULT = 16000;
+
+/**
+ * Chunked-write instructions prepended to requests whose model id carries the
+ * `-agentic` suffix. The text is sent to the model verbatim, so treat every
+ * character as runtime data. The trailing `.trim()` removes the newline that
+ * follows the opening backtick and the one that precedes the closing backtick.
+ */
+export const KIRO_AGENTIC_SYSTEM_PROMPT = `
+# CRITICAL: CHUNKED WRITE PROTOCOL (MANDATORY)
+
+You MUST follow these rules for ALL file operations. Violation causes server timeouts and task failure.
+
+## ABSOLUTE LIMITS
+- **MAXIMUM 350 LINES** per single write/edit operation - NO EXCEPTIONS
+- **RECOMMENDED 300 LINES** or less for optimal performance
+- **NEVER** write entire files in one operation if >300 lines
+
+## MANDATORY CHUNKED WRITE STRATEGY
+
+### For NEW FILES (>300 lines total):
+1. FIRST: Write initial chunk (first 250-300 lines) using write_to_file/fsWrite
+2. THEN: Append remaining content in 250-300 line chunks using file append operations
+3. REPEAT: Continue appending until complete
+
+### For EDITING EXISTING FILES:
+1. Use surgical edits (apply_diff/targeted edits) - change ONLY what's needed
+2. NEVER rewrite entire files - use incremental modifications
+3. Split large refactors into multiple small, focused edits
+
+### For LARGE CODE GENERATION:
+1. Generate in logical sections (imports, types, functions separately)
+2. Write each section as a separate operation
+3. Use append operations for subsequent sections
+
+## EXAMPLES OF CORRECT BEHAVIOR
+
+CORRECT: Writing a 600-line file
+- Operation 1: Write lines 1-300 (initial file creation)
+- Operation 2: Append lines 301-600
+
+CORRECT: Editing multiple functions
+- Operation 1: Edit function A
+- Operation 2: Edit function B
+- Operation 3: Edit function C
+
+WRONG: Writing 500 lines in single operation -> TIMEOUT
+WRONG: Rewriting entire file to change 5 lines -> TIMEOUT
+WRONG: Generating massive code blocks without chunking -> TIMEOUT
+
+## WHY THIS MATTERS
+- Server has 2-3 minute timeout for operations
+- Large writes exceed timeout and FAIL completely
+- Chunked writes are FASTER and more RELIABLE
+- Failed writes waste time and require retry
+
+REMEMBER: When in doubt, write LESS per operation. Multiple small operations > one large operation.
+`.trim();
+
+/**
+ * Decide whether the caller is asking for reasoning / thinking output.
+ *
+ * Returns true as soon as any one of these signals is present:
+ *   - an `Anthropic-Beta` header whose value mentions `interleaved-thinking`
+ *   - `body.thinking.type === "enabled"`, unless `budget_tokens` converts to a
+ *     finite number that is zero or negative
+ *   - `body.reasoning_effort` (or, when that is null/undefined,
+ *     `body.reasoning.effort`) equal to low / medium / high / auto,
+ *     compared case-insensitively
+ *   - a system or user message, or a string `body.system`, that carries
+ *     `<thinking_mode>enabled</thinking_mode>` or
+ *     `<thinking_mode>interleaved</thinking_mode>`
+ *   - a model id containing `thinking` or `-reason` (case-insensitive)
+ *
+ * @param {object} body OpenAI-shaped request body (post-translation)
+ * @param {object} [headers] Original inbound HTTP headers (case-insensitive)
+ * @param {string} [model] Model id the caller asked for (post-strip ok)
+ * @returns {boolean}
+ */
+export function isThinkingEnabled(body, headers, model) {
+  // 1. Header opt-in.
+  const betaHeader = headers ? pickHeader(headers, "anthropic-beta") : undefined;
+  if (typeof betaHeader === "string" && betaHeader.toLowerCase().includes("interleaved-thinking")) {
+    return true;
+  }
+
+  if (body && typeof body === "object") {
+    // 2. Claude-style `thinking` object. Only an explicit non-positive finite
+    //    budget cancels the request.
+    const { thinking } = body;
+    if (thinking && typeof thinking === "object" && thinking.type === "enabled") {
+      const budgetTokens = Number(thinking.budget_tokens);
+      const budgetIsNonPositive = Number.isFinite(budgetTokens) && budgetTokens <= 0;
+      if (!budgetIsNonPositive) return true;
+    }
+
+    // 3. OpenAI-style effort, flat field first, nested field as fallback.
+    let effort = body.reasoning_effort;
+    if (effort === null || effort === undefined) {
+      const nested = body.reasoning;
+      effort = nested && typeof nested === "object" ? nested.effort : null;
+    }
+    if (typeof effort === "string" && ["low", "medium", "high", "auto"].includes(effort.toLowerCase())) {
+      return true;
+    }
+
+    // 4. Magic tag embedded in the prompt text.
+    if (containsThinkingModeTag(body)) return true;
+  }
+
+  // 5. Model-name hint.
+  if (typeof model !== "string" || !model) return false;
+  const loweredModel = model.toLowerCase();
+  return loweredModel.includes("thinking") || loweredModel.includes("-reason");
+}
+
+/**
+ * Report whether a model id is one of 9router's synthetic agentic variants,
+ * i.e. a string ending in `KIRO_AGENTIC_SUFFIX`. Non-string input is never
+ * agentic.
+ *
+ * @param {string} model
+ * @returns {boolean}
+ */
+export function isAgenticModel(model) {
+  if (typeof model !== "string") return false;
+  return model.endsWith(KIRO_AGENTIC_SUFFIX);
+}
+
+/**
+ * Remove a trailing `-agentic` from a model id. Ids without the suffix (and
+ * non-string values) are returned unchanged.
+ *
+ * @param {string} model
+ * @returns {string}
+ */
+export function stripAgenticSuffix(model) {
+  return isAgenticModel(model)
+    ? model.slice(0, model.length - KIRO_AGENTIC_SUFFIX.length)
+    : model;
+}
+
+/**
+ * Report whether a model id is a 9router synthetic thinking variant
+ * (e.g. `claude-sonnet-4.5-thinking`), i.e. a string ending in
+ * `KIRO_THINKING_SUFFIX`.
+ *
+ * Note: other providers ship real `-thinking` models (e.g.
+ * `kimi-k2-thinking`), but within the `kr/` namespace the suffix is treated
+ * purely as a synthetic alias for the base model with
+ * `<thinking_mode>enabled</thinking_mode>` injected.
+ *
+ * @param {string} model Model id with `-agentic` already stripped
+ * @returns {boolean}
+ */
+export function isThinkingModel(model) {
+  if (typeof model !== "string") return false;
+  return model.endsWith(KIRO_THINKING_SUFFIX);
+}
+
+/**
+ * Remove a trailing `-thinking` from a model id. Ids without the suffix (and
+ * non-string values) are returned unchanged.
+ *
+ * @param {string} model
+ * @returns {string}
+ */
+export function stripThinkingSuffix(model) {
+  return isThinkingModel(model)
+    ? model.slice(0, model.length - KIRO_THINKING_SUFFIX.length)
+    : model;
+}
+
+/**
+ * Translate a 9router model id into the upstream Kiro model id and report
+ * which synthetic suffixes were present.
+ *
+ * Suffixes are peeled from the right: `-agentic` first, then `-thinking`.
+ * The combined form is therefore `-thinking-agentic`; an id ending in
+ * `-agentic-thinking` only has its `-thinking` removed.
+ *
+ *   resolveKiroModel("claude-sonnet-4.5-thinking-agentic")
+ *     => { upstream: "claude-sonnet-4.5", agentic: true, thinking: true }
+ *   resolveKiroModel("claude-sonnet-4.5-thinking")
+ *     => { upstream: "claude-sonnet-4.5", agentic: false, thinking: true }
+ *   resolveKiroModel("claude-sonnet-4.5-agentic")
+ *     => { upstream: "claude-sonnet-4.5", agentic: true, thinking: false }
+ *   resolveKiroModel("claude-sonnet-4.5")
+ *     => { upstream: "claude-sonnet-4.5", agentic: false, thinking: false }
+ *
+ * @param {string} model
+ * @returns {{ upstream: string, agentic: boolean, thinking: boolean }}
+ */
+export function resolveKiroModel(model) {
+  const agentic = isAgenticModel(model);
+  const withoutAgentic = agentic ? stripAgenticSuffix(model) : model;
+
+  const thinking = isThinkingModel(withoutAgentic);
+  const upstream = thinking ? stripThinkingSuffix(withoutAgentic) : withoutAgentic;
+
+  return { upstream, agentic, thinking };
+}
+
+/**
+ * Produce the two-line tag block that switches Kiro reasoning on:
+ * `<thinking_mode>enabled</thinking_mode>` followed by
+ * `<max_thinking_length>N</max_thinking_length>`.
+ *
+ * `budget` is converted with `Number()`; a result of 0 or NaN falls back to
+ * the default, and the final value is clamped to the range 1..32000.
+ *
+ * @param {number} [budget=KIRO_THINKING_BUDGET_DEFAULT]
+ * @returns {string}
+ */
+export function buildThinkingSystemPrefix(budget = KIRO_THINKING_BUDGET_DEFAULT) {
+  const requested = Number(budget) || KIRO_THINKING_BUDGET_DEFAULT;
+  const cappedAbove = Math.min(32000, requested);
+  const safeBudget = Math.max(1, cappedAbove);
+  return [
+    "<thinking_mode>enabled</thinking_mode>",
+    `<max_thinking_length>${safeBudget}</max_thinking_length>`
+  ].join("\n");
+}
+
+/**
+ * Look up a header by name without caring about case.
+ *
+ * Objects exposing a `get()` method (e.g. a Fetch `Headers` instance) are
+ * queried through it; anything else is treated as a plain object and its own
+ * enumerable keys are compared case-insensitively.
+ *
+ * @param {object} headers
+ * @param {string} name
+ * @returns {*} The header value, or undefined when no key matches.
+ */
+function pickHeader(headers, name) {
+  if (!headers) return undefined;
+  if (typeof headers.get === "function") return headers.get(name);
+
+  const wanted = name.toLowerCase();
+  const matchingKey = Object.keys(headers).find((key) => key.toLowerCase() === wanted);
+  return matchingKey === undefined ? undefined : headers[matchingKey];
+}
+
+/**
+ * Scan a request body for the thinking-mode magic tag.
+ *
+ * Only `system` and `user` messages are inspected. Message content may be a
+ * plain string or an array of parts, in which case each part's string `text`
+ * is checked. A string `body.system` is inspected last.
+ *
+ * @param {object} body
+ * @returns {boolean}
+ */
+function containsThinkingModeTag(body) {
+  const messages = Array.isArray(body?.messages) ? body.messages : [];
+
+  const foundInMessages = messages.some((msg) => {
+    if (!msg) return false;
+    if (msg.role !== "system" && msg.role !== "user") return false;
+
+    const { content } = msg;
+    if (typeof content === "string") return containsTagInText(content);
+    if (!Array.isArray(content)) return false;
+    return content.some((part) => {
+      const text = part?.text;
+      return typeof text === "string" && containsTagInText(text);
+    });
+  });
+  if (foundInMessages) return true;
+
+  return typeof body?.system === "string" && containsTagInText(body.system);
+}
+
+/**
+ * Check a piece of text for `<thinking_mode>enabled</thinking_mode>` or
+ * `<thinking_mode>interleaved</thinking_mode>`.
+ *
+ * @param {string} text
+ * @returns {boolean}
+ */
+function containsTagInText(text) {
+  // Cheap rejection first: most prompts do not mention the tag at all.
+  if (!text || !text.includes("<thinking_mode>")) return false;
+  const acceptedTags = [
+    "<thinking_mode>enabled</thinking_mode>",
+    "<thinking_mode>interleaved</thinking_mode>"
+  ];
+  return acceptedTags.some((tag) => text.includes(tag));
+}
--- a/open-sse/config/providerModels.js
+++ b/open-sse/config/providerModels.js
@ -132,6 +132,7 @@ export const PROVIDER_MODELS = {
    { id: "text-embedding-3-large", name: "Text Embedding 3 Large (GitHub)", type: "embedding" },
  ],
  kr: [  // Kiro AI
+    // --- Base Claude variants ---
    // { id: "claude-opus-4.5", name: "Claude Opus 4.5" },
    { id: "claude-sonnet-4.5", name: "Claude Sonnet 4.5" },
    { id: "claude-haiku-4.5", name: "Claude Haiku 4.5" },
@ -139,6 +140,16 @@ export const PROVIDER_MODELS = {
    { id: "qwen3-coder-next", name: "Qwen3 Coder Next", strip: ["image", "audio"] },
    { id: "glm-5", name: "GLM 5" },
    { id: "MiniMax-M2.5", name: "MiniMax M2.5" },
+    // --- Thinking variants (alias to base; thinking is enabled at request time
+    //     via <thinking_mode>enabled</thinking_mode> system-prompt injection) ---
+    { id: "claude-sonnet-4.5-thinking", name: "Claude Sonnet 4.5 (Thinking)" },
+    { id: "claude-haiku-4.5-thinking", name: "Claude Haiku 4.5 (Thinking)" },
+    // --- Agentic variants (synthetic; same upstream model + chunked-write
+    //     system prompt to dodge Kiro's 2-3 min server timeout on big writes) ---
+    { id: "claude-sonnet-4.5-agentic", name: "Claude Sonnet 4.5 (Agentic)" },
+    { id: "claude-haiku-4.5-agentic", name: "Claude Haiku 4.5 (Agentic)" },
+    { id: "claude-sonnet-4.5-thinking-agentic", name: "Claude Sonnet 4.5 (Thinking + Agentic)" },
+    { id: "claude-haiku-4.5-thinking-agentic", name: "Claude Haiku 4.5 (Thinking + Agentic)" },
  ],
  cu: [  // Cursor IDE
    { id: "default", name: "Auto (Server Picks)" },
--- a/open-sse/executors/kiro.js
+++ b/open-sse/executors/kiro.js
@ -87,6 +87,8 @@ export class KiroExecutor extends BaseExecutor {
      endDetected: false,
      finishEmitted: false,
      hasToolCalls: false,
+      hasReasoningContent: false,
+      reasoningChunkCount: 0,
      toolCallIndex: 0,
      seenToolIds: new Map()
    };
@ -143,6 +145,41 @@ export class KiroExecutor extends BaseExecutor {
            controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(chunk)}\n\n`));
          }

+          // Handle reasoningContentEvent (Kiro thinking / reasoning)
+          // Kiro returns reasoning as a separate event when the request system
+          // prompt contains <thinking_mode>enabled</thinking_mode>. Surface it
+          // as OpenAI delta.reasoning_content so downstream translators can map
+          // it back to Claude thinking blocks / Anthropic reasoning, etc.
+          if (eventType === "reasoningContentEvent") {
+            const reasoning = event.payload?.reasoningContentEvent || event.payload || {};
+            const reasoningText = (typeof reasoning === "string")
+              ? reasoning
+              : (reasoning.text || reasoning.content || "");
+            if (reasoningText) {
+              state.hasReasoningContent = true;
+              state.totalContentLength += reasoningText.length;
+
+              const reasoningDelta = state.reasoningChunkCount === 0 && chunkIndex === 0
+                ? { role: "assistant", reasoning_content: reasoningText }
+                : { reasoning_content: reasoningText };
+
+              const chunk = {
+                id: responseId,
+                object: "chat.completion.chunk",
+                created,
+                model,
+                choices: [{
+                  index: 0,
+                  delta: reasoningDelta,
+                  finish_reason: null
+                }]
+              };
+              chunkIndex++;
+              state.reasoningChunkCount++;
+              controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(chunk)}\n\n`));
+            }
+          }
+
          // Handle codeEvent
          if (eventType === "codeEvent" && event.payload?.content) {
            const chunk = {
--- a/open-sse/services/kiroModels.js
+++ b/open-sse/services/kiroModels.js
@ -0,0 +1,332 @@
+/**
+ * Kiro model catalog fetcher.
+ *
+ * Calls AWS CodeWhisperer's `ListAvailableModels` endpoint to get the live
+ * catalog for an authenticated Kiro account, then expands each upstream model
+ * into 9router-shaped variants:
+ *
+ *   {upstream}                          - base model
+ *   {upstream}-thinking                 - same model, thinking on at request time
+ *   {upstream}-agentic                  - same model, chunked-write prompt prepended
+ *   {upstream}-thinking-agentic         - both
+ *
+ * The `-thinking` and `-agentic` suffixes do not exist on the Kiro upstream
+ * API. They are 9router fictions and the `openai-to-kiro` translator strips
+ * them before the request leaves this process.
+ *
+ * The runtime UA is built to match what Kiro IDE itself sends, because the
+ * upstream rejects requests with malformed `User-Agent` headers (returns 400
+ * "format of value 'os/win/10 lang/js ...' is invalid").
+ */
+
+import { v4 as uuidv4 } from "uuid";
+import { createHash } from "crypto";
+import { refreshKiroToken } from "./tokenRefresh.js";
+
+// Version strings interpolated into the User-Agent / x-amz-user-agent headers
+// built by buildKiroFingerprintHeaders.
+const KIRO_RUNTIME_SDK_VERSION = "1.0.0";
+const KIRO_AGENT_OS = "windows";
+const KIRO_AGENT_OS_VERSION = "10.0.26200";
+const KIRO_NODE_VERSION = "22.21.1";
+const KIRO_VERSION = "0.10.32";
+
+// Region used when none can be read from the credential's profileArn.
+const DEFAULT_REGION = "us-east-1";
+// Delay before the ListAvailableModels request is aborted.
+const FETCH_TIMEOUT_MS = 30_000;
+const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes per credential
+
+/**
+ * Expanded catalogs keyed by cacheKey(credentials). Each entry keeps the
+ * expanded variant list and the raw upstream list it was built from.
+ *
+ * @type {Map<string, { expiresAt: number, models: any[], rawModels: any[] }>}
+ */
+const catalogCache = new Map();
+
+/**
+ * Remove a trailing `-agentic`, then a trailing `-thinking`, from an id.
+ * Defensive: applied to upstream ids in case one already looks like a
+ * synthetic variant, so suffixes are not stacked twice.
+ *
+ * @param {string} id
+ * @returns {string}
+ */
+function stripSyntheticSuffixes(id) {
+  let trimmed = id;
+  // Order matters: the combined form is `-thinking-agentic`.
+  for (const suffix of ["-agentic", "-thinking"]) {
+    if (trimmed.endsWith(suffix)) {
+      trimmed = trimmed.slice(0, trimmed.length - suffix.length);
+    }
+  }
+  return trimmed;
+}
+
+/**
+ * Extract the region from a profileArn like
+ *   arn:aws:codewhisperer:us-east-1:123456789012:profile/ABC
+ *
+ * The region ends up inside the request hostname, so it is only accepted
+ * when it consists solely of letters, digits and hyphens. Anything else
+ * (missing segment, non-string input, or a segment containing characters
+ * such as `/`, `.` or `@` that could redirect the request and its bearer
+ * token to another host) yields DEFAULT_REGION.
+ *
+ * @param {string} profileArn
+ * @returns {string}
+ */
+function regionFromProfileArn(profileArn) {
+  if (!profileArn || typeof profileArn !== "string") return DEFAULT_REGION;
+  const region = profileArn.split(":")[3];
+  if (!region || !/^[a-z0-9-]+$/i.test(region)) return DEFAULT_REGION;
+  return region;
+}
+
+/**
+ * Assemble the Kiro-IDE-shaped identification headers for a credential.
+ *
+ * A `machineId` is derived as the SHA-256 hex digest of the first available
+ * identifier (clientId, refreshToken, profileArn, accessToken, or the literal
+ * "kiro-anonymous"), so the same credential always presents the same id.
+ * `amz-sdk-invocation-id` is a fresh UUID on every call.
+ *
+ * @param {object} credentials
+ * @returns {Record<string, string>}
+ */
+function buildKiroFingerprintHeaders(credentials) {
+  const psd = credentials?.providerSpecificData;
+  const seedCandidates = [
+    psd?.clientId,
+    credentials?.refreshToken,
+    psd?.profileArn,
+    credentials?.accessToken
+  ];
+  const seed = seedCandidates.find(Boolean) || "kiro-anonymous";
+  const machineId = createHash("sha256").update(String(seed)).digest("hex");
+
+  const sdkTag = `aws-sdk-js/${KIRO_RUNTIME_SDK_VERSION}`;
+  const ideTag = `KiroIDE-${KIRO_VERSION}-${machineId}`;
+  const userAgent = [
+    sdkTag,
+    "ua/2.1",
+    `os/${KIRO_AGENT_OS}#${KIRO_AGENT_OS_VERSION}`,
+    "lang/js",
+    `md/nodejs#${KIRO_NODE_VERSION}`,
+    `api/codewhispererruntime#${KIRO_RUNTIME_SDK_VERSION}`,
+    "m/N,E",
+    ideTag
+  ].join(" ");
+
+  return {
+    "User-Agent": userAgent,
+    "x-amz-user-agent": `${sdkTag} ${ideTag}`,
+    "x-amzn-kiro-agent-mode": "vibe",
+    "x-amzn-codewhisperer-optout": "true",
+    "amz-sdk-request": "attempt=1; max=1",
+    "amz-sdk-invocation-id": uuidv4(),
+    "Accept": "application/json"
+  };
+}
+
+/**
+ * Expand one upstream Kiro model into its 9router variant set.
+ *
+ * Each variant is `{ id, name, capabilities: { thinking, agentic } }`, in the
+ * order: base, `-thinking`, `-agentic`, `-thinking-agentic`.
+ *
+ * The `auto` model is special: Kiro picks the underlying model server-side,
+ * so the chunked-write `-agentic` prompt is not meaningful (it targets
+ * coding-agent file writes). Matching CLIProxyAPIPlus, `auto` only gets the
+ * base and `-thinking` variants.
+ *
+ * @param {string} upstream Upstream model id (synthetic suffixes are stripped defensively)
+ * @param {string} [displayName] Human-readable name; defaults to `Kiro <id>`
+ * @returns {{ id: string, name: string, capabilities: { thinking: boolean, agentic: boolean } }[]}
+ */
+function buildVariants(upstream, displayName) {
+  const safeUpstream = stripSyntheticSuffixes(upstream);
+  const display = displayName || `Kiro ${safeUpstream}`;
+
+  // One factory for all four shapes keeps ids, labels and flags in lock-step.
+  const makeVariant = (thinking, agentic) => {
+    const labels = [];
+    if (thinking) labels.push("Thinking");
+    if (agentic) labels.push("Agentic");
+    return {
+      id: `${safeUpstream}${thinking ? "-thinking" : ""}${agentic ? "-agentic" : ""}`,
+      name: labels.length > 0 ? `${display} (${labels.join(" + ")})` : display,
+      capabilities: { thinking, agentic }
+    };
+  };
+
+  const variants = [makeVariant(false, false), makeVariant(true, false)];
+  if (safeUpstream !== "auto") {
+    variants.push(makeVariant(false, true), makeVariant(true, true));
+  }
+  return variants;
+}
+
+/**
+ * Format the human-friendly display name for a Kiro model, including the
+ * rate multiplier when it is something other than 1.0x.
+ *
+ * The base label is the first of `modelName` / `modelId` that is a
+ * non-blank string (or a number), falling back to "Kiro". Non-string values
+ * from the upstream payload are coerced rather than crashing on `.trim()`.
+ *
+ * The multiplier suffix is omitted when the rate is not a finite number, is
+ * zero or negative, or equals 1 (within 1e-9).
+ *
+ * @param {string} [modelName]
+ * @param {string} [modelId]
+ * @param {number|string} [rateMultiplier]
+ * @returns {string}
+ */
+function formatDisplayName(modelName, modelId, rateMultiplier) {
+  const toLabel = (value) =>
+    (typeof value === "string" || typeof value === "number" ? String(value).trim() : "");
+  const base = toLabel(modelName) || toLabel(modelId) || "Kiro";
+
+  const rate = Number(rateMultiplier);
+  if (!Number.isFinite(rate) || rate <= 0 || Math.abs(rate - 1.0) < 1e-9) {
+    return `Kiro ${base}`;
+  }
+  // toFixed() always uses "." as the decimal separator, regardless of locale.
+  return `Kiro ${base} (${rate.toFixed(1)}x credit)`;
+}
+
+/**
+ * Fetch the raw model catalog from Kiro's `ListAvailableModels` endpoint.
+ *
+ * The request is aborted after FETCH_TIMEOUT_MS, or earlier if the caller's
+ * `signal` aborts (including when it is already aborted on entry). The
+ * timeout covers reading the response body as well as the headers, and both
+ * the timer and the abort listener are released before returning.
+ *
+ * @param {object} credentials Uses `accessToken` and `providerSpecificData.profileArn`.
+ * @param {AbortSignal} [signal] Optional outer cancellation signal.
+ * @returns {Promise<object[]>} The array under `.models` in the response, or
+ *   an empty array when that field is missing or not an array.
+ * @throws {Error} On network failure or abort, on an unparseable body, or on
+ *   a non-2xx status (the error then carries `status` and `body`).
+ */
+async function fetchKiroCatalogRaw(credentials, signal) {
+  const profileArn = credentials?.providerSpecificData?.profileArn || "";
+  const region = regionFromProfileArn(profileArn);
+  const params = new URLSearchParams();
+  params.set("origin", "AI_EDITOR");
+  if (profileArn) params.set("profileArn", profileArn);
+  const url = `https://q.${region}.amazonaws.com/ListAvailableModels?${params.toString()}`;
+
+  const headers = {
+    ...buildKiroFingerprintHeaders(credentials),
+    "Authorization": `Bearer ${credentials?.accessToken || ""}`
+  };
+
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort("timeout"), FETCH_TIMEOUT_MS);
+
+  // Forward outer cancellation if any. An already-aborted signal never fires
+  // its "abort" event again, so that case is propagated immediately.
+  const canListen = Boolean(signal) && typeof signal.addEventListener === "function";
+  const forwardAbort = () => controller.abort(signal.reason);
+  if (signal?.aborted) {
+    forwardAbort();
+  } else if (canListen) {
+    signal.addEventListener("abort", forwardAbort, { once: true });
+  }
+
+  try {
+    const response = await fetch(url, {
+      method: "GET",
+      headers,
+      signal: controller.signal
+    });
+
+    if (!response.ok) {
+      const text = await response.text().catch(() => "");
+      const err = new Error(`Kiro ListAvailableModels ${response.status}: ${text || response.statusText}`);
+      err.status = response.status;
+      err.body = text;
+      throw err;
+    }
+
+    const data = await response.json();
+    return Array.isArray(data?.models) ? data.models : [];
+  } finally {
+    clearTimeout(timer);
+    // Detach from the caller's signal so long-lived signals do not
+    // accumulate one listener per request.
+    if (canListen && typeof signal.removeEventListener === "function") {
+      signal.removeEventListener("abort", forwardAbort);
+    }
+  }
+}
+
+/**
+ * Derive the catalog-cache key for a credential: the SHA-256 hex digest of
+ * `kiro:<seed>`, where the seed is the first available of profileArn,
+ * clientId, refreshToken, accessToken, or the literal "anonymous". The
+ * longest-lived identifiers come first so that different login sessions for
+ * the same account share a cache entry.
+ *
+ * @param {object} credentials
+ * @returns {string}
+ */
+function cacheKey(credentials) {
+  const { profileArn, clientId } = credentials?.providerSpecificData || {};
+  const seed = profileArn
+    || clientId
+    || credentials?.refreshToken
+    || credentials?.accessToken
+    || "anonymous";
+
+  const hasher = createHash("sha256");
+  hasher.update(`kiro:${seed}`);
+  return hasher.digest("hex");
+}
+
+/**
+ * Resolve the live Kiro model catalog for a credential and expand each entry
+ * into 9router variants (`-thinking`, `-agentic`, `-thinking-agentic`).
+ *
+ * Results are cached per credential for CACHE_TTL_MS. On a 401 the access
+ * token is refreshed once and the fetch retried; when the retry succeeds the
+ * passed-in `credentials` object is updated in place with the new tokens.
+ *
+ * On any error (network, 4xx, 5xx, or a failing token refresh), returns
+ * `null` so callers can fall back to the static catalog without taking down
+ * the dashboard or `/v1/models`.
+ *
+ * @param {object} credentials Connection record (accessToken, refreshToken,
+ *   providerSpecificData {profileArn, authMethod, clientId, clientSecret, region})
+ * @param {object} [options]
+ * @param {boolean} [options.forceRefresh] Bypass the per-credential cache.
+ * @param {object}  [options.log] Logger.
+ * @param {AbortSignal} [options.signal] Cancels the in-flight catalog request.
+ * @param {function} [options.onCredentialsRefreshed] Persist refreshed token
+ *   back to your credential store. Called with `{ accessToken, refreshToken,
+ *   expiresIn }` whenever a 401 triggers a token refresh.
+ * @returns {Promise<{ models: object[], rawModels: object[] } | null>}
+ */
+export async function resolveKiroModels(credentials, options = {}) {
+  // The parameter default only covers `undefined`; tolerate an explicit null.
+  const opts = options ?? {};
+  const log = opts.log;
+
+  if (!credentials || !credentials.accessToken) {
+    log?.debug?.("KIRO_MODELS", "No accessToken; skipping live fetch");
+    return null;
+  }
+
+  const key = cacheKey(credentials);
+  const now = Date.now();
+  if (!opts.forceRefresh) {
+    const cached = catalogCache.get(key);
+    if (cached && cached.expiresAt > now) {
+      return { models: cached.models, rawModels: cached.rawModels };
+    }
+  }
+
+  let raw;
+  try {
+    raw = await fetchKiroCatalogRaw(credentials, opts.signal);
+  } catch (err) {
+    const canRefresh = Boolean(err && err.status === 401 && credentials.refreshToken);
+    if (!canRefresh) {
+      log?.warn?.("KIRO_MODELS", `ListAvailableModels failed: ${err?.message || err}`);
+      return null;
+    }
+
+    log?.info?.("KIRO_MODELS", "Got 401 from Kiro; refreshing token");
+    let refreshed;
+    try {
+      refreshed = await refreshKiroToken(
+        credentials.refreshToken,
+        credentials.providerSpecificData,
+        log
+      );
+    } catch (refreshErr) {
+      // A throwing refresh must not escape: this function's contract is to
+      // return null on failure so callers can use the static catalog.
+      log?.warn?.("KIRO_MODELS", `Token refresh failed: ${refreshErr?.message || refreshErr}`);
+      return null;
+    }
+    if (!refreshed?.accessToken) {
+      log?.warn?.("KIRO_MODELS", "Token refresh did not return accessToken");
+      return null;
+    }
+
+    const next = { ...credentials, ...refreshed };
+    if (typeof opts.onCredentialsRefreshed === "function") {
+      // Persisting is best-effort; the fresh token is still usable for this call.
+      try { await opts.onCredentialsRefreshed(refreshed); } catch (e) {
+        log?.warn?.("KIRO_MODELS", `onCredentialsRefreshed failed: ${e?.message || e}`);
+      }
+    }
+
+    try {
+      raw = await fetchKiroCatalogRaw(next, opts.signal);
+      // Update the in-memory credential reference too so retry logic uses
+      // the fresh token consistently.
+      credentials.accessToken = next.accessToken;
+      if (next.refreshToken) credentials.refreshToken = next.refreshToken;
+    } catch (err2) {
+      log?.warn?.("KIRO_MODELS", `Retry after refresh failed: ${err2?.message || err2}`);
+      return null;
+    }
+  }
+
+  const expanded = [];
+  for (const m of raw) {
+    if (!m || typeof m !== "object") continue;
+    const upstreamId = m.modelId || m.id;
+    if (!upstreamId) continue;
+    const display = formatDisplayName(m.modelName, upstreamId, m.rateMultiplier);
+    // Missing or non-numeric input limit falls back to 200k tokens.
+    const ctx = Number(m?.tokenLimits?.maxInputTokens) || 200_000;
+    const rate = Number(m.rateMultiplier);
+    for (const v of buildVariants(upstreamId, display)) {
+      expanded.push({
+        ...v,
+        // Carry over context window + raw upstream metadata so the caller
+        // (e.g. the dashboard models endpoint) can render it.
+        contextLength: ctx,
+        rateMultiplier: Number.isFinite(rate) ? rate : 1.0,
+        upstreamModelId: upstreamId,
+        description: m.description || ""
+      });
+    }
+  }
+
+  catalogCache.set(key, {
+    expiresAt: now + CACHE_TTL_MS,
+    models: expanded,
+    rawModels: raw
+  });
+
+  return { models: expanded, rawModels: raw };
+}
+
+/**
+ * Forget the cached catalog for one credential, so the next
+ * resolveKiroModels call for it goes back to the network. Intended for use
+ * after rotating or importing tokens. A missing `credentials` is a no-op.
+ *
+ * @param {object} credentials
+ */
+export function invalidateKiroModelCache(credentials) {
+  if (credentials) {
+    const key = cacheKey(credentials);
+    catalogCache.delete(key);
+  }
+}
+
+/**
+ * Empty the in-memory catalog cache for every credential.
+ * Mostly useful in tests and for manual debugging.
+ *
+ * @returns {void}
+ */
+export function clearKiroModelCache() {
+  catalogCache.clear();
+}
--- a/open-sse/translator/request/openai-to-kiro.js
+++ b/open-sse/translator/request/openai-to-kiro.js
@ -5,6 +5,12 @@
 import { register } from "../index.js";
 import { FORMATS } from "../formats.js";
 import { v4 as uuidv4 } from "uuid";
+import {
+  resolveKiroModel,
+  isThinkingEnabled,
+  buildThinkingSystemPrefix,
+  KIRO_AGENTIC_SYSTEM_PROMPT
+} from "../../config/kiroConstants.js";

 /**
 * Convert OpenAI messages to Kiro format
@ -282,6 +288,20 @@ function convertMessages(messages, tools, model) {

 /**
 * Build Kiro payload from OpenAI format
+ *
+ * Two 9router-specific behaviours implemented here:
+ *
+ * 1. `-agentic` model suffix. Synthetic variant — same upstream model, but we
+ *    inject a chunked-write system prompt to keep large file writes under
+ *    Kiro's 2-3 minute server timeout. The suffix is stripped before being
+ *    sent upstream.
+ *
+ * 2. Thinking / reasoning. Kiro does not accept `thinking.type` or
+ *    `reasoning_effort` natively. The only way to enable reasoning is to
+ *    inject `<thinking_mode>enabled</thinking_mode>` into the user content
+ *    sent upstream. Detection covers Anthropic-Beta header, Claude API
+ *    `thinking`, OpenAI `reasoning_effort`, AMP/Cursor magic tags, and model
+ *    name hints.
 */
 export function buildKiroPayload(model, body, stream, credentials) {
  const messages = body.messages || [];
@ -290,14 +310,29 @@ export function buildKiroPayload(model, body, stream, credentials) {
  const temperature = body.temperature;
  const topP = body.top_p;

-  const { history, currentMessage } = convertMessages(messages, tools, model);
+  const { upstream: upstreamModel, agentic, thinking: modelImpliesThinking } = resolveKiroModel(model);
+  const thinkingEnabled = modelImpliesThinking || isThinkingEnabled(body, null, model);
+
+  const { history, currentMessage } = convertMessages(messages, tools, upstreamModel);

  const profileArn = credentials?.providerSpecificData?.profileArn || "";

  let finalContent = currentMessage?.userInputMessage?.content || "";
  const timestamp = new Date().toISOString();
-  finalContent = `[Context: Current time is ${timestamp}]\n\n${finalContent}`;
-  
+
+  // Build the system-prompt prefix that goes ABOVE the user message body.
+  // Order: thinking_mode tag first (so Kiro sees it before any user text),
+  // then context/timestamp marker, then optional agentic chunked-write prompt.
+  const prefixParts = [];
+  if (thinkingEnabled) {
+    prefixParts.push(buildThinkingSystemPrefix());
+  }
+  prefixParts.push(`[Context: Current time is ${timestamp}]`);
+  if (agentic) {
+    prefixParts.push(KIRO_AGENTIC_SYSTEM_PROMPT);
+  }
+  finalContent = `${prefixParts.join("\n\n")}\n\n${finalContent}`;
+
  const payload = {
    conversationState: {
      chatTriggerType: "MANUAL",
@ -305,8 +340,11 @@ export function buildKiroPayload(model, body, stream, credentials) {
      currentMessage: {
        userInputMessage: {
          content: finalContent,
-          modelId: model,
+          modelId: upstreamModel,
          origin: "AI_EDITOR",
+          ...(currentMessage?.userInputMessage?.images?.length > 0 && {
+            images: currentMessage.userInputMessage.images
+          }),
          ...(currentMessage?.userInputMessage?.userInputMessageContext && {
            userInputMessageContext: currentMessage.userInputMessage.userInputMessageContext
          })
@ -327,6 +365,12 @@ export function buildKiroPayload(model, body, stream, credentials) {
    if (topP !== undefined) payload.inferenceConfig.topP = topP;
  }

+  // Tag payload so the executor can route the upstream model id correctly.
+  Object.defineProperty(payload, "_kiroUpstreamModel", {
+    value: upstreamModel,
+    enumerable: false
+  });
+
  return payload;
 }

--- a/open-sse/translator/response/kiro-to-openai.js
+++ b/open-sse/translator/response/kiro-to-openai.js
@ -85,12 +85,18 @@ export function convertKiroToOpenAI(chunk, state) {
    return openaiChunk;
  }

-  // Handle reasoning/thinking events
+  // Handle reasoning/thinking events.
+  // Kiro emits reasoningContentEvent when the request enabled thinking via
+  // the <thinking_mode>enabled</thinking_mode> system-prompt tag. We surface
+  // this as OpenAI delta.reasoning_content so downstream translators can map
+  // it to Claude thinking blocks / Anthropic reasoning / etc.
  if (eventType === "reasoningContentEvent" || data.reasoningContentEvent) {
-    const content = data.reasoningContentEvent?.content || data.content || "";
+    const reasoning = data.reasoningContentEvent || data;
+    const content = (typeof reasoning === "string")
+      ? reasoning
+      : (reasoning.text || reasoning.content || data.content || "");
    if (!content) return null;

-    // Convert to thinking block format (Claude-style)
    const openaiChunk = {
      id: state.responseId,
      object: "chat.completion.chunk",
@ -100,7 +106,7 @@ export function convertKiroToOpenAI(chunk, state) {
        index: 0,
        delta: {
          ...(state.chunkIndex === 0 ? { role: "assistant" } : {}),
-          content: `<thinking>${content}</thinking>`
+          reasoning_content: content
        },
        finish_reason: null
      }]
--- a/src/app/api/providers/[id]/models/route.js
+++ b/src/app/api/providers/[id]/models/route.js
@ -5,6 +5,7 @@ import { KiroService } from "@/lib/oauth/services/kiro";
 import { GEMINI_CONFIG } from "@/lib/oauth/constants/oauth";
 import { refreshGoogleToken, updateProviderCredentials, refreshKiroToken } from "@/sse/services/tokenRefresh";
 import { resolveOllamaLocalHost } from "open-sse/config/providers.js";
+import { resolveKiroModels } from "open-sse/services/kiroModels.js";

 const GEMINI_CLI_MODELS_URL = "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels";

@ -288,52 +289,53 @@ export async function GET(request, { params }) {
      });
    }

-    // Kiro: Try dynamic model fetching first
+    // Kiro: Use resolveKiroModels to fetch live catalog + expand variants
    if (connection.provider === "kiro") {
+      const credentials = {
+        accessToken: connection.accessToken,
+        refreshToken: connection.refreshToken,
+        providerSpecificData: connection.providerSpecificData || {}
+      };
      let warning;
      try {
-        const kiroService = new KiroService();
-        const profileArn = connection.providerSpecificData?.profileArn;
-        const accessToken = connection.accessToken;
-        const refreshToken = connection.refreshToken;
-
-        if (accessToken && profileArn) {
-          try {
-            const models = await kiroService.listAvailableModels(accessToken, profileArn);
-            return NextResponse.json({
-              provider: connection.provider,
-              connectionId: connection.id,
-              models
-            });
-          } catch (error) {
-            if (error.message.includes("AccessDeniedException") && refreshToken) {
-              console.log("Kiro token invalid/expired. Attempting refresh...");
-              const refreshed = await refreshKiroToken(refreshToken, connection.providerSpecificData);
-
-              if (refreshed?.accessToken) {
-                await updateProviderCredentials(connection.id, {
-                  accessToken: refreshed.accessToken,
-                  refreshToken: refreshed.refreshToken || refreshToken,
-                  expiresIn: refreshed.expiresIn,
-                });
-
-                const models = await kiroService.listAvailableModels(refreshed.accessToken, profileArn);
-                return NextResponse.json({
-                  provider: connection.provider,
-                  connectionId: connection.id,
-                  models
-                });
-              }
+        const result = await resolveKiroModels(credentials, {
+          log: console,
+          onCredentialsRefreshed: async (refreshed) => {
+            if (refreshed?.accessToken) {
+              await updateProviderCredentials(connection.id, {
+                accessToken: refreshed.accessToken,
+                refreshToken: refreshed.refreshToken || connection.refreshToken,
+                expiresIn: refreshed.expiresIn,
+              });
+              connection.accessToken = refreshed.accessToken;
+              if (refreshed.refreshToken) connection.refreshToken = refreshed.refreshToken;
            }
-            throw error; // Let outer catch handle it
          }
+        });
+
+        if (result?.models?.length) {
+          const models = result.models.map((m) => ({
+            id: m.id,
+            name: m.name,
+            upstreamModelId: m.upstreamModelId,
+            contextLength: m.contextLength,
+            rateMultiplier: m.rateMultiplier,
+            capabilities: m.capabilities,
+            description: m.description
+          }));
+          return NextResponse.json({
+            provider: connection.provider,
+            connectionId: connection.id,
+            models
+          });
        }
+        warning = "Kiro returned no models; falling back to static catalog.";
      } catch (error) {
        warning = `Failed to fetch Kiro models: ${error.message}`;
        console.log("Failed to fetch Kiro models dynamically, falling back to static:", error.message);
      }

-      // Return empty dynamic list so UI falls back to static provider models.
+      // Empty dynamic list → UI falls back to static provider models.
      return NextResponse.json({
        provider: connection.provider,
        connectionId: connection.id,
--- a/src/app/api/v1/models/route.js
+++ b/src/app/api/v1/models/route.js
@ -7,6 +7,7 @@ import {
 } from "@/shared/constants/providers";
 import { getProviderConnections, getCombos, getCustomModels, getModelAliases } from "@/lib/localDb";
 import { getDisabledModels } from "@/lib/disabledModelsDb";
+import { resolveKiroModels } from "open-sse/services/kiroModels.js";

 const parseOpenAIStyleModels = (data) => {
  if (Array.isArray(data)) return data;
--- a/tests/unit/openai-to-kiro.test.js
+++ b/tests/unit/openai-to-kiro.test.js
@ -0,0 +1,146 @@
+/**
+ * Unit tests for open-sse/translator/request/openai-to-kiro.js
+ *
+ * Tests cover:
+ *  - buildKiroPayload() - basic message conversion
+ *  - Image forwarding fix: images in currentMessage must be included in payload
+ */
+
+import { describe, it, expect } from "vitest";
+import { buildKiroPayload } from "../../open-sse/translator/request/openai-to-kiro.js";
+
+describe("buildKiroPayload", () => { // Suite for buildKiroPayload as exported by open-sse/translator/request/openai-to-kiro.js
+  describe("basic message conversion", () => { // Text-only user messages
+    it("should convert a simple text message", () => {
+      const body = {
+        messages: [{ role: "user", content: "Hello" }] // string-form content (not an array of parts)
+      };
+
+      const result = buildKiroPayload("claude-sonnet-4.6", body, true, {}); // NOTE(review): meaning of the 3rd/4th args (true, {}) is not visible here — confirm against buildKiroPayload's signature
+
+      const currentMsg = result.conversationState.currentMessage; // the single user message is expected to land here
+      expect(currentMsg.userInputMessage.content).toContain("Hello"); // substring match only; the exact content string is not pinned
+      expect(currentMsg.userInputMessage.modelId).toBe("claude-sonnet-4.6"); // model name passed as the 1st argument is expected back as modelId
+      expect(currentMsg.userInputMessage.origin).toBe("AI_EDITOR");
+    });
+
+    it("should not include images field when no images are present", () => {
+      const body = {
+        messages: [{ role: "user", content: "No images here" }]
+      };
+
+      const result = buildKiroPayload("claude-sonnet-4.6", body, true, {});
+
+      const currentMsg = result.conversationState.currentMessage;
+      expect(currentMsg.userInputMessage.images).toBeUndefined(); // the field must be absent; an empty array would fail this assertion
+    });
+  });
+
+  describe("image forwarding", () => { // image_url parts inside array-form content
+    it("should forward base64 image from image_url content part", () => {
+      const fakeBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="; // opaque fixture; starts with a PNG signature (presumably a 1x1 image)
+      const body = {
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: "Describe this image" },
+              { type: "image_url", image_url: { url: `data:image/png;base64,${fakeBase64}` } } // data: URL declaring MIME type image/png
+            ]
+          }
+        ]
+      };
+
+      const result = buildKiroPayload("claude-sonnet-4.6", body, true, {});
+
+      const currentMsg = result.conversationState.currentMessage;
+      expect(currentMsg.userInputMessage.images).toBeDefined();
+      expect(currentMsg.userInputMessage.images).toHaveLength(1);
+      expect(currentMsg.userInputMessage.images[0].format).toBe("png"); // matches the subtype of the data URL's MIME type
+      expect(currentMsg.userInputMessage.images[0].source.bytes).toBe(fakeBase64); // base64 payload only, without the "data:image/png;base64," prefix
+    });
+
+    it("should forward multiple base64 images", () => {
+      const fakeBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==";
+      const body = {
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: "Compare these images" },
+              { type: "image_url", image_url: { url: `data:image/jpeg;base64,${fakeBase64}` } }, // declared as jpeg although the bytes are the same PNG fixture
+              { type: "image_url", image_url: { url: `data:image/png;base64,${fakeBase64}` } }
+            ]
+          }
+        ]
+      };
+
+      const result = buildKiroPayload("claude-sonnet-4.6", body, true, {});
+
+      const currentMsg = result.conversationState.currentMessage;
+      expect(currentMsg.userInputMessage.images).toHaveLength(2);
+      expect(currentMsg.userInputMessage.images[0].format).toBe("jpeg"); // format is expected to follow the declared MIME subtype, not the actual bytes
+      expect(currentMsg.userInputMessage.images[1].format).toBe("png"); // input order of the image parts is expected to be preserved
+    });
+
+    it("should not include images field when images array is empty", () => { // here "empty" means array-form content that contains no image_url parts
+      const body = {
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: "Just text" }
+            ]
+          }
+        ]
+      };
+
+      const result = buildKiroPayload("claude-sonnet-4.6", body, true, {});
+
+      const currentMsg = result.conversationState.currentMessage;
+      expect(currentMsg.userInputMessage.images).toBeUndefined();
+    });
+
+    it("should include both images and text content together", () => {
+      const fakeBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==";
+      const body = {
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: "What is in this image?" },
+              { type: "image_url", image_url: { url: `data:image/jpeg;base64,${fakeBase64}` } }
+            ]
+          }
+        ]
+      };
+
+      const result = buildKiroPayload("claude-sonnet-4.6", body, true, {});
+
+      const currentMsg = result.conversationState.currentMessage;
+      expect(currentMsg.userInputMessage.content).toContain("What is in this image?"); // the text part must still appear in content when an image is attached
+      expect(currentMsg.userInputMessage.images).toHaveLength(1);
+    });
+
+    it("should treat http image URLs as text fallback (Kiro only supports base64)", () => {
+      const body = {
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: "Look at this" },
+              { type: "image_url", image_url: { url: "https://example.com/photo.jpg" } } // remote URL rather than a data: URL
+            ]
+          }
+        ]
+      };
+
+      const result = buildKiroPayload("claude-sonnet-4.6", body, true, {});
+
+      const currentMsg = result.conversationState.currentMessage;
+      // HTTP URLs are not supported by Kiro — converted to text placeholder
+      expect(currentMsg.userInputMessage.images).toBeUndefined();
+      expect(currentMsg.userInputMessage.content).toContain("[Image: https://example.com/photo.jpg]"); // expected placeholder shape: "[Image: <url>]"
+    });
+  });
+});