From 4774150ca7dc525262a50a0b2ee9adb0a9b8bb94 Mon Sep 17 00:00:00 2001 From: Anurag Saxena Date: Sun, 22 Mar 2026 22:24:58 -0400 Subject: [PATCH] fix: combo 503 cooldown wait before fallthrough + 406 on disabled creds (#382) - fixes #335: on transient 503/502/504, wait for short cooldown (up to 5s) before falling to next combo model, giving the provider a chance to recover rather than immediately skipping it - fixes #334: when all combo models have no active credentials, return 503 (Service Unavailable) instead of 406 (Not Acceptable), which is more accurate and retriable by clients --- open-sse/services/combo.js | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/open-sse/services/combo.js b/open-sse/services/combo.js index 1f134f3..d61c813 100644 --- a/open-sse/services/combo.js +++ b/open-sse/services/combo.js @@ -74,13 +74,22 @@ export async function handleComboChat({ body, models, handleSingleModel, log }) } // Check if should fallback to next model - const { shouldFallback } = checkFallbackError(result.status, errorText); - + const { shouldFallback, cooldownMs } = checkFallbackError(result.status, errorText); + if (!shouldFallback) { log.warn("COMBO", `Model ${modelStr} failed (no fallback)`, { status: result.status }); return result; } + // For transient errors (503/502/504), wait for cooldown before falling through + // so a briefly-overloaded provider gets a chance to recover rather than being + // skipped immediately (fixes: combo falls through on transient 503) + if (cooldownMs && cooldownMs > 0 && cooldownMs <= 5000 && + (result.status === 503 || result.status === 502 || result.status === 504)) { + log.info("COMBO", `Model ${modelStr} transient ${result.status}, waiting ${cooldownMs}ms before next`); + await new Promise(r => setTimeout(r, cooldownMs)); + } + // Fallback to next model lastError = errorText || String(result.status); if (!lastStatus) lastStatus = result.status; @@ -94,7 +103,11 @@ export async function handleComboChat({ body, models, handleSingleModel, log }) } // All models failed - const status = 406; + // Use 503 (Service Unavailable) rather than 406 (Not Acceptable) — 406 implies + // the request itself is invalid, but here the providers are simply unavailable + // or have no active credentials. 503 is more accurate and retryable by clients. + const allDisabled = lastError && lastError.toLowerCase().includes("no credentials"); + const status = allDisabled ? 503 : (lastStatus || 503); const msg = lastError || "All combo models unavailable"; if (earliestRetryAfter) {