From a065d5f64cff8d7dbdb3ce12969600defb388da3 Mon Sep 17 00:00:00 2001 From: yushen Date: Tue, 3 Feb 2026 18:27:21 +0800 Subject: [PATCH] fix(auth-profiles): add jittered cooldowns --- src/agent/auth-profiles/usage.test.ts | 39 ++++++++++++++++----------- src/agent/auth-profiles/usage.ts | 22 ++++++++++----- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/agent/auth-profiles/usage.test.ts b/src/agent/auth-profiles/usage.test.ts index daaeed8e..185f8adf 100644 --- a/src/agent/auth-profiles/usage.test.ts +++ b/src/agent/auth-profiles/usage.test.ts @@ -19,18 +19,24 @@ import type { ProfileUsageStats } from "./types.js"; describe("calculateCooldownMs", () => { it("applies exponential backoff with a 1h cap", () => { - expect(calculateCooldownMs(1)).toBe(60_000); // 1 min - expect(calculateCooldownMs(2)).toBe(5 * 60_000); // 5 min - expect(calculateCooldownMs(3)).toBe(25 * 60_000); // 25 min - expect(calculateCooldownMs(4)).toBe(60 * 60_000); // 1 hour (cap) - expect(calculateCooldownMs(5)).toBe(60 * 60_000); // 1 hour (cap) - expect(calculateCooldownMs(100)).toBe(60 * 60_000); // still capped + const max = () => 1; // equal-jitter max + expect(calculateCooldownMs(1, max)).toBe(60_000); // 1 min + expect(calculateCooldownMs(2, max)).toBe(5 * 60_000); // 5 min + expect(calculateCooldownMs(3, max)).toBe(25 * 60_000); // 25 min + expect(calculateCooldownMs(4, max)).toBe(60 * 60_000); // 1 hour (cap) + expect(calculateCooldownMs(5, max)).toBe(60 * 60_000); // 1 hour (cap) + expect(calculateCooldownMs(100, max)).toBe(60 * 60_000); // still capped }); it("returns 0 for errorCount <= 0", () => { expect(calculateCooldownMs(0)).toBe(0); expect(calculateCooldownMs(-1)).toBe(0); }); + + it("applies equal jitter with a 50% floor", () => { + const min = () => 0; + expect(calculateCooldownMs(1, min)).toBe(30_000); // 50% of 1 min + }); }); // ============================================================ @@ -40,11 +46,12 @@ describe("calculateCooldownMs", () => { describe("calculateBillingDisableMs", () => { it("applies exponential backoff with a 24h cap", () => { const h = 60 * 60 * 1000; - expect(calculateBillingDisableMs(1)).toBe(5 * h); // 5h - expect(calculateBillingDisableMs(2)).toBe(10 * h); // 10h - expect(calculateBillingDisableMs(3)).toBe(20 * h); // 20h - expect(calculateBillingDisableMs(4)).toBe(24 * h); // 24h (cap) - expect(calculateBillingDisableMs(5)).toBe(24 * h); // still capped + const max = () => 1; + expect(calculateBillingDisableMs(1, max)).toBe(5 * h); // 5h + expect(calculateBillingDisableMs(2, max)).toBe(10 * h); // 10h + expect(calculateBillingDisableMs(3, max)).toBe(20 * h); // 20h + expect(calculateBillingDisableMs(4, max)).toBe(24 * h); // 24h (cap) + expect(calculateBillingDisableMs(5, max)).toBe(24 * h); // still capped }); it("returns 0 for count <= 0", () => { @@ -94,7 +101,7 @@ describe("computeNextProfileUsageStats", () => { const now = 1_700_000_000_000; it("increments errorCount and sets cooldown for non-billing failure", () => { - const next = computeNextProfileUsageStats({}, "rate_limit", now); + const next = computeNextProfileUsageStats({}, "rate_limit", now, () => 1); expect(next.errorCount).toBe(1); expect(next.lastFailureAt).toBe(now); expect(next.cooldownUntil).toBe(now + COOLDOWN_BASE_MS); @@ -108,14 +115,14 @@ describe("computeNextProfileUsageStats", () => { lastFailureAt: now - 1000, failureCounts: { rate_limit: 2 }, }; - const next = computeNextProfileUsageStats(stats, "rate_limit", now); + const next = computeNextProfileUsageStats(stats, "rate_limit", now, () => 1); expect(next.errorCount).toBe(3); // Error 3 -> 25 min cooldown expect(next.cooldownUntil).toBe(now + 25 * 60_000); }); it("sets disabledUntil for billing failures (~5h by default)", () => { - const next = computeNextProfileUsageStats({}, "billing", now); + const next = computeNextProfileUsageStats({}, "billing", now, () => 1); expect(next.errorCount).toBe(1); expect(next.disabledUntil).toBe(now + 5 * 60 * 60 * 1000); expect(next.disabledReason).toBe("billing"); @@ -129,7 +136,7 @@ describe("computeNextProfileUsageStats", () => { lastFailureAt: oldFailure, failureCounts: { auth: 3, rate_limit: 2 }, }; - const next = computeNextProfileUsageStats(stats, "auth", now); + const next = computeNextProfileUsageStats(stats, "auth", now, () => 1); // Counters reset, so this is treated as error #1 expect(next.errorCount).toBe(1); expect(next.failureCounts?.auth).toBe(1); @@ -141,7 +148,7 @@ describe("computeNextProfileUsageStats", () => { errorCount: 10, lastFailureAt: now - 1000, }; - const next = computeNextProfileUsageStats(stats, "unknown", now); + const next = computeNextProfileUsageStats(stats, "unknown", now, () => 1); expect(next.cooldownUntil).toBe(now + COOLDOWN_MAX_MS); }); }); diff --git a/src/agent/auth-profiles/usage.ts b/src/agent/auth-profiles/usage.ts index 181c78de..5aad992b 100644 --- a/src/agent/auth-profiles/usage.ts +++ b/src/agent/auth-profiles/usage.ts @@ -44,10 +44,18 @@ export function isProfileInCooldown(stats: ProfileUsageStats, now?: number): boo * * Formula: min(COOLDOWN_MAX_MS, COOLDOWN_BASE_MS * COOLDOWN_FACTOR ^ min(errorCount - 1, 3)) */ -export function calculateCooldownMs(errorCount: number): number { +function applyEqualJitter(baseMs: number, rng?: () => number): number { + if (baseMs <= 0) return 0; + const rand = Math.min(1, Math.max(0, (rng ?? Math.random)())); + const half = Math.floor(baseMs / 2); + return half + Math.floor(rand * (baseMs - half)); +} + +export function calculateCooldownMs(errorCount: number, rng?: () => number): number { if (errorCount <= 0) return 0; const exponent = Math.min(errorCount - 1, 3); - return Math.min(COOLDOWN_MAX_MS, COOLDOWN_BASE_MS * COOLDOWN_FACTOR ** exponent); + const base = Math.min(COOLDOWN_MAX_MS, COOLDOWN_BASE_MS * COOLDOWN_FACTOR ** exponent); + return applyEqualJitter(base, rng); } /** @@ -56,13 +64,14 @@ export function calculateCooldownMs(errorCount: number): number { * * Formula: min(BILLING_MAX_HOURS, BILLING_BACKOFF_HOURS * 2 ^ (count - 1)) * hours_to_ms */ -export function calculateBillingDisableMs(billingFailCount: number): number { +export function calculateBillingDisableMs(billingFailCount: number, rng?: () => number): number { if (billingFailCount <= 0) return 0; const hours = Math.min( BILLING_MAX_HOURS, BILLING_BACKOFF_HOURS * 2 ** (billingFailCount - 1), ); - return hours * 60 * 60 * 1000; + const base = hours * 60 * 60 * 1000; + return applyEqualJitter(base, rng); } // ============================================================ @@ -83,6 +92,7 @@ export function computeNextProfileUsageStats( stats: ProfileUsageStats, reason: AuthProfileFailureReason, now?: number, + rng?: () => number, ): ProfileUsageStats { const ts = now ?? Date.now(); const next = { ...stats }; @@ -106,11 +116,11 @@ export function computeNextProfileUsageStats( // Apply cooldown based on failure reason if (reason === "billing") { const billingCount = next.failureCounts.billing ?? 1; - const disableMs = calculateBillingDisableMs(billingCount); + const disableMs = calculateBillingDisableMs(billingCount, rng); next.disabledUntil = ts + disableMs; next.disabledReason = "billing"; } else { - const cooldownMs = calculateCooldownMs(next.errorCount); + const cooldownMs = calculateCooldownMs(next.errorCount, rng); next.cooldownUntil = ts + cooldownMs; }