feat: add sticky round-robin routing strategy

Implements a "sticky" round-robin strategy that uses the same provider
account for a configurable number of consecutive calls (default 3)
before switching to the next one. This optimizes for prompt caching
by reducing organization/account rotation. Adds a configuration input
to the Profile settings page.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Catalin Stanciu 2026-01-06 22:35:21 +02:00 committed by decolua
parent f2abcc6585
commit 4f292aae63
3 changed files with 84 additions and 14 deletions

View file

@ -76,6 +76,24 @@ export default function ProfilePage() {
}
};
/**
 * Persist a new sticky round-robin limit and mirror it into local state.
 *
 * The raw input value is parsed as a base-10 integer. Values that do not
 * parse, or that are below 1, are ignored without issuing a request.
 * Local `settings` state is only updated after the server acknowledges the
 * PATCH with an OK response.
 *
 * @param {string|number} limit - Raw value from the number input.
 * @returns {Promise<void>} Resolves once the request settles; never rejects.
 */
const updateStickyLimit = async (limit) => {
  // Explicit radix so strings such as "0x10" are not read as hexadecimal.
  const numLimit = Number.parseInt(limit, 10);
  if (Number.isNaN(numLimit) || numLimit < 1) return;

  try {
    const res = await fetch("/api/settings", {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ stickyRoundRobinLimit: numLimit }),
    });

    if (!res.ok) {
      // Surface rejected updates instead of dropping them silently.
      console.error("Failed to update sticky limit: HTTP", res.status);
      return;
    }

    setSettings((prev) => ({ ...prev, stickyRoundRobinLimit: numLimit }));
  } catch (err) {
    console.error("Failed to update sticky limit:", err);
  }
};
return (
<div className="max-w-2xl mx-auto">
<div className="flex flex-col gap-6">
@ -165,9 +183,31 @@ export default function ProfilePage() {
disabled={loading}
/>
</div>
{/* Sticky Round Robin Limit */}
{settings.fallbackStrategy === "round-robin" && (
<div className="flex items-center justify-between pt-2 border-t border-border/50">
<div>
<p className="font-medium">Sticky Limit</p>
<p className="text-sm text-text-muted">
Calls per account before switching
</p>
</div>
<Input
type="number"
min="1"
max="10"
value={settings.stickyRoundRobinLimit || 3}
onChange={(e) => updateStickyLimit(e.target.value)}
disabled={loading}
className="w-20 text-center"
/>
</div>
)}
<p className="text-xs text-text-muted italic pt-2 border-t border-border/50">
{settings.fallbackStrategy === "round-robin"
? "Currently distributing requests across all available accounts."
? `Currently distributing requests across all available accounts with ${settings.stickyRoundRobinLimit || 3} calls per account.`
: "Currently using accounts in priority order (Fill First)."}
</p>
</div>

View file

@ -39,7 +39,8 @@ const defaultData = {
combos: [],
apiKeys: [],
settings: {
cloudEnabled: false
cloudEnabled: false,
stickyRoundRobinLimit: 3
}
};
@ -178,7 +179,8 @@ export async function createProviderConnection(data) {
"displayName", "email", "globalPriority", "defaultModel",
"accessToken", "refreshToken", "expiresAt", "tokenType",
"scope", "idToken", "projectId", "apiKey", "testStatus",
"lastTested", "lastError", "lastErrorAt", "rateLimitedUntil", "expiresIn", "errorCode"
"lastTested", "lastError", "lastErrorAt", "rateLimitedUntil", "expiresIn", "errorCode",
"consecutiveUseCount"
];
for (const field of optionalFields) {
@ -470,7 +472,8 @@ export async function cleanupProviderConnections() {
"displayName", "email", "globalPriority", "defaultModel",
"accessToken", "refreshToken", "expiresAt", "tokenType",
"scope", "idToken", "projectId", "apiKey", "testStatus",
"lastTested", "lastError", "lastErrorAt", "rateLimitedUntil", "expiresIn"
"lastTested", "lastError", "lastErrorAt", "rateLimitedUntil", "expiresIn",
"consecutiveUseCount"
];
let cleaned = 0;

View file

@ -33,17 +33,44 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
let connection;
if (strategy === "round-robin") {
// Sort by lastUsed (nulls first) to pick the least recently used
const sorted = [...availableConnections].sort((a, b) => {
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
if (!a.lastUsedAt) return -1;
if (!b.lastUsedAt) return 1;
return new Date(a.lastUsedAt) - new Date(b.lastUsedAt);
});
connection = sorted[0];
const stickyLimit = settings.stickyRoundRobinLimit || 3;
// Update lastUsedAt asynchronously
updateProviderConnection(connection.id, { lastUsedAt: new Date().toISOString() }).catch(() => {});
// Sort by lastUsed (most recent first) to find current candidate
const byRecency = [...availableConnections].sort((a, b) => {
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
if (!a.lastUsedAt) return 1;
if (!b.lastUsedAt) return -1;
return new Date(b.lastUsedAt) - new Date(a.lastUsedAt);
});
const current = byRecency[0];
const currentCount = current?.consecutiveUseCount || 0;
if (current && current.lastUsedAt && currentCount < stickyLimit) {
// Stay with current account
connection = current;
// Update lastUsedAt and increment count
updateProviderConnection(connection.id, {
lastUsedAt: new Date().toISOString(),
consecutiveUseCount: (connection.consecutiveUseCount || 0) + 1
}).catch(() => {});
} else {
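// Limit reached (or nothing used yet): pick the least recently used connection.
// The current connection is not explicitly excluded; it normally sorts last because it has the newest lastUsedAt.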
const sortedByOldest = [...availableConnections].sort((a, b) => {
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
if (!a.lastUsedAt) return -1;
if (!b.lastUsedAt) return 1;
return new Date(a.lastUsedAt) - new Date(b.lastUsedAt);
});
connection = sortedByOldest[0];
// Update lastUsedAt and reset count to 1
updateProviderConnection(connection.id, {
lastUsedAt: new Date().toISOString(),
consecutiveUseCount: 1
}).catch(() => {});
}
} else {
// Default: fill-first (already sorted by priority in getProviderConnections)
connection = availableConnections[0];