feat: add sticky round-robin routing strategy
Implements a "sticky" round-robin strategy that uses the same provider account for a configurable number of consecutive calls (default 3) before switching to the next one. This optimizes for prompt caching by reducing organization/account rotation. Adds a configuration input to the Profile settings page. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
f2abcc6585
commit
4f292aae63
3 changed files with 84 additions and 14 deletions
|
|
@ -76,6 +76,24 @@ export default function ProfilePage() {
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Persist a new sticky round-robin limit and, once the server accepts it,
 * mirror the value into the local settings state.
 *
 * @param {string|number} limit - Raw value from the input; parsed as a base-10 integer.
 * @returns {Promise<void>} Resolves when the request has settled; network errors are logged, not thrown.
 */
const updateStickyLimit = async (limit) => {
  // Explicit radix: without it a value such as "0x10" would be read as hex (16).
  const numLimit = Number.parseInt(limit, 10);
  // Ignore empty / non-numeric input and anything below one call per account.
  if (Number.isNaN(numLimit) || numLimit < 1) return;

  try {
    const res = await fetch("/api/settings", {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ stickyRoundRobinLimit: numLimit }),
    });
    if (!res.ok) {
      // Surface rejected updates instead of dropping them silently.
      console.error("Failed to update sticky limit: HTTP", res.status);
      return;
    }
    // Only reflect the new value locally after the server has accepted it.
    setSettings((prev) => ({ ...prev, stickyRoundRobinLimit: numLimit }));
  } catch (err) {
    console.error("Failed to update sticky limit:", err);
  }
};
|
||||
|
||||
return (
|
||||
<div className="max-w-2xl mx-auto">
|
||||
<div className="flex flex-col gap-6">
|
||||
|
|
@ -165,9 +183,31 @@ export default function ProfilePage() {
|
|||
disabled={loading}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Sticky Round Robin Limit */}
|
||||
{settings.fallbackStrategy === "round-robin" && (
|
||||
<div className="flex items-center justify-between pt-2 border-t border-border/50">
|
||||
<div>
|
||||
<p className="font-medium">Sticky Limit</p>
|
||||
<p className="text-sm text-text-muted">
|
||||
Calls per account before switching
|
||||
</p>
|
||||
</div>
|
||||
<Input
|
||||
type="number"
|
||||
min="1"
|
||||
max="10"
|
||||
value={settings.stickyRoundRobinLimit || 3}
|
||||
onChange={(e) => updateStickyLimit(e.target.value)}
|
||||
disabled={loading}
|
||||
className="w-20 text-center"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<p className="text-xs text-text-muted italic pt-2 border-t border-border/50">
|
||||
{settings.fallbackStrategy === "round-robin"
|
||||
? "Currently distributing requests across all available accounts."
|
||||
? `Currently distributing requests across all available accounts with ${settings.stickyRoundRobinLimit || 3} calls per account.`
|
||||
: "Currently using accounts in priority order (Fill First)."}
|
||||
</p>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -39,7 +39,8 @@ const defaultData = {
|
|||
combos: [],
|
||||
apiKeys: [],
|
||||
settings: {
|
||||
cloudEnabled: false
|
||||
cloudEnabled: false,
|
||||
stickyRoundRobinLimit: 3
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -178,7 +179,8 @@ export async function createProviderConnection(data) {
|
|||
"displayName", "email", "globalPriority", "defaultModel",
|
||||
"accessToken", "refreshToken", "expiresAt", "tokenType",
|
||||
"scope", "idToken", "projectId", "apiKey", "testStatus",
|
||||
"lastTested", "lastError", "lastErrorAt", "rateLimitedUntil", "expiresIn", "errorCode"
|
||||
"lastTested", "lastError", "lastErrorAt", "rateLimitedUntil", "expiresIn", "errorCode",
|
||||
"consecutiveUseCount"
|
||||
];
|
||||
|
||||
for (const field of optionalFields) {
|
||||
|
|
@ -470,7 +472,8 @@ export async function cleanupProviderConnections() {
|
|||
"displayName", "email", "globalPriority", "defaultModel",
|
||||
"accessToken", "refreshToken", "expiresAt", "tokenType",
|
||||
"scope", "idToken", "projectId", "apiKey", "testStatus",
|
||||
"lastTested", "lastError", "lastErrorAt", "rateLimitedUntil", "expiresIn"
|
||||
"lastTested", "lastError", "lastErrorAt", "rateLimitedUntil", "expiresIn",
|
||||
"consecutiveUseCount"
|
||||
];
|
||||
|
||||
let cleaned = 0;
|
||||
|
|
|
|||
|
|
@ -33,17 +33,44 @@ export async function getProviderCredentials(provider, excludeConnectionId = nul
|
|||
|
||||
let connection;
|
||||
if (strategy === "round-robin") {
|
||||
// Sort by lastUsed (nulls first) to pick the least recently used
|
||||
const sorted = [...availableConnections].sort((a, b) => {
|
||||
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
|
||||
if (!a.lastUsedAt) return -1;
|
||||
if (!b.lastUsedAt) return 1;
|
||||
return new Date(a.lastUsedAt) - new Date(b.lastUsedAt);
|
||||
});
|
||||
connection = sorted[0];
|
||||
const stickyLimit = settings.stickyRoundRobinLimit || 3;
|
||||
|
||||
// Update lastUsedAt asynchronously
|
||||
updateProviderConnection(connection.id, { lastUsedAt: new Date().toISOString() }).catch(() => {});
|
||||
// Sort by lastUsed (most recent first) to find current candidate
|
||||
const byRecency = [...availableConnections].sort((a, b) => {
|
||||
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
|
||||
if (!a.lastUsedAt) return 1;
|
||||
if (!b.lastUsedAt) return -1;
|
||||
return new Date(b.lastUsedAt) - new Date(a.lastUsedAt);
|
||||
});
|
||||
|
||||
const current = byRecency[0];
|
||||
const currentCount = current?.consecutiveUseCount || 0;
|
||||
|
||||
if (current && current.lastUsedAt && currentCount < stickyLimit) {
|
||||
// Stay with current account
|
||||
connection = current;
|
||||
// Update lastUsedAt and increment count
|
||||
updateProviderConnection(connection.id, {
|
||||
lastUsedAt: new Date().toISOString(),
|
||||
consecutiveUseCount: (connection.consecutiveUseCount || 0) + 1
|
||||
}).catch(() => {});
|
||||
} else {
|
||||
// Pick the least recently used connection (the current one is not explicitly excluded; it simply sorts last when it has the newest lastUsedAt)
|
||||
const sortedByOldest = [...availableConnections].sort((a, b) => {
|
||||
if (!a.lastUsedAt && !b.lastUsedAt) return (a.priority || 999) - (b.priority || 999);
|
||||
if (!a.lastUsedAt) return -1;
|
||||
if (!b.lastUsedAt) return 1;
|
||||
return new Date(a.lastUsedAt) - new Date(b.lastUsedAt);
|
||||
});
|
||||
|
||||
connection = sortedByOldest[0];
|
||||
|
||||
// Update lastUsedAt and reset count to 1
|
||||
updateProviderConnection(connection.id, {
|
||||
lastUsedAt: new Date().toISOString(),
|
||||
consecutiveUseCount: 1
|
||||
}).catch(() => {});
|
||||
}
|
||||
} else {
|
||||
// Default: fill-first (already sorted by priority in getProviderConnections)
|
||||
connection = availableConnections[0];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue