test(litellm): add integration tests for retry and rate limiting

Solution-ID: SOL-1735410001 Issue-ID: ISS-1766921318981-21 Task-ID: T3
2026-03-27 20:00:44 +08:00 · 2025-12-29 15:14:03 +08:00
parent 5d31bfd9fa
commit 8578d2d426
2 changed files with 517 additions and 1 deletions
--- a/ccw/src/tools/litellm-executor.ts
+++ b/ccw/src/tools/litellm-executor.ts
@@ -19,6 +19,10 @@ export interface LiteLLMExecutionOptions {
  includeDirs?: string[]; // Additional directories for @patterns
  enableCache?: boolean; // Override endpoint cache setting
  onOutput?: (data: { type: string; data: string }) => void;
+  /** Number of retries after the initial attempt (default: 0) */
+  maxRetries?: number;
+  /** Base delay for exponential backoff in milliseconds (default: 1000) */
+  retryBaseDelayMs?: number;
 }

 export interface LiteLLMExecutionResult {
@@ -180,7 +184,15 @@ export async function executeLiteLLMEndpoint(
    }

    // Use litellm-client to call chat
-    const response = await client.chat(finalPrompt, endpoint.model);
+    const response = await callWithRetries(
+      () => client.chat(finalPrompt, endpoint.model),
+      {
+        maxRetries: options.maxRetries ?? 0,
+        baseDelayMs: options.retryBaseDelayMs ?? 1000,
+        onOutput,
+        rateLimitKey: `${provider.type}:${endpoint.model}`,
+      },
+    );

    if (onOutput) {
      onOutput({ type: 'stdout', data: response });
@@ -239,3 +251,74 @@ function getProviderBaseUrlEnvVarName(providerType: string): string | null {

  return envVarMap[providerType] || null;
 }
+
+/**
+ * Module-wide schedule used to space out retries after rate-limit failures.
+ *
+ * Key: the `rateLimitKey` passed to `callWithRetries`.
+ * Value: epoch-millisecond timestamp of the earliest slot the next
+ * rate-limited retry for that key may be scheduled at.
+ */
+const rateLimitRetryQueueNextAt: Map<string, number> = new Map();
+
+/**
+ * Promise-based pause for use with `await`.
+ *
+ * @param ms - Delay in milliseconds, handed unchanged to `setTimeout`.
+ * @returns A promise that resolves with no value once the timer fires.
+ */
+function sleep(ms: number): Promise<void> {
+  return new Promise<void>((wake) => {
+    setTimeout(wake, ms);
+  });
+}
+
+/**
+ * Decides whether an error message describes a rate-limit failure.
+ *
+ * Matches, case-insensitively:
+ * - the status code `429`, but only when it is not part of a longer digit
+ *   run (so `14290` does not count);
+ * - "rate limit" written with an optional space, hyphen or underscore
+ *   (`rate limit`, `rate-limit`, `rate_limit_exceeded`, `RateLimitError`);
+ * - "too many requests" with the same separator tolerance.
+ *
+ * @param errorMessage - Text of the failure, typically `Error.message`.
+ * @returns `true` when the message looks like a rate-limit response.
+ */
+function isRateLimitError(errorMessage: string): boolean {
+  return /(?<!\d)429(?!\d)|rate[\s_-]?limit|too[\s_-]?many[\s_-]?requests/i.test(
+    errorMessage,
+  );
+}
+
+/**
+ * Decides whether a failed call is worth retrying, based on its message.
+ *
+ * Auth/config failures (401, 403, "unauthorized", "forbidden") are never
+ * retried, even when the message also mentions a retryable condition.
+ * Otherwise the error is retryable when it mentions a rate limit (429,
+ * "rate limit", "too many requests"), a transient server status
+ * (500/502/503/504), or a network-level failure (timeout, ECONNRESET,
+ * ENOTFOUND, ECONNREFUSED, socket hang up).
+ *
+ * Status codes only match when they are not part of a longer digit run, so
+ * a message such as "limit is 5000 tokens" is not mistaken for a 500.
+ *
+ * @param errorMessage - Text of the failure, typically `Error.message`.
+ * @returns `true` when a retry may succeed; `false` otherwise.
+ */
+function isRetryableError(errorMessage: string): boolean {
+  // Never retry auth/config errors
+  if (/(?<!\d)(?:401|403)(?!\d)|unauthorized|forbidden/i.test(errorMessage)) {
+    return false;
+  }
+
+  // Retry rate limits, transient server errors, and network timeouts.
+  // Rate-limit wording is listed explicitly: a rate-limit message that lacks a
+  // literal 429 would otherwise never be retried at all.
+  return /(?<!\d)(?:429|500|502|503|504)(?!\d)|rate[\s_-]?limit|too[\s_-]?many[\s_-]?requests|timeout|timed out|econnreset|enotfound|econnrefused|socket hang up/i.test(
+    errorMessage,
+  );
+}
+
+/**
+ * Runs `call`, retrying transient failures with exponential backoff.
+ *
+ * A failure is retried only while fewer than `maxRetries` retries have been
+ * made and `isRetryableError` accepts its message; otherwise the error is
+ * rethrown unchanged. Retry number n (0-based) waits `baseDelayMs * 2 ** n`
+ * milliseconds, capped at the largest delay `setTimeout` honours.
+ *
+ * Failures recognised by `isRateLimitError` are additionally queued through
+ * the shared `rateLimitRetryQueueNextAt` schedule: each one reserves a slot
+ * no earlier than the previous reservation for the same `rateLimitKey`, so
+ * concurrent callers do not all retry at the same instant.
+ *
+ * @param call - Operation to attempt; invoked once per attempt.
+ * @param options - Retry settings:
+ *   - `maxRetries`: retries allowed after the first attempt; `NaN` counts as 0.
+ *   - `baseDelayMs`: backoff base in milliseconds; `NaN` and negative values count as 0.
+ *   - `onOutput`: optional sink that receives one `stderr` entry per retry.
+ *   - `rateLimitKey`: key under which rate-limited retries are queued.
+ * @returns The value `call` resolves to on its first success.
+ * @throws The error from `call` when it is not retryable or retries are exhausted.
+ */
+async function callWithRetries(
+  call: () => Promise<string>,
+  options: {
+    maxRetries: number;
+    baseDelayMs: number;
+    onOutput?: (data: { type: string; data: string }) => void;
+    rateLimitKey: string;
+  },
+): Promise<string> {
+  const { onOutput, rateLimitKey } = options;
+
+  // `attempt >= NaN` is always false, so an unparsable retry count would
+  // otherwise retry a persistently failing call forever.
+  const maxRetries = Number.isNaN(options.maxRetries) ? 0 : options.maxRetries;
+
+  // A NaN delay would be stored in the shared schedule and, because
+  // Math.max(NaN, x) is NaN, disable queueing for that key from then on.
+  const baseDelayMs = Number.isNaN(options.baseDelayMs) ? 0 : Math.max(0, options.baseDelayMs);
+
+  // Node's setTimeout silently falls back to ~1ms above a signed 32-bit delay,
+  // which would turn a very long backoff into a tight retry loop.
+  const maxTimerDelayMs = 2 ** 31 - 1;
+
+  let attempt = 0;
+
+  while (true) {
+    try {
+      return await call();
+    } catch (err) {
+      const errorMessage = err instanceof Error ? err.message : String(err);
+
+      if (attempt >= maxRetries || !isRetryableError(errorMessage)) {
+        throw err;
+      }
+
+      // The exponent is bounded so the product can never become 0 * Infinity.
+      const delayMs = Math.min(baseDelayMs * 2 ** Math.min(attempt, 31), maxTimerDelayMs);
+
+      // NOTE: for rate-limited retries the real wait can exceed the reported
+      // delay when earlier retries for the same key are already queued.
+      if (onOutput) {
+        onOutput({
+          type: 'stderr',
+          data: `[LiteLLM retry ${attempt + 1}/${maxRetries}: waiting ${delayMs}ms] ${errorMessage}\n`,
+        });
+      }
+
+      attempt += 1;
+
+      if (isRateLimitError(errorMessage)) {
+        const now = Date.now();
+        const earliestAt = now + delayMs;
+        const queuedAt = rateLimitRetryQueueNextAt.get(rateLimitKey) ?? 0;
+        // Take whichever is later: our own backoff or the slot left by the
+        // previous rate-limited retry for this key.
+        const scheduledAt = Math.max(queuedAt, earliestAt);
+        // Reserve the following slot so the next queued retry lands after us.
+        rateLimitRetryQueueNextAt.set(rateLimitKey, scheduledAt + delayMs);
+
+        await sleep(scheduledAt - now);
+      } else {
+        await sleep(delayMs);
+      }
+    }
+  }
+}