/**
 * LiteLLM Executor - Execute LiteLLM endpoints with context caching
 * Integrates with context-cache for file packing and LiteLLM client for API calls
 */

import { getLiteLLMClient } from './litellm-client.js';
import { handler as contextCacheHandler } from './context-cache.js';
import {
  findEndpointById,
  getProviderWithResolvedEnvVars,
} from '../config/litellm-api-config-manager.js';
import type { CustomEndpoint, ProviderCredential } from '../types/litellm-api-config.js';
import type { CliOutputUnit } from './cli-output-converter.js';

export interface LiteLLMExecutionOptions {
  prompt: string;
  endpointId: string; // Custom endpoint ID (e.g., "my-gpt4o")
  baseDir: string; // Project base directory
  cwd?: string; // Working directory for file resolution
  includeDirs?: string[]; // Additional directories for @patterns
  enableCache?: boolean; // Override endpoint cache setting
  onOutput?: (unit: CliOutputUnit) => void;
  /** Number of retries after the initial attempt (default: 0) */
  maxRetries?: number;
  /** Base delay for exponential backoff in milliseconds (default: 1000) */
  retryBaseDelayMs?: number;
}

export interface LiteLLMExecutionResult {
  success: boolean;
  output: string;
  model: string;
  provider: string;
  cacheUsed: boolean;
  cachedFiles?: string[];
  error?: string;
}

/**
 * Extract @patterns from prompt text
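 *
 * @example
 * // Illustrative only:
 * // extractPatterns('Review @CLAUDE.md and @src/index.ts')
 * //   -> ['@CLAUDE.md', '@src/index.ts']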
 */
export function extractPatterns(prompt: string): string[] {
  // Match @path patterns: @src/**/*.ts, @CLAUDE.md, @../shared/**/*
  const regex = /@([^\s]+)/g;
  const patterns: string[] = [];
  let match;
  while ((match = regex.exec(prompt)) !== null) {
    patterns.push('@' + match[1]);
  }
  return patterns;
}

/**
 * Execute LiteLLM endpoint with optional context caching
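 *
 * @example
 * // Illustrative sketch; the endpoint ID and prompt are placeholders,
 * // not values shipped with this module:
 * const result = await executeLiteLLMEndpoint({
 *   prompt: 'Summarize @CLAUDE.md',
 *   endpointId: 'my-gpt4o',
 *   baseDir: process.cwd(),
 * });
 * if (!result.success) console.error(result.error);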
 */
export async function executeLiteLLMEndpoint(
  options: LiteLLMExecutionOptions
): Promise<LiteLLMExecutionResult> {
  const { prompt, endpointId, baseDir, cwd, includeDirs, enableCache, onOutput } = options;

  // 1. Find endpoint configuration
  const endpoint = findEndpointById(baseDir, endpointId);
  if (!endpoint) {
    return {
      success: false,
      output: '',
      model: '',
      provider: '',
      cacheUsed: false,
      error: `Endpoint not found: ${endpointId}`,
    };
  }

  // 2. Get provider with resolved env vars
  const provider = getProviderWithResolvedEnvVars(baseDir, endpoint.providerId);
  if (!provider) {
    return {
      success: false,
      output: '',
      model: '',
      provider: '',
      cacheUsed: false,
      error: `Provider not found: ${endpoint.providerId}`,
    };
  }

  // Verify API key is available
  if (!provider.resolvedApiKey) {
    return {
      success: false,
      output: '',
      model: endpoint.model,
      provider: provider.type,
      cacheUsed: false,
      error: `API key not configured for provider: ${provider.name}`,
    };
  }

  // 3. Process context cache if enabled
  let finalPrompt = prompt;
  let cacheUsed = false;
  let cachedFiles: string[] = [];

  const shouldCache = enableCache ?? endpoint.cacheStrategy.enabled;
  if (shouldCache) {
    const patterns = extractPatterns(prompt);
    if (patterns.length > 0) {
      if (onOutput) {
        onOutput({
          type: 'stderr',
          content: `[Context cache: Found ${patterns.length} @patterns]\n`,
          timestamp: new Date().toISOString()
        });
      }

      // Pack files into cache
      const packResult = await contextCacheHandler({
        operation: 'pack',
        patterns,
        cwd: cwd || process.cwd(),
        include_dirs: includeDirs,
        ttl: endpoint.cacheStrategy.ttlMinutes * 60 * 1000,
        max_file_size: endpoint.cacheStrategy.maxSizeKB * 1024,
      });

      if (packResult.success && packResult.result) {
        const pack = packResult.result as any;

        if (onOutput) {
          onOutput({
            type: 'stderr',
            content: `[Context cache: Packed ${pack.files_packed} files, ${pack.total_bytes} bytes]\n`,
            timestamp: new Date().toISOString()
          });
        }

        // Read cached content
        const readResult = await contextCacheHandler({
          operation: 'read',
          session_id: pack.session_id,
          limit: endpoint.cacheStrategy.maxSizeKB * 1024,
        });

        if (readResult.success && readResult.result) {
          const read = readResult.result as any;
          // Prepend cached content to prompt
          finalPrompt = `${read.content}\n\n---\n\n${prompt}`;
          cacheUsed = true;
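          // The pack result is only consulted for a file count here, so
          // placeholder entries are recorded instead of real paths.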
          cachedFiles = pack.files_packed ? Array(pack.files_packed).fill('...') : [];

          if (onOutput) {
            onOutput({
              type: 'stderr',
              content: `[Context cache: Applied to prompt]\n`,
              timestamp: new Date().toISOString()
            });
          }
        }
      } else if (packResult.error) {
        if (onOutput) {
          onOutput({
            type: 'stderr',
            content: `[Context cache warning: ${packResult.error}]\n`,
            timestamp: new Date().toISOString()
          });
        }
      }
    }
  }

  // 4. Call LiteLLM
  try {
    if (onOutput) {
      onOutput({
        type: 'stderr',
        content: `[LiteLLM: Calling ${provider.type}/${endpoint.model}]\n`,
        timestamp: new Date().toISOString()
      });
    }

    const client = getLiteLLMClient({
      pythonPath: 'python',
      timeout: 120000, // 2 minutes
    });

    // Configure provider credentials via environment
    // LiteLLM uses standard env vars like OPENAI_API_KEY, ANTHROPIC_API_KEY
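    // Note: this mutates process-wide environment state, so concurrent executions
    // that resolve different credentials for the same provider type can interleave.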
    const envVarName = getProviderEnvVarName(provider.type);
    if (envVarName) {
      process.env[envVarName] = provider.resolvedApiKey;
    }

    // Set base URL if custom
    if (provider.apiBase) {
      const baseUrlEnvVar = getProviderBaseUrlEnvVarName(provider.type);
      if (baseUrlEnvVar) {
        process.env[baseUrlEnvVar] = provider.apiBase;
      }
    }

    // Set custom headers from provider advanced settings
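    // (CCW_LITELLM_EXTRA_HEADERS is presumably parsed by the Python bridge and
    // forwarded as extra request headers; that contract is assumed here, not enforced.)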
    if (provider.advancedSettings?.customHeaders) {
      process.env['CCW_LITELLM_EXTRA_HEADERS'] = JSON.stringify(provider.advancedSettings.customHeaders);
    } else {
      // Clear any previous custom headers
      delete process.env['CCW_LITELLM_EXTRA_HEADERS'];
    }

    // Use litellm-client to call chat
    const response = await callWithRetries(
      () => client.chat(finalPrompt, endpoint.model),
      {
        maxRetries: options.maxRetries ?? 0,
        baseDelayMs: options.retryBaseDelayMs ?? 1000,
        onOutput,
        rateLimitKey: `${provider.type}:${endpoint.model}`,
      },
    );

    if (onOutput) {
      onOutput({
        type: 'stdout',
        content: response,
        timestamp: new Date().toISOString()
      });
    }

    return {
      success: true,
      output: response,
      model: endpoint.model,
      provider: provider.type,
      cacheUsed,
      cachedFiles,
    };
  } catch (error) {
    const errorMsg = (error as Error).message;
    if (onOutput) {
      onOutput({
        type: 'stderr',
        content: `[LiteLLM error: ${errorMsg}]\n`,
        timestamp: new Date().toISOString()
      });
    }

    return {
      success: false,
      output: '',
      model: endpoint.model,
      provider: provider.type,
      cacheUsed,
      error: errorMsg,
    };
  }
}

/**
 * Get environment variable name for provider API key
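 * Returns null when the provider type has no known mapping, in which case
 * no API key env var is exported before the call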
 */
function getProviderEnvVarName(providerType: string): string | null {
  const envVarMap: Record<string, string> = {
    openai: 'OPENAI_API_KEY',
    anthropic: 'ANTHROPIC_API_KEY',
    google: 'GOOGLE_API_KEY',
    azure: 'AZURE_API_KEY',
    mistral: 'MISTRAL_API_KEY',
    deepseek: 'DEEPSEEK_API_KEY',
  };

  return envVarMap[providerType] || null;
}

/**
 * Get environment variable name for provider base URL
 */
function getProviderBaseUrlEnvVarName(providerType: string): string | null {
  const envVarMap: Record<string, string> = {
    openai: 'OPENAI_API_BASE',
    anthropic: 'ANTHROPIC_API_BASE',
    azure: 'AZURE_API_BASE',
  };

  return envVarMap[providerType] || null;
}

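// Earliest time the next rate-limited retry may run, keyed by "provider:model",
// so concurrent retries against the same endpoint are spaced out rather than
// fired at once.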
const rateLimitRetryQueueNextAt = new Map<string, number>();

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

function isRateLimitError(errorMessage: string): boolean {
  return /429|rate limit|too many requests/i.test(errorMessage);
}

function isRetryableError(errorMessage: string): boolean {
  // Never retry auth/config errors
  if (/401|403|unauthorized|forbidden/i.test(errorMessage)) {
    return false;
  }

  // Retry rate limits, transient server errors, and network timeouts
  return /(429|500|502|503|504|timeout|timed out|econnreset|enotfound|econnrefused|socket hang up)/i.test(
    errorMessage,
  );
}

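/**
 * Run a call with exponential backoff. With baseDelayMs = 1000 the waits are
 * 1000 ms, 2000 ms, 4000 ms, ... for retries 1, 2, 3, ...; rate-limited retries
 * are additionally serialized per rateLimitKey via rateLimitRetryQueueNextAt.
 */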
async function callWithRetries(
  call: () => Promise<string>,
  options: {
    maxRetries: number;
    baseDelayMs: number;
    onOutput?: (unit: CliOutputUnit) => void;
    rateLimitKey: string;
  },
): Promise<string> {
  const { maxRetries, baseDelayMs, onOutput, rateLimitKey } = options;
  let attempt = 0;

  while (true) {
    try {
      return await call();
    } catch (err) {
      const errorMessage = err instanceof Error ? err.message : String(err);

      if (attempt >= maxRetries || !isRetryableError(errorMessage)) {
        throw err;
      }

      const delayMs = baseDelayMs * 2 ** attempt;

      if (onOutput) {
        onOutput({
          type: 'stderr',
          content: `[LiteLLM retry ${attempt + 1}/${maxRetries}: waiting ${delayMs}ms] ${errorMessage}\n`,
          timestamp: new Date().toISOString()
        });
      }

      attempt += 1;

      if (isRateLimitError(errorMessage)) {
        const now = Date.now();
        const earliestAt = now + delayMs;
        const queuedAt = rateLimitRetryQueueNextAt.get(rateLimitKey) ?? 0;
        const scheduledAt = Math.max(queuedAt, earliestAt);
        rateLimitRetryQueueNextAt.set(rateLimitKey, scheduledAt + delayMs);

        await sleep(scheduledAt - now);
        continue;
      }

      await sleep(delayMs);
    }
  }
}