Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-02-11 02:33:51 +08:00)
feat: Add unified LiteLLM API management with dashboard UI and CLI integration
- Create ccw-litellm Python package with AbstractEmbedder and AbstractLLMClient interfaces
- Add BaseEmbedder abstraction and factory pattern to codex-lens for pluggable backends
- Implement API Settings dashboard page for provider credentials and custom endpoints
- Add REST API routes for CRUD operations on providers and endpoints
- Extend CLI with --model parameter for custom endpoint routing
- Integrate existing context-cache for @pattern file resolution
- Add provider model registry with predefined models per provider type
- Include i18n translations (en/zh) for all new UI elements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
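For orientation: the routing added below resolves the --model value against two records managed through the new API Settings page, a provider credential and a custom endpoint. A minimal sketch of their shape, inferred from the fields that litellm-executor.ts reads further down (the literal values, and any field not read there, are illustrative assumptions, not part of this commit):

// Illustrative shapes only; field names mirror what litellm-executor.ts reads below.
const provider = {
  name: 'OpenAI (team key)',
  type: 'openai',                 // selects OPENAI_API_KEY / OPENAI_API_BASE
  apiBase: undefined,             // optional custom base URL
  resolvedApiKey: 'sk-...',       // filled in by getProviderWithResolvedEnvVars()
};

const endpoint = {
  providerId: 'openai-team',      // assumed id linking back to the provider record
  model: 'gpt-4o',
  cacheStrategy: { enabled: true, ttlMinutes: 30, maxSizeKB: 512 },
};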
@@ -10,6 +10,10 @@ import { spawn, ChildProcess } from 'child_process';
 import { existsSync, mkdirSync, readFileSync, writeFileSync, unlinkSync, readdirSync, statSync } from 'fs';
 import { join, relative } from 'path';
 
+// LiteLLM integration
+import { executeLiteLLMEndpoint } from './litellm-executor.js';
+import { findEndpointById } from '../config/litellm-api-config-manager.js';
+
 // Native resume support
 import {
   trackNewSession,
@@ -592,6 +596,66 @@ async function executeCliTool(
   const workingDir = cd || process.cwd();
   ensureHistoryDir(workingDir); // Ensure history directory exists
 
+  // NEW: Check if model is a custom LiteLLM endpoint ID
+  if (model && !['gemini', 'qwen', 'codex'].includes(tool)) {
+    const endpoint = findEndpointById(workingDir, model);
+    if (endpoint) {
+      // Route to LiteLLM executor
+      if (onOutput) {
+        onOutput({ type: 'stderr', data: `[Routing to LiteLLM endpoint: ${model}]\n` });
+      }
+
+      // Measure wall-clock duration around the LiteLLM call
+      const startTime = Date.now();
+      const result = await executeLiteLLMEndpoint({
+        prompt,
+        endpointId: model,
+        baseDir: workingDir,
+        cwd: cd,
+        includeDirs: includeDirs ? includeDirs.split(',').map(d => d.trim()) : undefined,
+        enableCache: true,
+        onOutput: onOutput || undefined,
+      });
+
+      const duration = Date.now() - startTime;
+
+      // Convert LiteLLM result to ExecutionOutput format
+      const execution: ExecutionRecord = {
+        id: customId || `${Date.now()}-litellm`,
+        timestamp: new Date(startTime).toISOString(),
+        tool: 'litellm',
+        model: result.model,
+        mode,
+        prompt,
+        status: result.success ? 'success' : 'error',
+        exit_code: result.success ? 0 : 1,
+        duration_ms: duration,
+        output: {
+          stdout: result.output,
+          stderr: result.error || '',
+          truncated: false,
+        },
+      };
+
+      const conversation = convertToConversation(execution);
+
+      // Try to save to history
+      try {
+        saveConversation(workingDir, conversation);
+      } catch (err) {
+        console.error('[CLI Executor] Failed to save LiteLLM history:', (err as Error).message);
+      }
+
+      return {
+        success: result.success,
+        execution,
+        conversation,
+        stdout: result.output,
+        stderr: result.error || '',
+      };
+    }
+  }
+
   // Get SQLite store for native session lookup
   const store = await getSqliteStore(workingDir);
 
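The guard above only consults the endpoint registry when a model is supplied and the requested tool is not one of the built-in CLIs; otherwise execution falls through to the normal CLI path. Condensed as a standalone predicate (a sketch; this helper does not exist in the commit):

import { findEndpointById } from '../config/litellm-api-config-manager.js';

// Sketch of the routing decision made inline above (helper name is illustrative).
const BUILT_IN_TOOLS = ['gemini', 'qwen', 'codex'];

function shouldRouteToLiteLLM(tool: string, model: string | undefined, workingDir: string): boolean {
  if (!model || BUILT_IN_TOOLS.includes(tool)) return false;
  return Boolean(findEndpointById(workingDir, model));
}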
ccw/src/tools/litellm-client.ts (new file, 246 lines)
@@ -0,0 +1,246 @@
/**
 * LiteLLM Client - Bridge between CCW and ccw-litellm Python package
 * Provides LLM chat and embedding capabilities via spawned Python process
 *
 * Features:
 * - Chat completions with multiple models
 * - Text embeddings generation
 * - Configuration management
 * - JSON protocol communication
 */

import { spawn } from 'child_process';
import { promisify } from 'util';

export interface LiteLLMConfig {
  pythonPath?: string; // Default 'python'
  configPath?: string; // Configuration file path
  timeout?: number; // Default 60000ms
}

export interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}

export interface ChatResponse {
  content: string;
  model: string;
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

export interface EmbedResponse {
  vectors: number[][];
  dimensions: number;
  model: string;
}

export interface LiteLLMStatus {
  available: boolean;
  version?: string;
  error?: string;
}

export class LiteLLMClient {
  private pythonPath: string;
  private configPath?: string;
  private timeout: number;

  constructor(config: LiteLLMConfig = {}) {
    this.pythonPath = config.pythonPath || 'python';
    this.configPath = config.configPath;
    this.timeout = config.timeout || 60000;
  }

  /**
   * Execute Python ccw-litellm command
   */
  private async executePython(args: string[], options: { timeout?: number } = {}): Promise<string> {
    const timeout = options.timeout || this.timeout;

    return new Promise((resolve, reject) => {
      const proc = spawn(this.pythonPath, ['-m', 'ccw_litellm.cli', ...args], {
        stdio: ['pipe', 'pipe', 'pipe'],
        env: { ...process.env }
      });

      let stdout = '';
      let stderr = '';
      let timedOut = false;

      // Set up timeout
      const timeoutId = setTimeout(() => {
        timedOut = true;
        proc.kill('SIGTERM');
        reject(new Error(`Command timed out after ${timeout}ms`));
      }, timeout);

      proc.stdout.on('data', (data) => {
        stdout += data.toString();
      });

      proc.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      proc.on('error', (error) => {
        clearTimeout(timeoutId);
        reject(new Error(`Failed to spawn Python process: ${error.message}`));
      });

      proc.on('close', (code) => {
        clearTimeout(timeoutId);

        if (timedOut) {
          return; // Already rejected
        }

        if (code === 0) {
          resolve(stdout.trim());
        } else {
          const errorMsg = stderr.trim() || `Process exited with code ${code}`;
          reject(new Error(errorMsg));
        }
      });
    });
  }

  /**
   * Check if ccw-litellm is available
   */
  async isAvailable(): Promise<boolean> {
    try {
      await this.executePython(['version'], { timeout: 5000 });
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Get status information
   */
  async getStatus(): Promise<LiteLLMStatus> {
    try {
      const output = await this.executePython(['version'], { timeout: 5000 });
      return {
        available: true,
        version: output.trim()
      };
    } catch (error: any) {
      return {
        available: false,
        error: error.message
      };
    }
  }

  /**
   * Get current configuration
   */
  async getConfig(): Promise<any> {
    const output = await this.executePython(['config', '--json']);
    return JSON.parse(output);
  }

  /**
   * Generate embeddings for texts
   */
  async embed(texts: string[], model: string = 'default'): Promise<EmbedResponse> {
    if (!texts || texts.length === 0) {
      throw new Error('texts array cannot be empty');
    }

    const args = ['embed', '--model', model, '--output', 'json'];

    // Add texts as arguments
    for (const text of texts) {
      args.push(text);
    }

    const output = await this.executePython(args, { timeout: this.timeout * 2 });
    const vectors = JSON.parse(output);

    return {
      vectors,
      dimensions: vectors[0]?.length || 0,
      model
    };
  }

  /**
   * Chat with LLM
   */
  async chat(message: string, model: string = 'default'): Promise<string> {
    if (!message) {
      throw new Error('message cannot be empty');
    }

    const args = ['chat', '--model', model, message];
    return this.executePython(args, { timeout: this.timeout * 2 });
  }

  /**
   * Multi-turn chat with messages array
   */
  async chatMessages(messages: ChatMessage[], model: string = 'default'): Promise<ChatResponse> {
    if (!messages || messages.length === 0) {
      throw new Error('messages array cannot be empty');
    }

    // For now, just use the last user message
    // TODO: Implement full message history support in ccw-litellm
    const lastMessage = messages[messages.length - 1];
    const content = await this.chat(lastMessage.content, model);

    return {
      content,
      model,
      usage: undefined // TODO: Add usage tracking
    };
  }
}

// Singleton instance
let _client: LiteLLMClient | null = null;

/**
 * Get or create singleton LiteLLM client
 */
export function getLiteLLMClient(config?: LiteLLMConfig): LiteLLMClient {
  if (!_client) {
    _client = new LiteLLMClient(config);
  }
  return _client;
}

/**
 * Check if LiteLLM is available
 */
export async function checkLiteLLMAvailable(): Promise<boolean> {
  try {
    const client = getLiteLLMClient();
    return await client.isAvailable();
  } catch {
    return false;
  }
}

/**
 * Get LiteLLM status
 */
export async function getLiteLLMStatus(): Promise<LiteLLMStatus> {
  try {
    const client = getLiteLLMClient();
    return await client.getStatus();
  } catch (error: any) {
    return {
      available: false,
      error: error.message
    };
  }
}
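A short usage sketch of the client above (the model name and texts are placeholders; chat and embed shell out to python -m ccw_litellm.cli exactly as executePython builds the arguments):

import { getLiteLLMClient, checkLiteLLMAvailable } from './litellm-client.js';

async function demo(): Promise<void> {
  // Bail out early if the ccw-litellm Python package is not installed.
  if (!(await checkLiteLLMAvailable())) {
    console.error('ccw-litellm is not available on this machine');
    return;
  }

  const client = getLiteLLMClient({ timeout: 90_000 });

  // Single-turn chat; roughly equivalent to:
  //   python -m ccw_litellm.cli chat --model default "Summarize this repo"
  const reply = await client.chat('Summarize this repo', 'default');
  console.log(reply);

  // Embeddings; roughly equivalent to:
  //   python -m ccw_litellm.cli embed --model default --output json "alpha" "beta"
  const { vectors, dimensions } = await client.embed(['alpha', 'beta'], 'default');
  console.log(`Got ${vectors.length} vectors of dimension ${dimensions}`);
}

demo().catch(console.error);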
ccw/src/tools/litellm-executor.ts (new file, 241 lines)
@@ -0,0 +1,241 @@
/**
 * LiteLLM Executor - Execute LiteLLM endpoints with context caching
 * Integrates with context-cache for file packing and LiteLLM client for API calls
 */

import { getLiteLLMClient } from './litellm-client.js';
import { handler as contextCacheHandler } from './context-cache.js';
import {
  findEndpointById,
  getProviderWithResolvedEnvVars,
} from '../config/litellm-api-config-manager.js';
import type { CustomEndpoint, ProviderCredential } from '../types/litellm-api-config.js';

export interface LiteLLMExecutionOptions {
  prompt: string;
  endpointId: string; // Custom endpoint ID (e.g., "my-gpt4o")
  baseDir: string; // Project base directory
  cwd?: string; // Working directory for file resolution
  includeDirs?: string[]; // Additional directories for @patterns
  enableCache?: boolean; // Override endpoint cache setting
  onOutput?: (data: { type: string; data: string }) => void;
}

export interface LiteLLMExecutionResult {
  success: boolean;
  output: string;
  model: string;
  provider: string;
  cacheUsed: boolean;
  cachedFiles?: string[];
  error?: string;
}

/**
 * Extract @patterns from prompt text
 */
export function extractPatterns(prompt: string): string[] {
  // Match @path patterns: @src/**/*.ts, @CLAUDE.md, @../shared/**/*
  const regex = /@([^\s]+)/g;
  const patterns: string[] = [];
  let match;
  while ((match = regex.exec(prompt)) !== null) {
    patterns.push('@' + match[1]);
  }
  return patterns;
}
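// Usage sketch (not part of the commit): the regex above captures any
// non-whitespace run that follows '@', so:
//   extractPatterns('Review @src/**/*.ts and @CLAUDE.md for dead code')
//   // => ['@src/**/*.ts', '@CLAUDE.md']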
/**
 * Execute LiteLLM endpoint with optional context caching
 */
export async function executeLiteLLMEndpoint(
  options: LiteLLMExecutionOptions
): Promise<LiteLLMExecutionResult> {
  const { prompt, endpointId, baseDir, cwd, includeDirs, enableCache, onOutput } = options;

  // 1. Find endpoint configuration
  const endpoint = findEndpointById(baseDir, endpointId);
  if (!endpoint) {
    return {
      success: false,
      output: '',
      model: '',
      provider: '',
      cacheUsed: false,
      error: `Endpoint not found: ${endpointId}`,
    };
  }

  // 2. Get provider with resolved env vars
  const provider = getProviderWithResolvedEnvVars(baseDir, endpoint.providerId);
  if (!provider) {
    return {
      success: false,
      output: '',
      model: '',
      provider: '',
      cacheUsed: false,
      error: `Provider not found: ${endpoint.providerId}`,
    };
  }

  // Verify API key is available
  if (!provider.resolvedApiKey) {
    return {
      success: false,
      output: '',
      model: endpoint.model,
      provider: provider.type,
      cacheUsed: false,
      error: `API key not configured for provider: ${provider.name}`,
    };
  }

  // 3. Process context cache if enabled
  let finalPrompt = prompt;
  let cacheUsed = false;
  let cachedFiles: string[] = [];

  const shouldCache = enableCache ?? endpoint.cacheStrategy.enabled;
  if (shouldCache) {
    const patterns = extractPatterns(prompt);
    if (patterns.length > 0) {
      if (onOutput) {
        onOutput({ type: 'stderr', data: `[Context cache: Found ${patterns.length} @patterns]\n` });
      }

      // Pack files into cache
      const packResult = await contextCacheHandler({
        operation: 'pack',
        patterns,
        cwd: cwd || process.cwd(),
        include_dirs: includeDirs,
        ttl: endpoint.cacheStrategy.ttlMinutes * 60 * 1000,
        max_file_size: endpoint.cacheStrategy.maxSizeKB * 1024,
      });

      if (packResult.success && packResult.result) {
        const pack = packResult.result as any;

        if (onOutput) {
          onOutput({
            type: 'stderr',
            data: `[Context cache: Packed ${pack.files_packed} files, ${pack.total_bytes} bytes]\n`,
          });
        }

        // Read cached content
        const readResult = await contextCacheHandler({
          operation: 'read',
          session_id: pack.session_id,
          limit: endpoint.cacheStrategy.maxSizeKB * 1024,
        });

        if (readResult.success && readResult.result) {
          const read = readResult.result as any;
          // Prepend cached content to prompt
          finalPrompt = `${read.content}\n\n---\n\n${prompt}`;
          cacheUsed = true;
          cachedFiles = pack.files_packed ? Array(pack.files_packed).fill('...') : []; // files_packed is a count; individual paths are not returned

          if (onOutput) {
            onOutput({ type: 'stderr', data: `[Context cache: Applied to prompt]\n` });
          }
        }
      } else if (packResult.error) {
        if (onOutput) {
          onOutput({ type: 'stderr', data: `[Context cache warning: ${packResult.error}]\n` });
        }
      }
    }
  }

  // 4. Call LiteLLM
  try {
    if (onOutput) {
      onOutput({
        type: 'stderr',
        data: `[LiteLLM: Calling ${provider.type}/${endpoint.model}]\n`,
      });
    }

    const client = getLiteLLMClient({
      pythonPath: 'python',
      timeout: 120000, // 2 minutes
    });

    // Configure provider credentials via environment
    // LiteLLM uses standard env vars like OPENAI_API_KEY, ANTHROPIC_API_KEY
    const envVarName = getProviderEnvVarName(provider.type);
    if (envVarName) {
      process.env[envVarName] = provider.resolvedApiKey;
    }

    // Set base URL if custom
    if (provider.apiBase) {
      const baseUrlEnvVar = getProviderBaseUrlEnvVarName(provider.type);
      if (baseUrlEnvVar) {
        process.env[baseUrlEnvVar] = provider.apiBase;
      }
    }

    // Use litellm-client to call chat
    const response = await client.chat(finalPrompt, endpoint.model);

    if (onOutput) {
      onOutput({ type: 'stdout', data: response });
    }

    return {
      success: true,
      output: response,
      model: endpoint.model,
      provider: provider.type,
      cacheUsed,
      cachedFiles,
    };
  } catch (error) {
    const errorMsg = (error as Error).message;
    if (onOutput) {
      onOutput({ type: 'stderr', data: `[LiteLLM error: ${errorMsg}]\n` });
    }

    return {
      success: false,
      output: '',
      model: endpoint.model,
      provider: provider.type,
      cacheUsed,
      error: errorMsg,
    };
  }
}

/**
 * Get environment variable name for provider API key
 */
function getProviderEnvVarName(providerType: string): string | null {
  const envVarMap: Record<string, string> = {
    openai: 'OPENAI_API_KEY',
    anthropic: 'ANTHROPIC_API_KEY',
    google: 'GOOGLE_API_KEY',
    azure: 'AZURE_API_KEY',
    mistral: 'MISTRAL_API_KEY',
    deepseek: 'DEEPSEEK_API_KEY',
  };

  return envVarMap[providerType] || null;
}

/**
 * Get environment variable name for provider base URL
 */
function getProviderBaseUrlEnvVarName(providerType: string): string | null {
  const envVarMap: Record<string, string> = {
    openai: 'OPENAI_API_BASE',
    anthropic: 'ANTHROPIC_API_BASE',
    azure: 'AZURE_API_BASE',
  };

  return envVarMap[providerType] || null;
}
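A usage sketch for the executor above (the endpoint id and prompt are placeholders; the function and option names come from the interfaces defined in this file):

import { executeLiteLLMEndpoint } from './litellm-executor.js';

async function runThroughEndpoint(): Promise<void> {
  const result = await executeLiteLLMEndpoint({
    prompt: 'Summarize the workflow config in @CLAUDE.md',
    endpointId: 'my-gpt4o',            // custom endpoint defined in API Settings
    baseDir: process.cwd(),
    enableCache: true,                 // pack @patterns via context-cache first
    onOutput: ({ type, data }) => (type === 'stderr' ? process.stderr : process.stdout).write(data),
  });

  if (!result.success) {
    console.error(`LiteLLM call failed: ${result.error}`);
    return;
  }
  console.log(`Answered by ${result.provider}/${result.model}, cacheUsed=${result.cacheUsed}`);
}

runThroughEndpoint().catch(console.error);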