mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
feat: Unified Embedding Pool with auto-discovery
Architecture refactoring for multi-provider rotation: Backend: - Add EmbeddingPoolConfig type with autoDiscover support - Implement discoverProvidersForModel() for auto-aggregation - Add GET/PUT /api/litellm-api/embedding-pool endpoints - Add GET /api/litellm-api/embedding-pool/discover/:model preview - Convert ccw-litellm status check to async with 5-min cache - Maintain backward compatibility with legacy rotation config Frontend: - Add "Embedding Pool" tab in API Settings - Auto-discover providers when target model selected - Show provider/key count with include/exclude controls - Increase sidebar width (280px → 320px) - Add sync result feedback on save Other: - Remove worker count limits (was max=32) - Add i18n translations (EN/CN) - Update .gitignore for .mcp.json 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,3 +29,4 @@ COMMAND_TEMPLATE_ORCHESTRATOR.md
|
||||
settings.json
|
||||
*.mcp.json
|
||||
.mcp.json
|
||||
.ace-tool/
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
* Manages provider credentials, custom endpoints, and cache settings
|
||||
*/
|
||||
|
||||
import { existsSync, readFileSync, writeFileSync } from 'fs';
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
||||
import { homedir } from 'os';
|
||||
import { join } from 'path';
|
||||
import { StoragePaths, GlobalPaths, ensureStorageDir } from './storage-paths.js';
|
||||
import type {
|
||||
@@ -15,6 +16,7 @@ import type {
|
||||
CacheStrategy,
|
||||
CodexLensEmbeddingRotation,
|
||||
CodexLensEmbeddingProvider,
|
||||
EmbeddingPoolConfig,
|
||||
} from '../types/litellm-api-config.js';
|
||||
|
||||
/**
|
||||
@@ -372,11 +374,12 @@ export function getCodexLensEmbeddingRotation(baseDir: string): CodexLensEmbeddi
|
||||
|
||||
/**
|
||||
* Update CodexLens embedding rotation config
|
||||
* Also triggers sync to CodexLens settings.json
|
||||
*/
|
||||
export function updateCodexLensEmbeddingRotation(
|
||||
baseDir: string,
|
||||
rotationConfig: CodexLensEmbeddingRotation | undefined
|
||||
): void {
|
||||
): { syncResult: { success: boolean; message: string; endpointCount?: number } } {
|
||||
const config = loadLiteLLMApiConfig(baseDir);
|
||||
|
||||
if (rotationConfig) {
|
||||
@@ -386,6 +389,10 @@ export function updateCodexLensEmbeddingRotation(
|
||||
}
|
||||
|
||||
saveConfig(baseDir, config);
|
||||
|
||||
// Auto-sync to CodexLens settings.json
|
||||
const syncResult = syncCodexLensConfig(baseDir);
|
||||
return { syncResult };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -474,6 +481,7 @@ export function getEmbeddingProvidersForRotation(baseDir: string): Array<{
|
||||
/**
|
||||
* Generate rotation endpoints for ccw_litellm
|
||||
* Creates endpoint list from rotation config for parallel embedding
|
||||
* Supports both legacy codexlensEmbeddingRotation and new embeddingPoolConfig
|
||||
*/
|
||||
export function generateRotationEndpoints(baseDir: string): Array<{
|
||||
name: string;
|
||||
@@ -484,12 +492,115 @@ export function generateRotationEndpoints(baseDir: string): Array<{
|
||||
max_concurrent: number;
|
||||
}> {
|
||||
const config = loadLiteLLMApiConfig(baseDir);
|
||||
|
||||
// Prefer embeddingPoolConfig, fallback to codexlensEmbeddingRotation for backward compatibility
|
||||
const poolConfig = config.embeddingPoolConfig;
|
||||
const rotationConfig = config.codexlensEmbeddingRotation;
|
||||
|
||||
if (!rotationConfig || !rotationConfig.enabled) {
|
||||
return [];
|
||||
// Check if new poolConfig is enabled
|
||||
if (poolConfig && poolConfig.enabled) {
|
||||
return generateEndpointsFromPool(baseDir, poolConfig, config);
|
||||
}
|
||||
|
||||
// Fallback to legacy rotation config
|
||||
if (rotationConfig && rotationConfig.enabled) {
|
||||
return generateEndpointsFromLegacyRotation(baseDir, rotationConfig, config);
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
 * Build rotation endpoints from the unified embeddingPoolConfig.
 *
 * Endpoints are only produced in auto-discovery mode: each enabled provider that
 * offers the pool's target model contributes one endpoint per usable API key,
 * unless the provider id appears in `excludedProviderIds`.
 *
 * @param baseDir - Project base directory, forwarded to provider discovery
 * @param poolConfig - Pool settings (target model, exclusions, per-key concurrency)
 * @param config - Already-loaded LiteLLM API config used to look up providers
 * @returns Endpoint descriptors; empty when auto-discovery is off or nothing qualifies
 */
function generateEndpointsFromPool(
  baseDir: string,
  poolConfig: EmbeddingPoolConfig,
  config: LiteLLMApiConfig
): Array<{
  name: string;
  api_key: string;
  api_base: string;
  model: string;
  weight: number;
  max_concurrent: number;
}> {
  const collected: Array<{
    name: string;
    api_key: string;
    api_base: string;
    model: string;
    weight: number;
    max_concurrent: number;
  }> = [];

  // Nothing is aggregated here unless auto-discovery is switched on.
  if (!poolConfig.autoDiscover) {
    return collected;
  }

  const candidates = discoverProvidersForModel(baseDir, poolConfig.targetModel);
  const skippedProviders = new Set(poolConfig.excludedProviderIds || []);

  for (const candidate of candidates) {
    if (skippedProviders.has(candidate.providerId)) {
      continue;
    }

    // Resolve the discovered ids back to the full provider / model records.
    const owner = config.providers.find(entry => entry.id === candidate.providerId);
    if (!owner || !owner.enabled) {
      continue;
    }

    const model = owner.embeddingModels?.find(entry => entry.id === candidate.modelId);
    if (!model || !model.enabled) {
      continue;
    }

    // Base URL precedence: model-level override, then provider setting, then type default.
    const resolvedBase =
      model.endpointSettings?.baseUrl || owner.apiBase || getDefaultApiBaseForType(owner.type);

    // A non-empty multi-key list takes priority (enabled keys only);
    // the single legacy key is used only when no list is configured.
    const credentials: Array<{ id: string; key: string; label: string }> = [];
    if (owner.apiKeys && owner.apiKeys.length > 0) {
      for (const entry of owner.apiKeys) {
        if (entry.enabled) {
          credentials.push({ id: entry.id, key: entry.key, label: entry.label || entry.id });
        }
      }
    } else if (owner.apiKey) {
      credentials.push({ id: 'default', key: owner.apiKey, label: 'Default' });
    }

    // One endpoint per credential.
    for (const credential of credentials) {
      collected.push({
        name: `${owner.name}-${credential.label}`,
        api_key: resolveEnvVar(credential.key),
        api_base: resolvedBase,
        model: model.name,
        weight: 1.0, // Auto-discovered providers all share the same weight
        max_concurrent: poolConfig.defaultMaxConcurrentPerKey,
      });
    }
  }

  return collected;
}
|
||||
|
||||
/**
|
||||
* Generate endpoints from legacy codexlensEmbeddingRotation config
|
||||
*/
|
||||
function generateEndpointsFromLegacyRotation(
|
||||
baseDir: string,
|
||||
rotationConfig: CodexLensEmbeddingRotation,
|
||||
config: LiteLLMApiConfig
|
||||
): Array<{
|
||||
name: string;
|
||||
api_key: string;
|
||||
api_base: string;
|
||||
model: string;
|
||||
weight: number;
|
||||
max_concurrent: number;
|
||||
}> {
|
||||
const endpoints: Array<{
|
||||
name: string;
|
||||
api_key: string;
|
||||
@@ -551,6 +662,191 @@ export function generateRotationEndpoints(baseDir: string): Array<{
|
||||
return endpoints;
|
||||
}
|
||||
|
||||
/**
 * Sync CodexLens settings with the CCW API config.
 *
 * Writes the generated rotation endpoints, together with the strategy, cooldown and
 * target model of the active config, to ~/.codexlens/settings.json. The new
 * embeddingPoolConfig is preferred when enabled; otherwise the legacy
 * codexlensEmbeddingRotation is used. When neither is enabled, previously written
 * rotation keys are removed from the settings file.
 *
 * Other keys already present in settings.json are preserved. A settings file that
 * cannot be parsed, or whose top-level value is not a plain JSON object, is treated
 * as empty (a warning is logged) so that the written result is always an object.
 *
 * @param baseDir - Project base directory whose LiteLLM API config is read
 * @returns Outcome of the sync; never throws, failures are reported via `success: false`
 */
export function syncCodexLensConfig(baseDir: string): { success: boolean; message: string; endpointCount?: number } {
  try {
    const config = loadLiteLLMApiConfig(baseDir);

    // Prefer embeddingPoolConfig, fall back to codexlensEmbeddingRotation.
    // Narrowing to a single value here removes the need for non-null assertions below.
    const poolConfig = config.embeddingPoolConfig;
    const rotationConfig = config.codexlensEmbeddingRotation;
    const activeConfig =
      poolConfig && poolConfig.enabled
        ? poolConfig
        : rotationConfig && rotationConfig.enabled
          ? rotationConfig
          : undefined;

    // Get CodexLens settings path
    const codexlensDir = join(homedir(), '.codexlens');
    const settingsPath = join(codexlensDir, 'settings.json');

    // Ensure directory exists
    if (!existsSync(codexlensDir)) {
      mkdirSync(codexlensDir, { recursive: true });
    }

    // Load existing settings or start from an empty object
    let settings: Record<string, unknown> = {};
    if (existsSync(settingsPath)) {
      try {
        const parsed: unknown = JSON.parse(readFileSync(settingsPath, 'utf-8'));
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          settings = parsed as Record<string, unknown>;
        } else {
          // An array would silently drop the named keys on JSON.stringify, and null
          // would throw on property assignment, so neither can serve as the settings object.
          console.warn('[LiteLLM Config] CodexLens settings.json is not a JSON object; starting from empty settings');
        }
      } catch {
        console.warn('[LiteLLM Config] CodexLens settings.json could not be parsed; starting from empty settings');
        settings = {};
      }
    }

    // If neither config is enabled, remove rotation endpoints and return
    if (!activeConfig) {
      if (settings.litellm_rotation_endpoints) {
        delete settings.litellm_rotation_endpoints;
        delete settings.litellm_rotation_strategy;
        delete settings.litellm_rotation_cooldown;
        delete settings.litellm_target_model;
        writeFileSync(settingsPath, JSON.stringify(settings, null, 2), 'utf-8');
      }
      return { success: true, message: 'Rotation disabled, cleared endpoints', endpointCount: 0 };
    }

    // Generate rotation endpoints (function handles pool-vs-legacy priority internally)
    const endpoints = generateRotationEndpoints(baseDir);

    if (endpoints.length === 0) {
      // Nothing is written in this case; the settings file is left as it was.
      return { success: false, message: 'No valid endpoints generated from rotation config' };
    }

    // Update settings from whichever config is active
    settings.litellm_rotation_endpoints = endpoints;
    settings.litellm_rotation_strategy = activeConfig.strategy;
    settings.litellm_rotation_cooldown = activeConfig.defaultCooldown;
    settings.litellm_target_model = activeConfig.targetModel;

    // Write updated settings
    writeFileSync(settingsPath, JSON.stringify(settings, null, 2), 'utf-8');

    return {
      success: true,
      message: `Synced ${endpoints.length} rotation endpoints to CodexLens`,
      endpointCount: endpoints.length,
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error('[LiteLLM Config] Failed to sync CodexLens config:', errorMessage);
    return { success: false, message: `Sync failed: ${errorMessage}` };
  }
}
|
||||
|
||||
// ===========================
|
||||
// Embedding Pool Management (Generic, with Auto-Discovery)
|
||||
// ===========================
|
||||
|
||||
/**
 * Read the embedding pool settings stored in the LiteLLM API config.
 *
 * @param baseDir - Project base directory whose config is loaded
 * @returns The stored pool config, or undefined when the config has none
 */
export function getEmbeddingPoolConfig(baseDir: string): EmbeddingPoolConfig | undefined {
  const { embeddingPoolConfig } = loadLiteLLMApiConfig(baseDir);
  return embeddingPoolConfig;
}
|
||||
|
||||
/**
 * Store (or remove) the embedding pool config, then sync CodexLens settings.json.
 *
 * The sync runs after every save, whether the pool was set or removed.
 *
 * @param baseDir - Project base directory whose config is updated
 * @param poolConfig - New pool config; pass undefined to delete the stored one
 * @returns The result of the CodexLens sync that follows the save
 */
export function updateEmbeddingPoolConfig(
  baseDir: string,
  poolConfig: EmbeddingPoolConfig | undefined
): { syncResult: { success: boolean; message: string; endpointCount?: number } } {
  const stored = loadLiteLLMApiConfig(baseDir);

  if (!poolConfig) {
    delete stored.embeddingPoolConfig;
  } else {
    stored.embeddingPoolConfig = poolConfig;
  }

  saveConfig(baseDir, stored);

  // Push the change through to CodexLens right away.
  return { syncResult: syncCodexLensConfig(baseDir) };
}
|
||||
|
||||
/**
 * List every enabled provider that offers a given embedding model.
 *
 * A model matches when it is enabled and either its id or its name equals
 * `targetModel`. One entry is produced per matching model, each carrying a
 * summary of the provider's API keys (ids, labels and enabled flags only —
 * key values are not included).
 *
 * @param baseDir - Project base directory whose config is loaded
 * @param targetModel - Model id or name to look for
 * @returns Matching provider/model pairs with their key summaries
 */
export function discoverProvidersForModel(baseDir: string, targetModel: string): Array<{
  providerId: string;
  providerName: string;
  modelId: string;
  modelName: string;
  apiKeys: Array<{ keyId: string; keyLabel: string; enabled: boolean }>;
}> {
  const matches: Array<{
    providerId: string;
    providerName: string;
    modelId: string;
    modelName: string;
    apiKeys: Array<{ keyId: string; keyLabel: string; enabled: boolean }>;
  }> = [];

  const { providers } = loadLiteLLMApiConfig(baseDir);

  for (const provider of providers) {
    if (!provider.enabled) {
      continue;
    }

    // Enabled embedding models whose id or name equals the target
    const offered = (provider.embeddingModels || []).filter(
      candidate => candidate.enabled && (candidate.id === targetModel || candidate.name === targetModel)
    );
    if (offered.length === 0) {
      continue;
    }

    // Summarise keys: a non-empty multi-key list wins (disabled keys are listed too),
    // otherwise the single legacy key is reported as one enabled entry.
    const keySummaries: Array<{ keyId: string; keyLabel: string; enabled: boolean }> =
      provider.apiKeys && provider.apiKeys.length > 0
        ? provider.apiKeys.map(entry => ({
            keyId: entry.id,
            keyLabel: entry.label || entry.id,
            enabled: entry.enabled,
          }))
        : provider.apiKey
          ? [{ keyId: 'default', keyLabel: 'Default Key', enabled: true }]
          : [];

    // One result per matching model; all of a provider's entries share the same key summary array.
    for (const model of offered) {
      matches.push({
        providerId: provider.id,
        providerName: provider.name,
        modelId: model.id,
        modelName: model.name,
        apiKeys: keySummaries,
      });
    }
  }

  return matches;
}
|
||||
|
||||
// ===========================
|
||||
// YAML Config Generation for ccw_litellm
|
||||
// ===========================
|
||||
@@ -713,4 +1009,4 @@ function objectToYaml(obj: unknown, indent: number = 0): string {
|
||||
}
|
||||
|
||||
// Re-export types
|
||||
export type { ProviderCredential, CustomEndpoint, ProviderType, CacheStrategy, CodexLensEmbeddingRotation, CodexLensEmbeddingProvider };
|
||||
export type { ProviderCredential, CustomEndpoint, ProviderType, CacheStrategy, CodexLensEmbeddingRotation, CodexLensEmbeddingProvider, EmbeddingPoolConfig };
|
||||
|
||||
@@ -26,14 +26,36 @@ import {
|
||||
updateCodexLensEmbeddingRotation,
|
||||
getEmbeddingProvidersForRotation,
|
||||
generateRotationEndpoints,
|
||||
syncCodexLensConfig,
|
||||
getEmbeddingPoolConfig,
|
||||
updateEmbeddingPoolConfig,
|
||||
discoverProvidersForModel,
|
||||
type ProviderCredential,
|
||||
type CustomEndpoint,
|
||||
type ProviderType,
|
||||
type CodexLensEmbeddingRotation,
|
||||
type EmbeddingPoolConfig,
|
||||
} from '../../config/litellm-api-config-manager.js';
|
||||
import { getContextCacheStore } from '../../tools/context-cache-store.js';
|
||||
import { getLiteLLMClient } from '../../tools/litellm-client.js';
|
||||
|
||||
/**
 * Cache for the ccw-litellm status check.
 *
 * `data` holds the last status result (null when nothing is cached),
 * `timestamp` is the Date.now() value recorded when it was stored, and
 * `ttl` is how long, in milliseconds, a cached result is considered fresh.
 */
let ccwLitellmStatusCache: {
  data: { installed: boolean; version?: string; error?: string } | null;
  timestamp: number;
  ttl: number;
} = {
  data: null,
  timestamp: 0,
  ttl: 5 * 60 * 1000, // 5 minutes
};

/**
 * Drop the cached ccw-litellm status so the next status request re-checks.
 * Call after installing the package. The configured ttl is left unchanged.
 */
export function clearCcwLitellmStatusCache(): void {
  ccwLitellmStatusCache.data = null;
  ccwLitellmStatusCache.timestamp = 0;
}
|
||||
|
||||
export interface RouteContext {
|
||||
pathname: string;
|
||||
url: URL;
|
||||
@@ -533,42 +555,56 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise<boolean
|
||||
|
||||
// GET /api/litellm-api/ccw-litellm/status - Check ccw-litellm installation status
|
||||
if (pathname === '/api/litellm-api/ccw-litellm/status' && req.method === 'GET') {
|
||||
try {
|
||||
const { execSync } = await import('child_process');
|
||||
// Check cache first
|
||||
if (ccwLitellmStatusCache.data &&
|
||||
Date.now() - ccwLitellmStatusCache.timestamp < ccwLitellmStatusCache.ttl) {
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(ccwLitellmStatusCache.data));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Async check
|
||||
try {
|
||||
const { exec } = await import('child_process');
|
||||
const { promisify } = await import('util');
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
// Try multiple Python executables
|
||||
const pythonExecutables = ['python', 'python3', 'py'];
|
||||
// Use single quotes inside Python code for Windows compatibility
|
||||
const pythonCode = "import ccw_litellm; print(getattr(ccw_litellm, '__version__', 'installed'))";
|
||||
|
||||
let installed = false;
|
||||
let version = '';
|
||||
let lastError = '';
|
||||
let result: { installed: boolean; version?: string; error?: string } = { installed: false };
|
||||
|
||||
for (const pythonExe of pythonExecutables) {
|
||||
try {
|
||||
const output = execSync(`${pythonExe} -c "${pythonCode}"`, {
|
||||
encoding: 'utf-8',
|
||||
timeout: 10000,
|
||||
const { stdout } = await execAsync(`${pythonExe} -c "${pythonCode}"`, {
|
||||
timeout: 5000,
|
||||
windowsHide: true
|
||||
});
|
||||
version = output.trim();
|
||||
const version = stdout.trim();
|
||||
if (version) {
|
||||
installed = true;
|
||||
result = { installed: true, version };
|
||||
console.log(`[ccw-litellm status] Found with ${pythonExe}: ${version}`);
|
||||
break;
|
||||
}
|
||||
} catch (err) {
|
||||
lastError = (err as Error).message;
|
||||
console.log(`[ccw-litellm status] ${pythonExe} failed:`, lastError.substring(0, 100));
|
||||
result.error = (err as Error).message;
|
||||
console.log(`[ccw-litellm status] ${pythonExe} failed:`, result.error.substring(0, 100));
|
||||
}
|
||||
}
|
||||
|
||||
// Update cache
|
||||
ccwLitellmStatusCache = {
|
||||
data: result,
|
||||
timestamp: Date.now(),
|
||||
ttl: 5 * 60 * 1000,
|
||||
};
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(installed ? { installed: true, version } : { installed: false, error: lastError }));
|
||||
res.end(JSON.stringify(result));
|
||||
} catch (err) {
|
||||
const errorResult = { installed: false, error: (err as Error).message };
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({ installed: false, error: (err as Error).message }));
|
||||
res.end(JSON.stringify(errorResult));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -601,14 +637,14 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise<boolean
|
||||
const rotationConfig = body as CodexLensEmbeddingRotation | null;
|
||||
|
||||
try {
|
||||
updateCodexLensEmbeddingRotation(initialPath, rotationConfig || undefined);
|
||||
const { syncResult } = updateCodexLensEmbeddingRotation(initialPath, rotationConfig || undefined);
|
||||
|
||||
broadcastToClients({
|
||||
type: 'CODEXLENS_ROTATION_UPDATED',
|
||||
payload: { rotationConfig, timestamp: new Date().toISOString() }
|
||||
payload: { rotationConfig, syncResult, timestamp: new Date().toISOString() }
|
||||
});
|
||||
|
||||
return { success: true, rotationConfig };
|
||||
return { success: true, rotationConfig, syncResult };
|
||||
} catch (err) {
|
||||
return { error: (err as Error).message, status: 500 };
|
||||
}
|
||||
@@ -633,6 +669,116 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise<boolean
|
||||
return true;
|
||||
}
|
||||
|
||||
// POST /api/litellm-api/codexlens/rotation/sync - Manually sync rotation config to CodexLens
|
||||
if (pathname === '/api/litellm-api/codexlens/rotation/sync' && req.method === 'POST') {
|
||||
try {
|
||||
const syncResult = syncCodexLensConfig(initialPath);
|
||||
|
||||
if (syncResult.success) {
|
||||
broadcastToClients({
|
||||
type: 'CODEXLENS_CONFIG_SYNCED',
|
||||
payload: { ...syncResult, timestamp: new Date().toISOString() }
|
||||
});
|
||||
}
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(syncResult));
|
||||
} catch (err) {
|
||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({ success: false, message: (err as Error).message }));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// ===========================
|
||||
// Embedding Pool Routes (New Generic API)
|
||||
// ===========================
|
||||
|
||||
// GET /api/litellm-api/embedding-pool - Get pool config and available models
|
||||
if (pathname === '/api/litellm-api/embedding-pool' && req.method === 'GET') {
|
||||
try {
|
||||
const poolConfig = getEmbeddingPoolConfig(initialPath);
|
||||
|
||||
// Get list of all available embedding models from all providers
|
||||
const config = loadLiteLLMApiConfig(initialPath);
|
||||
const availableModels: Array<{ modelId: string; modelName: string; providers: string[] }> = [];
|
||||
const modelMap = new Map<string, { modelId: string; modelName: string; providers: string[] }>();
|
||||
|
||||
for (const provider of config.providers) {
|
||||
if (!provider.enabled || !provider.embeddingModels) continue;
|
||||
|
||||
for (const model of provider.embeddingModels) {
|
||||
if (!model.enabled) continue;
|
||||
|
||||
const key = model.id;
|
||||
if (modelMap.has(key)) {
|
||||
modelMap.get(key)!.providers.push(provider.name);
|
||||
} else {
|
||||
modelMap.set(key, {
|
||||
modelId: model.id,
|
||||
modelName: model.name,
|
||||
providers: [provider.name],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
availableModels.push(...Array.from(modelMap.values()));
|
||||
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({
|
||||
poolConfig: poolConfig || null,
|
||||
availableModels,
|
||||
}));
|
||||
} catch (err) {
|
||||
res.writeHead(500, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({ error: (err as Error).message }));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// PUT /api/litellm-api/embedding-pool - Update pool config
|
||||
if (pathname === '/api/litellm-api/embedding-pool' && req.method === 'PUT') {
|
||||
handlePostRequest(req, res, async (body: unknown) => {
|
||||
const poolConfig = body as EmbeddingPoolConfig | null;
|
||||
|
||||
try {
|
||||
const { syncResult } = updateEmbeddingPoolConfig(initialPath, poolConfig || undefined);
|
||||
|
||||
broadcastToClients({
|
||||
type: 'EMBEDDING_POOL_UPDATED',
|
||||
payload: { poolConfig, syncResult, timestamp: new Date().toISOString() }
|
||||
});
|
||||
|
||||
return { success: true, poolConfig, syncResult };
|
||||
} catch (err) {
|
||||
return { error: (err as Error).message, status: 500 };
|
||||
}
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
// GET /api/litellm-api/embedding-pool/discover/:model - Preview auto-discovery results
// The :model segment is URL-encoded by the client and is decoded before lookup.
const discoverMatch = pathname.match(/^\/api\/litellm-api\/embedding-pool\/discover\/([^/]+)$/);
if (discoverMatch && req.method === 'GET') {
  let targetModel: string;
  try {
    // decodeURIComponent throws URIError on malformed percent-escapes (e.g. "%E0%A4%A").
    // That is a bad request from the client, so answer 400 instead of letting it escape.
    targetModel = decodeURIComponent(discoverMatch[1]);
  } catch {
    res.writeHead(400, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ error: 'Invalid model name encoding' }));
    return true;
  }

  try {
    const discovered = discoverProvidersForModel(initialPath, targetModel);

    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      targetModel,
      discovered,
      count: discovered.length,
    }));
  } catch (err) {
    res.writeHead(500, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ error: (err as Error).message }));
  }
  return true;
}
|
||||
|
||||
// POST /api/litellm-api/ccw-litellm/install - Install ccw-litellm package
|
||||
if (pathname === '/api/litellm-api/ccw-litellm/install' && req.method === 'POST') {
|
||||
handlePostRequest(req, res, async () => {
|
||||
@@ -667,6 +813,8 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise<boolean
|
||||
proc.stderr?.on('data', (data) => { error += data.toString(); });
|
||||
proc.on('close', (code) => {
|
||||
if (code === 0) {
|
||||
// Clear status cache after successful installation
|
||||
clearCcwLitellmStatusCache();
|
||||
resolve({ success: true, message: 'ccw-litellm installed from PyPI' });
|
||||
} else {
|
||||
resolve({ success: false, error: error || 'Installation failed' });
|
||||
@@ -685,6 +833,9 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise<boolean
|
||||
proc.stderr?.on('data', (data) => { error += data.toString(); });
|
||||
proc.on('close', (code) => {
|
||||
if (code === 0) {
|
||||
// Clear status cache after successful installation
|
||||
clearCcwLitellmStatusCache();
|
||||
|
||||
// Broadcast installation event
|
||||
broadcastToClients({
|
||||
type: 'CCW_LITELLM_INSTALLED',
|
||||
|
||||
@@ -958,8 +958,8 @@ select.cli-input {
|
||||
|
||||
/* Left Sidebar */
|
||||
.api-settings-sidebar {
|
||||
width: 280px;
|
||||
min-width: 240px;
|
||||
width: 320px;
|
||||
min-width: 280px;
|
||||
border-right: 1px solid hsl(var(--border));
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
|
||||
@@ -19,6 +19,7 @@ const i18n = {
|
||||
'common.delete': 'Delete',
|
||||
'common.cancel': 'Cancel',
|
||||
'common.save': 'Save',
|
||||
'common.include': 'Include',
|
||||
'common.close': 'Close',
|
||||
'common.loading': 'Loading...',
|
||||
'common.error': 'Error',
|
||||
@@ -28,6 +29,8 @@ const i18n = {
|
||||
'common.retry': 'Retry',
|
||||
'common.refresh': 'Refresh',
|
||||
'common.minutes': 'minutes',
|
||||
'common.enabled': 'Enabled',
|
||||
'common.disabled': 'Disabled',
|
||||
|
||||
// Header
|
||||
'header.project': 'Project:',
|
||||
@@ -267,7 +270,7 @@ const i18n = {
|
||||
'codexlens.embeddingModel': 'Embedding Model',
|
||||
'codexlens.modelHint': 'Select embedding model for vector search (models with ✓ are installed)',
|
||||
'codexlens.concurrency': 'API Concurrency',
|
||||
'codexlens.concurrencyHint': 'Number of parallel API calls (1-32). Higher values speed up indexing but may hit rate limits.',
|
||||
'codexlens.concurrencyHint': 'Number of parallel API calls. Higher values speed up indexing but may hit rate limits.',
|
||||
'codexlens.concurrencyCustom': 'Custom',
|
||||
'codexlens.rotation': 'Multi-Provider Rotation',
|
||||
'codexlens.rotationDesc': 'Aggregate multiple API providers and keys for parallel embedding generation',
|
||||
@@ -289,6 +292,8 @@ const i18n = {
|
||||
'codexlens.selectKeys': 'Select Keys',
|
||||
'codexlens.configureRotation': 'Configure Rotation',
|
||||
'codexlens.rotationSaved': 'Rotation config saved successfully',
|
||||
'codexlens.endpointsSynced': 'endpoints synced to CodexLens',
|
||||
'codexlens.syncFailed': 'Sync failed',
|
||||
'codexlens.rotationDeleted': 'Rotation config deleted',
|
||||
'codexlens.totalEndpoints': 'Total Endpoints',
|
||||
'codexlens.fullIndex': 'Full',
|
||||
@@ -312,6 +317,9 @@ const i18n = {
|
||||
'codexlens.runSearch': 'Run Search',
|
||||
'codexlens.results': 'Results',
|
||||
'codexlens.resultsCount': 'results',
|
||||
'codexlens.resultLimit': 'Limit',
|
||||
'codexlens.contentLength': 'Content Length',
|
||||
'codexlens.extraFiles': 'Extra Files',
|
||||
'codexlens.saveConfig': 'Save Configuration',
|
||||
'codexlens.searching': 'Searching...',
|
||||
'codexlens.searchCompleted': 'Search completed',
|
||||
@@ -1470,6 +1478,20 @@ const i18n = {
|
||||
'apiSettings.endpointDeleted': 'Endpoint deleted successfully',
|
||||
'apiSettings.cacheCleared': 'Cache cleared successfully',
|
||||
'apiSettings.cacheSettingsUpdated': 'Cache settings updated',
|
||||
'apiSettings.embeddingPool': 'Embedding Pool',
|
||||
'apiSettings.embeddingPoolDesc': 'Auto-rotate between providers with same model',
|
||||
'apiSettings.targetModel': 'Target Model',
|
||||
'apiSettings.discoveredProviders': 'Discovered Providers',
|
||||
'apiSettings.autoDiscover': 'Auto-discover providers',
|
||||
'apiSettings.excludeProvider': 'Exclude',
|
||||
'apiSettings.defaultCooldown': 'Cooldown (seconds)',
|
||||
'apiSettings.defaultConcurrent': 'Concurrent per key',
|
||||
'apiSettings.poolEnabled': 'Enable Embedding Pool',
|
||||
'apiSettings.noProvidersFound': 'No providers found for this model',
|
||||
'apiSettings.poolSaved': 'Embedding pool config saved',
|
||||
'apiSettings.strategy': 'Strategy',
|
||||
'apiSettings.providerKeys': 'keys',
|
||||
'apiSettings.selectTargetModel': 'Select target model',
|
||||
'apiSettings.confirmDeleteProvider': 'Are you sure you want to delete this provider?',
|
||||
'apiSettings.confirmDeleteEndpoint': 'Are you sure you want to delete this endpoint?',
|
||||
'apiSettings.confirmClearCache': 'Are you sure you want to clear the cache?',
|
||||
@@ -1703,6 +1725,7 @@ const i18n = {
|
||||
'common.delete': '删除',
|
||||
'common.cancel': '取消',
|
||||
'common.save': '保存',
|
||||
'common.include': '包含',
|
||||
'common.close': '关闭',
|
||||
'common.loading': '加载中...',
|
||||
'common.error': '错误',
|
||||
@@ -1712,6 +1735,8 @@ const i18n = {
|
||||
'common.retry': '重试',
|
||||
'common.refresh': '刷新',
|
||||
'common.minutes': '分钟',
|
||||
'common.enabled': '已启用',
|
||||
'common.disabled': '已禁用',
|
||||
|
||||
// Header
|
||||
'header.project': '项目:',
|
||||
@@ -1951,7 +1976,7 @@ const i18n = {
|
||||
'codexlens.embeddingModel': '嵌入模型',
|
||||
'codexlens.modelHint': '选择向量搜索的嵌入模型(带 ✓ 的已安装)',
|
||||
'codexlens.concurrency': 'API 并发数',
|
||||
'codexlens.concurrencyHint': '并行 API 调用数量(1-32)。较高的值可加速索引但可能触发速率限制。',
|
||||
'codexlens.concurrencyHint': '并行 API 调用数量。较高的值可加速索引但可能触发速率限制。',
|
||||
'codexlens.concurrencyCustom': '自定义',
|
||||
'codexlens.rotation': '多供应商轮训',
|
||||
'codexlens.rotationDesc': '聚合多个 API 供应商和密钥进行并行嵌入生成',
|
||||
@@ -1973,6 +1998,8 @@ const i18n = {
|
||||
'codexlens.selectKeys': '选择密钥',
|
||||
'codexlens.configureRotation': '配置轮训',
|
||||
'codexlens.rotationSaved': '轮训配置保存成功',
|
||||
'codexlens.endpointsSynced': '个端点已同步到 CodexLens',
|
||||
'codexlens.syncFailed': '同步失败',
|
||||
'codexlens.rotationDeleted': '轮训配置已删除',
|
||||
'codexlens.totalEndpoints': '总端点数',
|
||||
'codexlens.fullIndex': '全部',
|
||||
@@ -1996,6 +2023,9 @@ const i18n = {
|
||||
'codexlens.runSearch': '运行搜索',
|
||||
'codexlens.results': '结果',
|
||||
'codexlens.resultsCount': '个结果',
|
||||
'codexlens.resultLimit': '数量限制',
|
||||
'codexlens.contentLength': '内容长度',
|
||||
'codexlens.extraFiles': '额外文件',
|
||||
'codexlens.saveConfig': '保存配置',
|
||||
'codexlens.searching': '搜索中...',
|
||||
'codexlens.searchCompleted': '搜索完成',
|
||||
@@ -3163,6 +3193,20 @@ const i18n = {
|
||||
'apiSettings.endpointDeleted': '端点删除成功',
|
||||
'apiSettings.cacheCleared': '缓存清除成功',
|
||||
'apiSettings.cacheSettingsUpdated': '缓存设置已更新',
|
||||
'apiSettings.embeddingPool': '高可用嵌入',
|
||||
'apiSettings.embeddingPoolDesc': '自动轮训相同模型的供应商',
|
||||
'apiSettings.targetModel': '目标模型',
|
||||
'apiSettings.discoveredProviders': '发现的供应商',
|
||||
'apiSettings.autoDiscover': '自动发现供应商',
|
||||
'apiSettings.excludeProvider': '排除',
|
||||
'apiSettings.defaultCooldown': '冷却时间(秒)',
|
||||
'apiSettings.defaultConcurrent': '每密钥并发数',
|
||||
'apiSettings.poolEnabled': '启用嵌入池',
|
||||
'apiSettings.noProvidersFound': '未找到提供此模型的供应商',
|
||||
'apiSettings.poolSaved': '嵌入池配置已保存',
|
||||
'apiSettings.strategy': '策略',
|
||||
'apiSettings.providerKeys': '密钥',
|
||||
'apiSettings.selectTargetModel': '选择目标模型',
|
||||
'apiSettings.confirmDeleteProvider': '确定要删除此提供商吗?',
|
||||
'apiSettings.confirmDeleteEndpoint': '确定要删除此端点吗?',
|
||||
'apiSettings.confirmClearCache': '确定要清除缓存吗?',
|
||||
|
||||
@@ -11,7 +11,12 @@ let selectedProviderId = null;
|
||||
// Provider list / model list UI state
let providerSearchQuery = '';
let activeModelTab = 'llm';
let expandedModelGroups = new Set();
// Currently selected sidebar tab: 'providers' | 'endpoints' | 'cache' | 'embedding-pool'.
// Declared exactly once: a second `let` for the same name is a SyntaxError.
let activeSidebarTab = 'providers';

// Embedding Pool state
// Pool configuration as last returned by the server (null until loaded)
let embeddingPoolConfig = null;
// Models offered for selection as the pool's target model
let embeddingPoolAvailableModels = [];
// Providers returned by the discover endpoint for the selected target model
let embeddingPoolDiscoveredProviders = [];
|
||||
|
||||
// ========== Data Loading ==========
|
||||
|
||||
@@ -61,6 +66,112 @@ async function loadCacheStats() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetch the embedding pool settings from the server and cache them.
 *
 * Stores `poolConfig` and `availableModels` from the response in the
 * module-level state. When the stored pool is enabled and names a target
 * model, provider discovery for that model is awaited before returning.
 *
 * @returns {Promise<Object|null>} The parsed response body, or null when the
 *   request or any later step failed (the error is logged and shown as a toast).
 */
async function loadEmbeddingPoolConfig() {
  try {
    const res = await fetch('/api/litellm-api/embedding-pool');
    if (!res.ok) {
      throw new Error('Failed to load embedding pool config');
    }

    const payload = await res.json();
    embeddingPoolConfig = payload.poolConfig;
    embeddingPoolAvailableModels = payload.availableModels ? payload.availableModels : [];

    // Discovery is only attempted for an enabled pool that names a model
    const pool = embeddingPoolConfig;
    if (pool?.enabled && pool.targetModel) {
      await discoverProvidersForTargetModel(pool.targetModel);
    }

    return payload;
  } catch (err) {
    console.error('Failed to load embedding pool config:', err);
    showRefreshToast(t('common.error') + ': ' + err.message, 'error');
    return null;
  }
}
|
||||
|
||||
/**
 * Ask the server which providers offer the given model.
 *
 * On success the `discovered` list from the response (or an empty list when
 * absent) replaces the module-level discovered-providers state. On any
 * failure that state is reset to an empty list and the error is logged.
 *
 * @param {string} targetModel - Model identifier; URL-encoded into the request path.
 * @returns {Promise<Object|null>} The parsed response body, or null on failure.
 */
async function discoverProvidersForTargetModel(targetModel) {
  let payload = null;
  try {
    const endpoint = '/api/litellm-api/embedding-pool/discover/' + encodeURIComponent(targetModel);
    const res = await fetch(endpoint);
    if (!res.ok) {
      throw new Error('Failed to discover providers');
    }
    payload = await res.json();
    embeddingPoolDiscoveredProviders = payload.discovered ? payload.discovered : [];
  } catch (err) {
    console.error('Failed to discover providers:', err);
    embeddingPoolDiscoveredProviders = [];
    return null;
  }
  return payload;
}
|
||||
|
||||
/**
 * Save embedding pool configuration.
 *
 * Reads the pool form controls, PUTs the resulting configuration (or `null`
 * when the pool is disabled) to `/api/litellm-api/embedding-pool`, stores the
 * `poolConfig` echoed back by the server, reports the outcome in a toast and
 * re-renders the embedding pool panel.
 *
 * Validation performed before sending:
 *  - an enabled pool must have a target model selected;
 *  - cooldown / concurrency must be integers >= 1 (matching the inputs'
 *    `min="1"`); anything else falls back to the defaults 60 and 4.
 *
 * @returns {Promise<void>} Does not reject: failures are logged and surfaced
 *   as an error toast.
 */
async function saveEmbeddingPoolConfig() {
  // Read an integer form field; missing, blank, non-numeric or < 1 values use the fallback.
  const readPositiveInt = function (elementId, fallback) {
    const raw = document.getElementById(elementId)?.value;
    const parsed = parseInt(raw, 10);
    return Number.isNaN(parsed) || parsed < 1 ? fallback : parsed;
  };

  try {
    const enabled = document.getElementById('embedding-pool-enabled')?.checked || false;
    const targetModel = document.getElementById('embedding-pool-target-model')?.value || '';
    const strategy = document.getElementById('embedding-pool-strategy')?.value || 'round_robin';
    const defaultCooldown = readPositiveInt('embedding-pool-cooldown', 60);
    const defaultMaxConcurrentPerKey = readPositiveInt('embedding-pool-concurrent', 4);

    // An enabled pool without a target model has nothing to discover or rotate.
    if (enabled && !targetModel) {
      showRefreshToast(t('apiSettings.selectTargetModel'), 'error');
      return;
    }

    // A disabled pool is stored as null rather than as { enabled: false, ... }.
    const poolConfig = enabled ? {
      enabled: true,
      targetModel: targetModel,
      strategy: strategy,
      autoDiscover: true,
      excludedProviderIds: embeddingPoolConfig?.excludedProviderIds || [],
      defaultCooldown: defaultCooldown,
      defaultMaxConcurrentPerKey: defaultMaxConcurrentPerKey
    } : null;

    const response = await fetch('/api/litellm-api/embedding-pool', {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(poolConfig)
    });

    if (!response.ok) throw new Error('Failed to save embedding pool config');

    const result = await response.json();
    embeddingPoolConfig = result.poolConfig;

    // Build the sync feedback the same way saveRotationConfig does. The exact
    // syncResult shape is not visible here, so accept either a
    // `syncedEndpoints` array or an `endpointCount` number.
    const sync = result.syncResult;
    let syncMsg = '';
    if (sync && sync.success === false) {
      syncMsg = ' (' + t('codexlens.syncFailed') + ': ' + sync.message + ')';
    } else {
      const syncCount = sync?.syncedEndpoints?.length ?? sync?.endpointCount ?? 0;
      if (syncCount > 0) {
        syncMsg = ' (' + syncCount + ' ' + t('codexlens.endpointsSynced') + ')';
      }
    }
    showRefreshToast(t('apiSettings.poolSaved') + syncMsg, 'success');

    // Reload the embedding pool section
    await renderEmbeddingPoolMainPanel();
  } catch (err) {
    console.error('Failed to save embedding pool config:', err);
    showRefreshToast(t('common.error') + ': ' + err.message, 'error');
  }
}
|
||||
|
||||
/**
 * Flip a provider between "included" and "excluded" for the embedding pool.
 *
 * Only the in-memory pool configuration is changed; the discovered providers
 * list is then re-rendered. Does nothing when no pool configuration is held.
 *
 * @param {string} providerId - Identifier of the provider to toggle.
 * @returns {Promise<void>}
 */
async function toggleProviderExclusion(providerId) {
  if (embeddingPoolConfig) {
    // Reuse the existing array (mutated in place) or start a new one
    const excluded = embeddingPoolConfig.excludedProviderIds || [];
    const position = excluded.indexOf(providerId);

    if (position === -1) {
      excluded.push(providerId);
    } else {
      excluded.splice(position, 1);
    }

    embeddingPoolConfig.excludedProviderIds = excluded;

    // Refresh the list so icons and button labels match the new state
    renderDiscoveredProviders();
  }
}
|
||||
|
||||
// ========== Provider Management ==========
|
||||
|
||||
/**
|
||||
@@ -825,6 +936,9 @@ async function renderApiSettings() {
|
||||
'<button class="sidebar-tab' + (activeSidebarTab === 'endpoints' ? ' active' : '') + '" onclick="switchSidebarTab(\'endpoints\')">' +
|
||||
'<i data-lucide="link"></i> ' + t('apiSettings.endpoints') +
|
||||
'</button>' +
|
||||
'<button class="sidebar-tab' + (activeSidebarTab === 'embedding-pool' ? ' active' : '') + '" onclick="switchSidebarTab(\'embedding-pool\')">' +
|
||||
'<i data-lucide="repeat"></i> ' + t('apiSettings.embeddingPool') +
|
||||
'</button>' +
|
||||
'<button class="sidebar-tab' + (activeSidebarTab === 'cache' ? ' active' : '') + '" onclick="switchSidebarTab(\'cache\')">' +
|
||||
'<i data-lucide="database"></i> ' + t('apiSettings.cache') +
|
||||
'</button>' +
|
||||
@@ -833,7 +947,7 @@ async function renderApiSettings() {
|
||||
// Build sidebar content based on active tab
|
||||
var sidebarContentHtml = '';
|
||||
var addButtonHtml = '';
|
||||
|
||||
|
||||
if (activeSidebarTab === 'providers') {
|
||||
sidebarContentHtml = '<div class="provider-search">' +
|
||||
'<i data-lucide="search" class="search-icon"></i>' +
|
||||
@@ -848,6 +962,10 @@ async function renderApiSettings() {
|
||||
addButtonHtml = '<button class="btn btn-primary btn-full" onclick="showAddEndpointModal()">' +
|
||||
'<i data-lucide="plus"></i> ' + t('apiSettings.addEndpoint') +
|
||||
'</button>';
|
||||
} else if (activeSidebarTab === 'embedding-pool') {
|
||||
sidebarContentHtml = '<div class="embedding-pool-sidebar-info" style="padding: 1rem; color: var(--text-secondary); font-size: 0.875rem;">' +
|
||||
'<p>' + t('apiSettings.embeddingPoolDesc') + '</p>' +
|
||||
'</div>';
|
||||
} else if (activeSidebarTab === 'cache') {
|
||||
sidebarContentHtml = '<div class="cache-sidebar-info" style="padding: 1rem; color: var(--text-secondary); font-size: 0.875rem;">' +
|
||||
'<p>' + t('apiSettings.cacheTabHint') + '</p>' +
|
||||
@@ -887,6 +1005,8 @@ async function renderApiSettings() {
|
||||
} else if (activeSidebarTab === 'endpoints') {
|
||||
renderEndpointsList();
|
||||
renderEndpointsMainPanel();
|
||||
} else if (activeSidebarTab === 'embedding-pool') {
|
||||
renderEmbeddingPoolMainPanel();
|
||||
} else if (activeSidebarTab === 'cache') {
|
||||
renderCacheMainPanel();
|
||||
}
|
||||
@@ -2367,6 +2487,174 @@ function generateKeyId() {
|
||||
return 'key-' + Date.now() + '-' + Math.random().toString(36).substr(2, 9);
|
||||
}
|
||||
|
||||
// ========== Embedding Pool Management ==========
|
||||
|
||||
/**
 * Render embedding pool main panel.
 *
 * Builds the enable toggle, the target-model / strategy / cooldown /
 * concurrency form and the save button into `#provider-detail-panel`.
 * The pool configuration is fetched first when none is held in memory.
 * The discovered providers list is rendered afterwards when the pool is
 * enabled and has a target model.
 *
 * Values that come from configuration data (model ids/names, numeric
 * settings) are HTML-escaped before interpolation so that quotes or angle
 * brackets in them cannot break the generated markup.
 *
 * @returns {Promise<void>} Resolves once the panel markup has been written;
 *   returns early when the container element does not exist.
 */
async function renderEmbeddingPoolMainPanel() {
  var container = document.getElementById('provider-detail-panel');
  if (!container) return;

  // Escape a value for interpolation into HTML text or a quoted attribute.
  const esc = function (value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  };

  // Load embedding pool config if not already loaded
  if (!embeddingPoolConfig) {
    await loadEmbeddingPoolConfig();
  }

  const enabled = embeddingPoolConfig?.enabled || false;
  const targetModel = embeddingPoolConfig?.targetModel || '';
  const strategy = embeddingPoolConfig?.strategy || 'round_robin';
  const defaultCooldown = embeddingPoolConfig?.defaultCooldown || 60;
  const defaultMaxConcurrentPerKey = embeddingPoolConfig?.defaultMaxConcurrentPerKey || 4;

  // Build model dropdown options
  let modelOptionsHtml = '<option value="">' + t('apiSettings.selectTargetModel') + '</option>';
  embeddingPoolAvailableModels.forEach(function (model) {
    // Tolerate entries without a providers array instead of throwing mid-render
    const providerCount = (model.providers || []).length;
    const selected = model.modelId === targetModel ? ' selected' : '';
    modelOptionsHtml += '<option value="' + esc(model.modelId) + '"' + selected + '>' +
      esc(model.modelName) + ' (' + providerCount + ' providers)' +
      '</option>';
  });

  var html = '<div class="embedding-pool-main-panel">' +
    '<div class="panel-header">' +
    '<h2><i data-lucide="repeat"></i> ' + t('apiSettings.embeddingPool') + '</h2>' +
    '<p class="panel-subtitle">' + t('apiSettings.embeddingPoolDesc') + '</p>' +
    '</div>' +

    // Enable/Disable Toggle
    '<div class="settings-section">' +
    '<div class="section-header">' +
    '<h3>' + t('apiSettings.poolEnabled') + '</h3>' +
    '<label class="toggle-switch">' +
    '<input type="checkbox" id="embedding-pool-enabled" ' + (enabled ? 'checked' : '') + ' onchange="onEmbeddingPoolEnabledChange(this.checked)" />' +
    '<span class="toggle-track"><span class="toggle-thumb"></span></span>' +
    '</label>' +
    '</div>' +
    '</div>' +

    // Configuration Form (hidden while the pool is disabled)
    '<div class="settings-section" id="embedding-pool-config" style="' + (enabled ? '' : 'display: none;') + '">' +
    '<div class="form-group">' +
    '<label for="embedding-pool-target-model">' + t('apiSettings.targetModel') + '</label>' +
    '<select id="embedding-pool-target-model" class="cli-input" onchange="onTargetModelChange(this.value)">' +
    modelOptionsHtml +
    '</select>' +
    '</div>' +

    '<div class="form-group">' +
    '<label for="embedding-pool-strategy">' + t('apiSettings.strategy') + '</label>' +
    '<select id="embedding-pool-strategy" class="cli-input">' +
    '<option value="round_robin"' + (strategy === 'round_robin' ? ' selected' : '') + '>Round Robin</option>' +
    '<option value="latency_aware"' + (strategy === 'latency_aware' ? ' selected' : '') + '>Latency Aware</option>' +
    '<option value="weighted_random"' + (strategy === 'weighted_random' ? ' selected' : '') + '>Weighted Random</option>' +
    '</select>' +
    '</div>' +

    '<div class="form-group">' +
    '<label for="embedding-pool-cooldown">' + t('apiSettings.defaultCooldown') + '</label>' +
    '<input type="number" id="embedding-pool-cooldown" class="cli-input" value="' + esc(defaultCooldown) + '" min="1" />' +
    '</div>' +

    '<div class="form-group">' +
    '<label for="embedding-pool-concurrent">' + t('apiSettings.defaultConcurrent') + '</label>' +
    '<input type="number" id="embedding-pool-concurrent" class="cli-input" value="' + esc(defaultMaxConcurrentPerKey) + '" min="1" />' +
    '</div>' +

    // Discovered Providers Section (filled in by renderDiscoveredProviders)
    '<div id="discovered-providers-section"></div>' +

    '<div class="form-actions">' +
    '<button class="btn btn-primary" onclick="saveEmbeddingPoolConfig()">' +
    '<i data-lucide="save"></i> ' + t('common.save') +
    '</button>' +
    '</div>' +
    '</div>' +
    '</div>';

  container.innerHTML = html;
  if (window.lucide) lucide.createIcons();

  // Render discovered providers if we have a target model
  if (enabled && targetModel) {
    renderDiscoveredProviders();
  }
}
|
||||
|
||||
/**
 * React to the embedding pool enable/disable checkbox.
 *
 * Shows the `#embedding-pool-config` section when the pool is enabled and
 * hides it otherwise. Does nothing when that element is not in the document.
 *
 * @param {boolean} enabled - New checked state of the toggle.
 */
function onEmbeddingPoolEnabledChange(enabled) {
  var panel = document.getElementById('embedding-pool-config');
  if (!panel) {
    return;
  }

  // An empty display value restores the element's default visibility
  var display = 'none';
  if (enabled) {
    display = '';
  }
  panel.style.display = display;
}
|
||||
|
||||
/**
 * React to a change of the target model dropdown.
 *
 * With a model selected, provider discovery for it is awaited; with the
 * empty option selected, the discovered providers state is cleared. In both
 * cases the discovered providers list is re-rendered afterwards.
 *
 * @param {string} modelId - Value of the selected option ('' when none).
 * @returns {Promise<void>}
 */
async function onTargetModelChange(modelId) {
  if (modelId) {
    // Discover providers for this model
    await discoverProvidersForTargetModel(modelId);
  } else {
    embeddingPoolDiscoveredProviders = [];
  }

  renderDiscoveredProviders();
}
|
||||
|
||||
/**
 * Render discovered providers list.
 *
 * Writes into `#discovered-providers-section` either an informational
 * message (no providers discovered) or a box listing every discovered
 * provider with its key count and an include/exclude button. A provider is
 * shown as excluded when its id is in the pool's `excludedProviderIds`.
 *
 * Provider names are HTML-escaped, and provider ids are escaped both as a
 * single-quoted JavaScript string and as an HTML attribute value, so that
 * special characters in either cannot break the markup or the inline handler.
 * For ids and names without special characters the output is unchanged.
 *
 * Returns early when the container element does not exist.
 */
function renderDiscoveredProviders() {
  const container = document.getElementById('discovered-providers-section');
  if (!container) return;

  // Escape a value for interpolation into HTML text or a quoted attribute.
  const esc = function (value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  };

  // Escape a value for use inside a single-quoted JS string literal that is
  // itself placed in an HTML attribute (JS escaping first, then HTML escaping).
  const escJsInAttr = function (value) {
    return esc(String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'"));
  };

  if (embeddingPoolDiscoveredProviders.length === 0) {
    container.innerHTML = '<div class="info-message" style="margin-top: 1rem;">' +
      '<i data-lucide="info"></i> ' + t('apiSettings.noProvidersFound') +
      '</div>';
    if (window.lucide) lucide.createIcons();
    return;
  }

  const excludedIds = embeddingPoolConfig?.excludedProviderIds || [];
  const totalProviders = embeddingPoolDiscoveredProviders.length;
  // A provider that reports no key count is counted as having one key
  const totalKeys = embeddingPoolDiscoveredProviders.reduce(function (sum, p) {
    return sum + (p.keyCount || 1);
  }, 0);

  let providersHtml = '<div class="discovered-providers-box" style="margin-top: 1rem; padding: 1rem; background: var(--bg-secondary); border-radius: 8px;">' +
    '<h4>' + t('apiSettings.discoveredProviders') + ' (' + totalProviders + ' providers, ' + totalKeys + ' ' + t('apiSettings.providerKeys') + ')</h4>' +
    '<div class="providers-list" style="margin-top: 0.75rem;">';

  embeddingPoolDiscoveredProviders.forEach(function (provider) {
    const isExcluded = excludedIds.indexOf(provider.providerId) > -1;
    const icon = isExcluded ? 'x-circle' : 'check-circle';
    const statusClass = isExcluded ? 'text-error' : 'text-success';
    // The key count is only shown when a provider contributes more than one key
    const keyInfo = provider.keyCount > 1 ? ' (' + esc(provider.keyCount) + ' ' + t('apiSettings.providerKeys') + ')' : '';

    providersHtml += '<div class="provider-item" style="display: flex; align-items: center; gap: 0.75rem; padding: 0.5rem; border-bottom: 1px solid var(--border-color);">' +
      '<i data-lucide="' + icon + '" class="' + statusClass + '"></i>' +
      '<span style="flex: 1;">' + esc(provider.providerName) + keyInfo + '</span>' +
      '<button class="btn btn-sm ' + (isExcluded ? 'btn-secondary' : 'btn-outline') + '" onclick="toggleProviderExclusion(\'' + escJsInAttr(provider.providerId) + '\')">' +
      (isExcluded ? t('common.include') : t('apiSettings.excludeProvider')) +
      '</button>' +
      '</div>';
  });

  providersHtml += '</div></div>';
  container.innerHTML = providersHtml;
  if (window.lucide) lucide.createIcons();
}
|
||||
|
||||
/**
|
||||
* Render API keys section
|
||||
*/
|
||||
|
||||
@@ -271,6 +271,9 @@ function initCodexLensConfigEvents(currentConfig) {
|
||||
var searchType = document.getElementById('searchTypeSelect').value;
|
||||
var searchMode = document.getElementById('searchModeSelect').value;
|
||||
var query = document.getElementById('searchQueryInput').value.trim();
|
||||
var searchLimit = document.getElementById('searchLimitInput')?.value || '5';
|
||||
var contentLength = document.getElementById('contentLengthInput')?.value || '200';
|
||||
var extraFiles = document.getElementById('extraFilesInput')?.value || '10';
|
||||
var resultsDiv = document.getElementById('searchResults');
|
||||
var resultCount = document.getElementById('searchResultCount');
|
||||
var resultContent = document.getElementById('searchResultContent');
|
||||
@@ -286,7 +289,12 @@ function initCodexLensConfigEvents(currentConfig) {
|
||||
|
||||
try {
|
||||
var endpoint = '/api/codexlens/' + searchType;
|
||||
var params = new URLSearchParams({ query: query, limit: '20' });
|
||||
var params = new URLSearchParams({
|
||||
query: query,
|
||||
limit: searchLimit,
|
||||
max_content_length: contentLength,
|
||||
extra_files_count: extraFiles
|
||||
});
|
||||
// Add mode parameter for search and search_files (not for symbol search)
|
||||
if (searchType === 'search' || searchType === 'search_files') {
|
||||
params.append('mode', searchMode);
|
||||
@@ -2001,7 +2009,7 @@ function buildCodexLensManagerPage(config) {
|
||||
'<div id="concurrencySelector" class="hidden">' +
|
||||
'<label class="block text-sm font-medium mb-1.5">' + t('codexlens.concurrency') + '</label>' +
|
||||
'<div class="flex items-center gap-2">' +
|
||||
'<input type="number" id="pageConcurrencyInput" min="1" max="32" value="4" ' +
|
||||
'<input type="number" id="pageConcurrencyInput" min="1" value="4" ' +
|
||||
'class="w-24 px-3 py-2 border border-border rounded-lg bg-background text-sm" ' +
|
||||
'onchange="validateConcurrencyInput(this)" />' +
|
||||
'<span class="text-sm text-muted-foreground">workers</span>' +
|
||||
@@ -2173,6 +2181,20 @@ function buildCodexLensManagerPage(config) {
|
||||
'<option value="vector">' + t('codexlens.vectorMode') + '</option>' +
|
||||
'</select>' +
|
||||
'</div>' +
|
||||
'<div class="flex gap-3 items-center">' +
|
||||
'<div class="flex items-center gap-2">' +
|
||||
'<label class="text-xs text-muted-foreground whitespace-nowrap">' + t('codexlens.resultLimit') + '</label>' +
|
||||
'<input type="number" id="searchLimitInput" class="w-16 px-2 py-1.5 border border-border rounded-lg bg-background text-sm text-center" value="5" min="1" max="50" />' +
|
||||
'</div>' +
|
||||
'<div class="flex items-center gap-2">' +
|
||||
'<label class="text-xs text-muted-foreground whitespace-nowrap">' + t('codexlens.contentLength') + '</label>' +
|
||||
'<input type="number" id="contentLengthInput" class="w-20 px-2 py-1.5 border border-border rounded-lg bg-background text-sm text-center" value="200" min="50" max="2000" />' +
|
||||
'</div>' +
|
||||
'<div class="flex items-center gap-2">' +
|
||||
'<label class="text-xs text-muted-foreground whitespace-nowrap">' + t('codexlens.extraFiles') + '</label>' +
|
||||
'<input type="number" id="extraFilesInput" class="w-16 px-2 py-1.5 border border-border rounded-lg bg-background text-sm text-center" value="10" min="0" max="50" />' +
|
||||
'</div>' +
|
||||
'</div>' +
|
||||
'<div class="flex gap-3">' +
|
||||
'<input type="text" id="searchQueryInput" class="flex-1 px-3 py-2 border border-border rounded-lg bg-background text-sm" placeholder="' + t('codexlens.searchPlaceholder') + '" />' +
|
||||
'<button class="btn-sm btn-primary" id="runSearchBtn"><i data-lucide="search" class="w-3.5 h-3.5"></i> ' + t('codexlens.runSearch') + '</button>' +
|
||||
@@ -2228,14 +2250,12 @@ function buildModelSelectOptionsForPage() {
|
||||
}
|
||||
|
||||
/**
 * Validate concurrency input value (min 1, no max limit).
 *
 * Resets the field to 1 when its value is not a number or is below 1.
 * Values of 1 or more are left untouched; there is deliberately no upper
 * bound, matching the uncapped worker count used when indexing starts.
 *
 * @param {{value: (string|number)}} input - The number input element (or any
 *   object exposing a `value` property).
 */
function validateConcurrencyInput(input) {
  var value = parseInt(input.value, 10);
  if (isNaN(value) || value < 1) {
    input.value = 1;
  }
}
|
||||
|
||||
@@ -2338,7 +2358,7 @@ function initCodexLensIndexFromPage(indexType) {
|
||||
var concurrencyInput = document.getElementById('pageConcurrencyInput');
|
||||
var selectedBackend = backendSelect ? backendSelect.value : 'fastembed';
|
||||
var selectedModel = modelSelect ? modelSelect.value : 'code';
|
||||
var selectedConcurrency = concurrencyInput ? Math.min(32, Math.max(1, parseInt(concurrencyInput.value, 10) || 4)) : 4;
|
||||
var selectedConcurrency = concurrencyInput ? Math.max(1, parseInt(concurrencyInput.value, 10) || 4) : 4;
|
||||
|
||||
// For FTS-only index, model is not needed
|
||||
if (indexType === 'normal') {
|
||||
@@ -2879,7 +2899,16 @@ async function saveRotationConfig() {
|
||||
var result = await response.json();
|
||||
|
||||
if (result.success) {
|
||||
showRefreshToast(t('codexlens.rotationSaved'), 'success');
|
||||
// Show sync result in toast
|
||||
var syncMsg = '';
|
||||
if (result.syncResult) {
|
||||
if (result.syncResult.success) {
|
||||
syncMsg = ' (' + result.syncResult.endpointCount + ' ' + t('codexlens.endpointsSynced') + ')';
|
||||
} else {
|
||||
syncMsg = ' (' + t('codexlens.syncFailed') + ': ' + result.syncResult.message + ')';
|
||||
}
|
||||
}
|
||||
showRefreshToast(t('codexlens.rotationSaved') + syncMsg, 'success');
|
||||
window.rotationConfig = rotationConfig;
|
||||
updateRotationStatusDisplay(rotationConfig);
|
||||
closeRotationModal();
|
||||
|
||||
@@ -347,6 +347,33 @@ export interface CodexLensEmbeddingRotation {
|
||||
providers: CodexLensEmbeddingProvider[];
|
||||
}
|
||||
|
||||
/**
 * Configuration of the generic embedding pool.
 *
 * Refactored from CodexLensEmbeddingRotation. Instead of listing providers
 * explicitly, the pool can automatically discover every provider that
 * offers one specific model.
 */
export interface EmbeddingPoolConfig {
  /** True when the embedding pool is switched on. */
  enabled: boolean;

  /** Name of the embedding model the pool serves, e.g. "text-embedding-3-small". */
  targetModel: string;

  /** How an endpoint is selected for a request. */
  strategy: 'round_robin' | 'latency_aware' | 'weighted_random';

  /** When true, all providers offering `targetModel` are discovered automatically. */
  autoDiscover: boolean;

  /** Optional list of provider IDs that auto-discovery must leave out. */
  excludedProviderIds?: Array<string>;

  /** Cooldown, in seconds, applied to rate-limited endpoints (default: 60). */
  defaultCooldown: number;

  /** Upper bound on concurrent requests per key (default: 4). */
  defaultMaxConcurrentPerKey: number;
}
|
||||
|
||||
/**
|
||||
* Complete LiteLLM API configuration
|
||||
* Root configuration object stored in JSON file
|
||||
@@ -367,6 +394,9 @@ export interface LiteLLMApiConfig {
|
||||
/** Global cache settings */
|
||||
globalCacheSettings: GlobalCacheSettings;
|
||||
|
||||
/** CodexLens multi-provider embedding rotation config */
|
||||
/** CodexLens multi-provider embedding rotation config (deprecated, use embeddingPoolConfig) */
|
||||
codexlensEmbeddingRotation?: CodexLensEmbeddingRotation;
|
||||
|
||||
/** Generic embedding pool configuration with auto-discovery support */
|
||||
embeddingPoolConfig?: EmbeddingPoolConfig;
|
||||
}
|
||||
|
||||
@@ -103,12 +103,12 @@ def init(
|
||||
"-l",
|
||||
help="Limit indexing to specific languages (repeat or comma-separated).",
|
||||
),
|
||||
workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=32, help="Parallel worker processes (default: auto-detect based on CPU count, max 32)."),
|
||||
workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes (default: auto-detect based on CPU count)."),
|
||||
force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
|
||||
no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
|
||||
embedding_backend: str = typer.Option("fastembed", "--embedding-backend", help="Embedding backend: fastembed (local) or litellm (remote API)."),
|
||||
embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
|
||||
max_workers: int = typer.Option(1, "--max-workers", min=1, max=32, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
|
||||
max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
|
||||
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
|
||||
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
|
||||
) -> None:
|
||||
@@ -478,6 +478,7 @@ def search(
|
||||
"path": r.path,
|
||||
"score": r.score,
|
||||
"excerpt": r.excerpt,
|
||||
"content": r.content, # Full function/class body
|
||||
"source": getattr(r, "search_source", None),
|
||||
"symbol": getattr(r, "symbol", None),
|
||||
}
|
||||
@@ -1852,7 +1853,6 @@ def embeddings_generate(
|
||||
"--max-workers",
|
||||
"-w",
|
||||
min=1,
|
||||
max=32,
|
||||
help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).",
|
||||
),
|
||||
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
|
||||
|
||||
@@ -331,7 +331,7 @@ def generate_embeddings(
|
||||
if max_workers is None:
|
||||
if embedding_backend == "litellm":
|
||||
if endpoint_count > 1:
|
||||
max_workers = min(endpoint_count * 2, 32) # Cap at 32 workers
|
||||
max_workers = endpoint_count * 2 # No cap, scale with endpoints
|
||||
else:
|
||||
max_workers = 4
|
||||
else:
|
||||
@@ -806,7 +806,7 @@ def generate_embeddings_recursive(
|
||||
if max_workers is None:
|
||||
if embedding_backend == "litellm":
|
||||
if endpoint_count > 1:
|
||||
max_workers = min(endpoint_count * 2, 32)
|
||||
max_workers = endpoint_count * 2 # No cap, scale with endpoints
|
||||
else:
|
||||
max_workers = 4
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user