diff --git a/ccw/src/config/litellm-api-config-manager.ts b/ccw/src/config/litellm-api-config-manager.ts
index 23a02bb6..998c1290 100644
--- a/ccw/src/config/litellm-api-config-manager.ts
+++ b/ccw/src/config/litellm-api-config-manager.ts
@@ -13,6 +13,8 @@ import type {
   GlobalCacheSettings,
   ProviderType,
   CacheStrategy,
+  CodexLensEmbeddingRotation,
+  CodexLensEmbeddingProvider,
 } from '../types/litellm-api-config.js';
 
 /**
@@ -356,6 +358,199 @@ export function updateGlobalCacheSettings(
   saveConfig(baseDir, config);
 }
 
+// ===========================
+// CodexLens Embedding Rotation Management
+// ===========================
+
+/**
+ * Get CodexLens embedding rotation config
+ */
+export function getCodexLensEmbeddingRotation(baseDir: string): CodexLensEmbeddingRotation | undefined {
+  const config = loadLiteLLMApiConfig(baseDir);
+  return config.codexlensEmbeddingRotation;
+}
+
+/**
+ * Update CodexLens embedding rotation config
+ */
+export function updateCodexLensEmbeddingRotation(
+  baseDir: string,
+  rotationConfig: CodexLensEmbeddingRotation | undefined
+): void {
+  const config = loadLiteLLMApiConfig(baseDir);
+
+  if (rotationConfig) {
+    config.codexlensEmbeddingRotation = rotationConfig;
+  } else {
+    delete config.codexlensEmbeddingRotation;
+  }
+
+  saveConfig(baseDir, config);
+}
+
+/**
+ * Get all enabled embedding providers with their API keys for rotation
+ * Aggregates enabled providers that have at least one enabled embedding model; only key ids/labels are returned, not key values
+ */
+export function getEmbeddingProvidersForRotation(baseDir: string): Array<{
+  providerId: string;
+  providerName: string;
+  apiBase: string;
+  embeddingModels: Array<{
+    modelId: string;
+    modelName: string;
+    dimensions: number;
+  }>;
+  apiKeys: Array<{
+    keyId: string;
+    keyLabel: string;
+    enabled: boolean;
+  }>;
+}> {
+  const config = loadLiteLLMApiConfig(baseDir);
+  const result: Array<{
+    providerId: string;
+    providerName: string;
+    apiBase: string;
+    embeddingModels: Array<{
+      modelId: string;
+      modelName: string;
+      dimensions: number;
+    }>;
+    apiKeys: Array<{
+      keyId: string;
+      keyLabel: string;
+      enabled: boolean;
+    }>;
+  }> = [];
+
+  for (const provider of config.providers) {
+    if (!provider.enabled) continue;
+
+    // Check if provider has embedding models
+    const embeddingModels = (provider.embeddingModels || [])
+      .filter(m => m.enabled)
+      .map(m => ({
+        modelId: m.id,
+        modelName: m.name,
+        dimensions: m.capabilities?.embeddingDimension || 1536,
+      }));
+
+    if (embeddingModels.length === 0) continue;
+
+    // Get API keys (single key or multiple from apiKeys array)
+    const apiKeys: Array<{ keyId: string; keyLabel: string; enabled: boolean }> = [];
+
+    if (provider.apiKeys && provider.apiKeys.length > 0) {
+      // Use multi-key configuration
+      for (const keyEntry of provider.apiKeys) {
+        apiKeys.push({
+          keyId: keyEntry.id,
+          keyLabel: keyEntry.label || keyEntry.id,
+          enabled: keyEntry.enabled,
+        });
+      }
+    } else if (provider.apiKey) {
+      // Single key fallback
+      apiKeys.push({
+        keyId: 'default',
+        keyLabel: 'Default Key',
+        enabled: true,
+      });
+    }
+
+    result.push({
+      providerId: provider.id,
+      providerName: provider.name,
+      apiBase: provider.apiBase || getDefaultApiBaseForType(provider.type),
+      embeddingModels,
+      apiKeys,
+    });
+  }
+
+  return result;
+}
+
+/**
+ * Generate rotation endpoints for ccw_litellm: one endpoint per usable API key of every enabled rotation provider whose provider and model are enabled
+ * Returns [] when rotation is absent or disabled; missing fields fall back to the documented defaults (useAllKeys=true, weight=1, maxConcurrentPerKey=4)
+ */
+export function generateRotationEndpoints(baseDir: string): Array<{
+  name: string;
+  api_key: string;
+  api_base: string;
+  model: string;
+  weight: number;
+  max_concurrent: number;
+}> {
+  const config = loadLiteLLMApiConfig(baseDir);
+  const rotationConfig = config.codexlensEmbeddingRotation;
+
+  if (!rotationConfig || !rotationConfig.enabled) {
+    return [];
+  }
+
+  const endpoints: Array<{
+    name: string;
+    api_key: string;
+    api_base: string;
+    model: string;
+    weight: number;
+    max_concurrent: number;
+  }> = [];
+
+  for (const rotationProvider of rotationConfig.providers ?? []) { // persisted config is not schema-validated; tolerate a missing list
+    if (!rotationProvider.enabled) continue;
+
+    // Find the provider config
+    const provider = config.providers.find(p => p.id === rotationProvider.providerId);
+    if (!provider || !provider.enabled) continue;
+
+    // Find the embedding model
+    const embeddingModel = provider.embeddingModels?.find(m => m.id === rotationProvider.modelId);
+    if (!embeddingModel || !embeddingModel.enabled) continue;
+
+    // Get API base (model-specific or provider default)
+    const apiBase = embeddingModel.endpointSettings?.baseUrl ||
+      provider.apiBase ||
+      getDefaultApiBaseForType(provider.type);
+
+    // Get API keys to use
+    let keysToUse: Array<{ id: string; key: string; label: string }> = [];
+
+    if (provider.apiKeys && provider.apiKeys.length > 0) {
+      if (rotationProvider.useAllKeys ?? !rotationProvider.selectedKeyIds?.length) {
+        // Use all enabled keys (also the documented default when the flag is absent and no keys are selected)
+        keysToUse = provider.apiKeys
+          .filter(k => k.enabled)
+          .map(k => ({ id: k.id, key: k.key, label: k.label || k.id }));
+      } else if (rotationProvider.selectedKeyIds && rotationProvider.selectedKeyIds.length > 0) {
+        // Use only selected keys
+        keysToUse = provider.apiKeys
+          .filter(k => k.enabled && (rotationProvider.selectedKeyIds ?? []).includes(k.id))
+          .map(k => ({ id: k.id, key: k.key, label: k.label || k.id }));
+      }
+    } else if (provider.apiKey) {
+      // Single key fallback
+      keysToUse = [{ id: 'default', key: provider.apiKey, label: 'Default' }];
+    }
+
+    // Create endpoint for each key
+    for (const keyInfo of keysToUse) {
+      endpoints.push({
+        name: `${provider.name}-${keyInfo.label}`,
+        api_key: resolveEnvVar(keyInfo.key),
+        api_base: apiBase,
+        model: embeddingModel.name,
+        weight: rotationProvider.weight ?? 1, // documented default weight; JSON turns NaN into null
+        max_concurrent: rotationProvider.maxConcurrentPerKey ?? 4, // documented default per-key concurrency
+      });
+    }
+  }
+
+  return endpoints;
+}
+
 // ===========================
 // YAML Config Generation for ccw_litellm
 // ===========================
@@ -518,4 +713,4 @@ function objectToYaml(obj: unknown, indent: number = 0): string {
 }
 
 // Re-export types
-export type { ProviderCredential, CustomEndpoint, ProviderType, 
CacheStrategy }; +export type { ProviderCredential, CustomEndpoint, ProviderType, CacheStrategy, CodexLensEmbeddingRotation, CodexLensEmbeddingProvider }; diff --git a/ccw/src/core/routes/litellm-api-routes.ts b/ccw/src/core/routes/litellm-api-routes.ts index 9ce331c3..95d82622 100644 --- a/ccw/src/core/routes/litellm-api-routes.ts +++ b/ccw/src/core/routes/litellm-api-routes.ts @@ -22,9 +22,14 @@ import { loadLiteLLMApiConfig, saveLiteLLMYamlConfig, generateLiteLLMYamlConfig, + getCodexLensEmbeddingRotation, + updateCodexLensEmbeddingRotation, + getEmbeddingProvidersForRotation, + generateRotationEndpoints, type ProviderCredential, type CustomEndpoint, type ProviderType, + type CodexLensEmbeddingRotation, } from '../../config/litellm-api-config-manager.js'; import { getContextCacheStore } from '../../tools/context-cache-store.js'; import { getLiteLLMClient } from '../../tools/litellm-client.js'; @@ -568,6 +573,66 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise { + const rotationConfig = body as CodexLensEmbeddingRotation | null; + + try { + updateCodexLensEmbeddingRotation(initialPath, rotationConfig || undefined); + + broadcastToClients({ + type: 'CODEXLENS_ROTATION_UPDATED', + payload: { rotationConfig, timestamp: new Date().toISOString() } + }); + + return { success: true, rotationConfig }; + } catch (err) { + return { error: (err as Error).message, status: 500 }; + } + }); + return true; + } + + // GET /api/litellm-api/codexlens/rotation/endpoints - Get generated rotation endpoints + if (pathname === '/api/litellm-api/codexlens/rotation/endpoints' && req.method === 'GET') { + try { + const endpoints = generateRotationEndpoints(initialPath); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + endpoints, + count: endpoints.length, + })); + } catch (err) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: (err as Error).message })); + } + return 
true; + } + // POST /api/litellm-api/ccw-litellm/install - Install ccw-litellm package if (pathname === '/api/litellm-api/ccw-litellm/install' && req.method === 'POST') { handlePostRequest(req, res, async () => { diff --git a/ccw/src/templates/dashboard-js/i18n.js b/ccw/src/templates/dashboard-js/i18n.js index 63fe998e..620785cb 100644 --- a/ccw/src/templates/dashboard-js/i18n.js +++ b/ccw/src/templates/dashboard-js/i18n.js @@ -269,6 +269,28 @@ const i18n = { 'codexlens.concurrency': 'API Concurrency', 'codexlens.concurrencyHint': 'Number of parallel API calls (1-32). Higher values speed up indexing but may hit rate limits.', 'codexlens.concurrencyCustom': 'Custom', + 'codexlens.rotation': 'Multi-Provider Rotation', + 'codexlens.rotationDesc': 'Aggregate multiple API providers and keys for parallel embedding generation', + 'codexlens.rotationEnabled': 'Enable Rotation', + 'codexlens.rotationStrategy': 'Rotation Strategy', + 'codexlens.strategyRoundRobin': 'Round Robin', + 'codexlens.strategyLatencyAware': 'Latency Aware', + 'codexlens.strategyWeightedRandom': 'Weighted Random', + 'codexlens.targetModel': 'Target Model', + 'codexlens.targetModelHint': 'Model name that all providers should support (e.g., qwen3-embedding)', + 'codexlens.cooldownSeconds': 'Cooldown (seconds)', + 'codexlens.cooldownHint': 'Default cooldown after rate limit (60s recommended)', + 'codexlens.rotationProviders': 'Rotation Providers', + 'codexlens.addProvider': 'Add Provider', + 'codexlens.noRotationProviders': 'No providers configured for rotation', + 'codexlens.providerWeight': 'Weight', + 'codexlens.maxConcurrentPerKey': 'Max Concurrent/Key', + 'codexlens.useAllKeys': 'Use All Keys', + 'codexlens.selectKeys': 'Select Keys', + 'codexlens.configureRotation': 'Configure Rotation', + 'codexlens.rotationSaved': 'Rotation config saved successfully', + 'codexlens.rotationDeleted': 'Rotation config deleted', + 'codexlens.totalEndpoints': 'Total Endpoints', 'codexlens.fullIndex': 'Full', 
'codexlens.vectorIndex': 'Vector', 'codexlens.ftsIndex': 'FTS', @@ -1931,6 +1953,28 @@ const i18n = { 'codexlens.concurrency': 'API 并发数', 'codexlens.concurrencyHint': '并行 API 调用数量(1-32)。较高的值可加速索引但可能触发速率限制。', 'codexlens.concurrencyCustom': '自定义', + 'codexlens.rotation': '多供应商轮训', + 'codexlens.rotationDesc': '聚合多个 API 供应商和密钥进行并行嵌入生成', + 'codexlens.rotationEnabled': '启用轮训', + 'codexlens.rotationStrategy': '轮训策略', + 'codexlens.strategyRoundRobin': '轮询', + 'codexlens.strategyLatencyAware': '延迟感知', + 'codexlens.strategyWeightedRandom': '加权随机', + 'codexlens.targetModel': '目标模型', + 'codexlens.targetModelHint': '所有供应商应支持的模型名称(例如 qwen3-embedding)', + 'codexlens.cooldownSeconds': '冷却时间(秒)', + 'codexlens.cooldownHint': '速率限制后的默认冷却时间(推荐 60 秒)', + 'codexlens.rotationProviders': '轮训供应商', + 'codexlens.addProvider': '添加供应商', + 'codexlens.noRotationProviders': '未配置轮训供应商', + 'codexlens.providerWeight': '权重', + 'codexlens.maxConcurrentPerKey': '每密钥最大并发', + 'codexlens.useAllKeys': '使用所有密钥', + 'codexlens.selectKeys': '选择密钥', + 'codexlens.configureRotation': '配置轮训', + 'codexlens.rotationSaved': '轮训配置保存成功', + 'codexlens.rotationDeleted': '轮训配置已删除', + 'codexlens.totalEndpoints': '总端点数', 'codexlens.fullIndex': '全部', 'codexlens.vectorIndex': '向量', 'codexlens.ftsIndex': 'FTS', diff --git a/ccw/src/templates/dashboard-js/views/codexlens-manager.js b/ccw/src/templates/dashboard-js/views/codexlens-manager.js index 6456a12f..6af46da6 100644 --- a/ccw/src/templates/dashboard-js/views/codexlens-manager.js +++ b/ccw/src/templates/dashboard-js/views/codexlens-manager.js @@ -2009,6 +2009,28 @@ function buildCodexLensManagerPage(config) { '' + '

' + t('codexlens.concurrencyHint') + '

' + '' + + // Multi-Provider Rotation (only for LiteLLM backend) + '' + // Index buttons - two modes: full (FTS + Vector) or FTS only '
' + '' + + '
' + + '' + + '' + + '' + + '';
+}
+
+/**
+ * Initialize rotation config modal events
+ */
+function initRotationConfigEvents(rotationConfig, availableProviders) {
+  // Store in window for save function
+  window._rotationAvailableProviders = availableProviders;
+}
+
+/**
+ * Close the rotation config modal
+ */
+function closeRotationModal() {
+  var modal = document.getElementById('rotationConfigModal');
+  if (modal) modal.remove();
+}
+
+/**
+ * Save the rotation configuration: read the modal form, PUT it to /api/litellm-api/codexlens/rotation, toast the outcome (the modal closes only on success)
+ */
+async function saveRotationConfig() {
+  try {
+    var enabledToggle = document.getElementById('rotationEnabledToggle');
+    var strategySelect = document.getElementById('rotationStrategy');
+    var cooldownInput = document.getElementById('rotationCooldown');
+    var targetModelInput = document.getElementById('rotationTargetModel');
+
+    var enabled = enabledToggle ? enabledToggle.checked : false;
+    var strategy = strategySelect ? strategySelect.value : 'round_robin';
+    var cooldown = cooldownInput && !isNaN(parseInt(cooldownInput.value, 10)) ? parseInt(cooldownInput.value, 10) : 60; // blank/non-numeric input would otherwise be sent as null
+    var targetModel = targetModelInput ? targetModelInput.value.trim() : 'qwen3-embedding';
+
+    // Collect provider configurations
+    var providers = [];
+    var providerToggles = document.querySelectorAll('.rotation-provider-toggle');
+    providerToggles.forEach(function(toggle) {
+      var providerId = toggle.getAttribute('data-provider-id');
+      var isEnabled = toggle.checked;
+
+      var modelSelect = document.querySelector('.rotation-model-select[data-provider-id="' + providerId + '"]');
+      var weightInput = document.querySelector('.rotation-weight-input[data-provider-id="' + providerId + '"]');
+      var concurrentInput = document.querySelector('.rotation-concurrent-input[data-provider-id="' + providerId + '"]');
+      var useAllKeysToggle = document.querySelector('.rotation-use-all-keys[data-provider-id="' + providerId + '"]');
+
+      providers.push({
+        providerId: providerId,
+        modelId: modelSelect ? modelSelect.value : '',
+        weight: weightInput ? parseFloat(weightInput.value) || 1 : 1,
+        maxConcurrentPerKey: concurrentInput ? parseInt(concurrentInput.value, 10) || 4 : 4,
+        useAllKeys: useAllKeysToggle ? useAllKeysToggle.checked : true,
+        enabled: isEnabled
+      });
+    });
+
+    var rotationConfig = {
+      enabled: enabled,
+      strategy: strategy,
+      defaultCooldown: cooldown,
+      targetModel: targetModel,
+      providers: providers
+    };
+
+    var response = await fetch('/api/litellm-api/codexlens/rotation', {
+      method: 'PUT',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(rotationConfig)
+    });
+
+    var result = await response.json();
+
+    if (response.ok && result.success) {
+      showRefreshToast(t('codexlens.rotationSaved'), 'success');
+      window.rotationConfig = rotationConfig;
+      updateRotationStatusDisplay(rotationConfig);
+      closeRotationModal();
+    } else {
+      showRefreshToast(t('common.saveFailed') + ': ' + (result.error || response.statusText), 'error'); // avoid printing "undefined"
+    }
+  } catch (err) {
+    showRefreshToast(t('common.error') + ': ' + err.message, 'error');
+  }
+}
diff --git a/ccw/src/types/litellm-api-config.ts b/ccw/src/types/litellm-api-config.ts
index 022b9109..8c5c1d2f 100644
--- a/ccw/src/types/litellm-api-config.ts
+++ b/ccw/src/types/litellm-api-config.ts
@@ -299,6 +299,54 @@ export interface GlobalCacheSettings {
   maxTotalSizeMB: number;
 }
 
+/**
+ * CodexLens embedding provider selection for rotation
+ * Aggregates provider + model + all API keys
+ */
+export interface CodexLensEmbeddingProvider {
+  /** Reference to provider credential ID */
+  providerId: string;
+
+  /** Embedding model ID from the provider */
+  modelId: string;
+
+  /** Whether to use all API keys from this provider (default: true) */
+  useAllKeys: boolean;
+
+  /** Specific API key IDs to use (if useAllKeys is false) */
+  selectedKeyIds?: string[];
+
+  /** Weight for weighted routing (default: 1.0, applies to all keys from this provider) */
+  weight: number;
+
+  /** Maximum concurrent requests per key (default: 4) */
+  maxConcurrentPerKey: number;
+
+  /** Whether this provider is enabled for rotation */
+  enabled: boolean;
+}
+
+/**
+ * CodexLens multi-provider embedding rotation configuration
+ * Aggregates multiple providers with same model for parallel rotation
+ */
+export interface CodexLensEmbeddingRotation {
+  /** Whether multi-provider rotation is enabled */
+  enabled: boolean;
+
+  /** Selection strategy: round_robin, latency_aware, weighted_random */
+  strategy: 'round_robin' | 'latency_aware' | 'weighted_random';
+
+  /** Default cooldown seconds for rate-limited endpoints (default: 60) */
+  defaultCooldown: number;
+
+  /** Target model name that all providers should support (e.g., "qwen3-embedding") */
+  targetModel: string;
+
+  /** List of providers to aggregate for rotation */
+  providers: CodexLensEmbeddingProvider[];
+}
+
 /**
  * Complete LiteLLM API configuration
  * Root configuration object stored in JSON file
@@ -318,4 +366,7 @@ export interface LiteLLMApiConfig {
 
   /** Global cache settings */
   globalCacheSettings: GlobalCacheSettings;
+
+  /** CodexLens multi-provider embedding rotation config */
+  codexlensEmbeddingRotation?: CodexLensEmbeddingRotation;
 }
diff --git a/ccw/tsconfig.tsbuildinfo b/ccw/tsconfig.tsbuildinfo
new file mode 100644
index 00000000..5743c890
--- /dev/null
+++ b/ccw/tsconfig.tsbuildinfo
@@ -0,0 +1 @@
+{"root":["./src/cli.ts","./src/index.ts","./src/commands/cli.ts","./src/commands/core-memory.ts","./src/commands/hook.ts","./src/commands/install.ts","./src/commands/list.ts","./src/commands/memory.ts","./src/commands/serve.ts","./src/commands/session-path-resolver.ts","./src/commands/session.ts","./src/commands/stop.ts","./src/commands/tool.ts","./src/commands/uninstall.ts","./src/commands/upgrade.ts","./src/commands/view.ts","./src/config/litellm-api-config-manager.ts","./src/config/provider-models.ts","./src/config/storage-paths.ts","./src/core/cache-manager.ts","./src/core/claude-freshness.ts","./src/core/core-memory-store.ts","./src/core/dashboard-generator-patch.ts","./src/core/dashboard-generator.ts","./src/core/data-aggregator.ts","./src/core/history-importer.ts","./src/core/lite-scanner-complete.ts","./src/core/lite-scanner.ts","./src/core/manifest.ts","./src/core/memory-embedder-bridge.ts","./src/core/memory-store.ts","./src/core/server.ts","./src/core/session-clustering-service.ts","./src/core/session-scanner.ts","./src/core/websocket.ts","./src/core/routes/ccw-routes.ts","./src/core/routes/claude-routes.ts","./src/core/routes/cli-routes.ts","./src/core/routes/codexlens-routes.ts","./src/core/routes/core-memory-routes.ts","./src/core/routes/files-routes.ts","./src/core/routes/graph-routes.ts","./src/core/routes/help-routes.ts","./src/core/routes/hooks-routes.ts","./src/core/routes/litellm-api-routes.ts","./src/core/routes/litellm-routes.ts","./src/core/routes/mcp-routes.ts","./src/core/routes/mcp-templates-db.ts","./src/core/routes/memory-routes.ts","./src/core/routes/rules-routes.ts","./src/core/routes/session-routes.ts","./src/core/routes/skills-routes.ts","./src/core/routes/status-routes.ts","./src/core/routes/system-routes.ts","./src/mcp-server/index.ts","./src/tools/classify-folders.ts","./src/tools/claude-cli-tools.ts","./src/tools/cli-config-manager.ts","./src/tools/cli-executor.ts","./src/tools/cli-history-store.ts","./src/tools/codex-lens.ts",".
/src/tools/context-cache-store.ts","./src/tools/context-cache.ts","./src/tools/convert-tokens-to-css.ts","./src/tools/core-memory.ts","./src/tools/detect-changed-modules.ts","./src/tools/discover-design-files.ts","./src/tools/edit-file.ts","./src/tools/generate-module-docs.ts","./src/tools/get-modules-by-depth.ts","./src/tools/index.ts","./src/tools/litellm-client.ts","./src/tools/litellm-executor.ts","./src/tools/native-session-discovery.ts","./src/tools/notifier.ts","./src/tools/pattern-parser.ts","./src/tools/read-file.ts","./src/tools/resume-strategy.ts","./src/tools/session-content-parser.ts","./src/tools/session-manager.ts","./src/tools/smart-context.ts","./src/tools/smart-search.ts","./src/tools/storage-manager.ts","./src/tools/ui-generate-preview.js","./src/tools/ui-instantiate-prototypes.js","./src/tools/update-module-claude.js","./src/tools/write-file.ts","./src/types/config.ts","./src/types/index.ts","./src/types/litellm-api-config.ts","./src/types/session.ts","./src/types/tool.ts","./src/utils/browser-launcher.ts","./src/utils/file-utils.ts","./src/utils/path-resolver.ts","./src/utils/path-validator.ts","./src/utils/ui.ts"],"version":"5.9.3"} \ No newline at end of file diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py index c4574e38..240c6245 100644 --- a/codex-lens/src/codexlens/cli/commands.py +++ b/codex-lens/src/codexlens/cli/commands.py @@ -103,12 +103,12 @@ def init( "-l", help="Limit indexing to specific languages (repeat or comma-separated).", ), - workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=16, help="Parallel worker processes (default: auto-detect based on CPU count, max 16)."), + workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=32, help="Parallel worker processes (default: auto-detect based on CPU count, max 32)."), force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."), no_embeddings: bool = 
typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."), embedding_backend: str = typer.Option("fastembed", "--embedding-backend", help="Embedding backend: fastembed (local) or litellm (remote API)."), embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."), - max_workers: int = typer.Option(1, "--max-workers", min=1, max=16, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."), + max_workers: int = typer.Option(1, "--max-workers", min=1, max=32, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."), json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), ) -> None: @@ -351,7 +351,7 @@ def search( Use 'codexlens embeddings-generate' to create embeddings first. Hybrid Mode: - Default weights: exact=0.4, fuzzy=0.3, vector=0.3 + Default weights: exact=0.3, fuzzy=0.1, vector=0.6 Use --weights to customize (e.g., --weights 0.5,0.3,0.2) Examples: @@ -1852,7 +1852,7 @@ def embeddings_generate( "--max-workers", "-w", min=1, - max=16, + max=32, help="Max concurrent API calls. Recommended: 4-8 for litellm backend. 
Default: 1 (sequential).", ), json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), diff --git a/codex-lens/src/codexlens/cli/embedding_manager.py b/codex-lens/src/codexlens/cli/embedding_manager.py index be05f683..cb4d23c4 100644 --- a/codex-lens/src/codexlens/cli/embedding_manager.py +++ b/codex-lens/src/codexlens/cli/embedding_manager.py @@ -331,7 +331,7 @@ def generate_embeddings( if max_workers is None: if embedding_backend == "litellm": if endpoint_count > 1: - max_workers = min(endpoint_count * 2, 16) # Cap at 16 workers + max_workers = min(endpoint_count * 2, 32) # Cap at 32 workers else: max_workers = 4 else: @@ -806,7 +806,7 @@ def generate_embeddings_recursive( if max_workers is None: if embedding_backend == "litellm": if endpoint_count > 1: - max_workers = min(endpoint_count * 2, 16) + max_workers = min(endpoint_count * 2, 32) else: max_workers = 4 else: diff --git a/codex-lens/src/codexlens/search/hybrid_search.py b/codex-lens/src/codexlens/search/hybrid_search.py index fab8e753..8c8c225b 100644 --- a/codex-lens/src/codexlens/search/hybrid_search.py +++ b/codex-lens/src/codexlens/search/hybrid_search.py @@ -27,11 +27,11 @@ class HybridSearchEngine: default_weights: Default RRF weights for each source """ - # Default RRF weights (exact: 40%, fuzzy: 30%, vector: 30%) + # Default RRF weights (vector: 60%, exact: 30%, fuzzy: 10%) DEFAULT_WEIGHTS = { - "exact": 0.4, - "fuzzy": 0.3, - "vector": 0.3, + "exact": 0.3, + "fuzzy": 0.1, + "vector": 0.6, } def __init__(self, weights: Optional[Dict[str, float]] = None): diff --git a/codex-lens/src/codexlens/search/ranking.py b/codex-lens/src/codexlens/search/ranking.py index d5ec87e4..61a0edba 100644 --- a/codex-lens/src/codexlens/search/ranking.py +++ b/codex-lens/src/codexlens/search/ranking.py @@ -25,7 +25,7 @@ def reciprocal_rank_fusion( results_map: Dictionary mapping source name to list of SearchResult objects Sources: 'exact', 'fuzzy', 'vector' weights: Dictionary mapping source name 
to weight (default: equal weights) - Example: {'exact': 0.4, 'fuzzy': 0.3, 'vector': 0.3} + Example: {'exact': 0.3, 'fuzzy': 0.1, 'vector': 0.6} k: Constant to avoid division by zero and control rank influence (default 60) Returns: diff --git a/codex-lens/tests/test_hybrid_search_e2e.py b/codex-lens/tests/test_hybrid_search_e2e.py index 3670792d..232c9704 100644 --- a/codex-lens/tests/test_hybrid_search_e2e.py +++ b/codex-lens/tests/test_hybrid_search_e2e.py @@ -45,9 +45,9 @@ class TestHybridSearchBasics: """Test HybridSearchEngine initializes with default weights.""" engine = HybridSearchEngine() assert engine.weights == HybridSearchEngine.DEFAULT_WEIGHTS - assert engine.weights["exact"] == 0.4 - assert engine.weights["fuzzy"] == 0.3 - assert engine.weights["vector"] == 0.3 + assert engine.weights["exact"] == 0.3 + assert engine.weights["fuzzy"] == 0.1 + assert engine.weights["vector"] == 0.6 def test_engine_custom_weights(self): """Test HybridSearchEngine accepts custom weights.""" diff --git a/codex-lens/tests/test_rrf_fusion.py b/codex-lens/tests/test_rrf_fusion.py index 53dda39a..f44242a9 100644 --- a/codex-lens/tests/test_rrf_fusion.py +++ b/codex-lens/tests/test_rrf_fusion.py @@ -230,16 +230,16 @@ class TestRRFSyntheticRankings: vector = [SearchResult(path="c.py", score=8.0, excerpt="...")] results_map = {"exact": exact, "fuzzy": fuzzy, "vector": vector} - weights = {"exact": 0.4, "fuzzy": 0.3, "vector": 0.3} + weights = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6} fused = reciprocal_rank_fusion(results_map, weights=weights) assert len(fused) == 3 # Each appears in one source only, so scores differ by weights - # a.py: 0.4/61 ≈ 0.0066 - # b.py: 0.3/61 ≈ 0.0049 - # c.py: 0.3/61 ≈ 0.0049 - assert fused[0].path == "a.py", "Exact (higher weight) should rank first" + # c.py: 0.6/61 ≈ 0.0098 (vector, highest weight) + # a.py: 0.3/61 ≈ 0.0049 (exact) + # b.py: 0.1/61 ≈ 0.0016 (fuzzy) + assert fused[0].path == "c.py", "Vector (higher weight) should rank first" class 
TestNormalizeBM25Score: