From e3e61bcae946a99237cc62fb181ab855a7e21963 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Wed, 24 Dec 2025 18:01:26 +0800 Subject: [PATCH] feat: Enhance LiteLLM integration and CLI management - Added token estimation and batching functionality in LiteLLMEmbedder to handle large text inputs efficiently. - Updated embed method to support max_tokens_per_batch parameter for better API call management. - Introduced new API routes for managing custom CLI endpoints, including GET, POST, PUT, and DELETE methods. - Enhanced CLI history component to support source directory context for native session content. - Improved error handling and logging in various components for better debugging and user feedback. - Added internationalization support for new API endpoint features in the i18n module. - Updated CodexLens CLI commands to allow for concurrent API calls with a max_workers option. - Enhanced embedding manager to track model information and handle embeddings generation more robustly. - Added entry points for CLI commands in the package configuration. --- .../src/ccw_litellm.egg-info/SOURCES.txt | 3 + .../src/ccw_litellm.egg-info/entry_points.txt | 2 + .../ccw_litellm/clients/litellm_embedder.py | 124 +++++++++--- ccw/src/core/routes/cli-routes.ts | 91 ++++++++- ccw/src/core/routes/litellm-api-routes.ts | 45 +++-- .../dashboard-js/components/cli-history.js | 23 ++- ccw/src/templates/dashboard-js/i18n.js | 14 ++ .../dashboard-js/views/api-settings.js | 3 + .../dashboard-js/views/cli-manager.js | 137 ++++++++++++- .../dashboard-js/views/codexlens-manager.js | 14 +- ccw/src/tools/smart-search.ts | 25 +++ codex-lens/src/codexlens/cli/commands.py | 15 +- .../src/codexlens/cli/embedding_manager.py | 186 +++++++++++++----- 13 files changed, 575 insertions(+), 107 deletions(-) create mode 100644 ccw-litellm/src/ccw_litellm.egg-info/entry_points.txt diff --git a/ccw-litellm/src/ccw_litellm.egg-info/SOURCES.txt b/ccw-litellm/src/ccw_litellm.egg-info/SOURCES.txt index 479776fe..bf51c2a6 100644 --- a/ccw-litellm/src/ccw_litellm.egg-info/SOURCES.txt +++ b/ccw-litellm/src/ccw_litellm.egg-info/SOURCES.txt @@ -1,8 +1,11 @@ +README.md pyproject.toml src/ccw_litellm/__init__.py +src/ccw_litellm/cli.py src/ccw_litellm.egg-info/PKG-INFO src/ccw_litellm.egg-info/SOURCES.txt src/ccw_litellm.egg-info/dependency_links.txt +src/ccw_litellm.egg-info/entry_points.txt src/ccw_litellm.egg-info/requires.txt src/ccw_litellm.egg-info/top_level.txt src/ccw_litellm/clients/__init__.py diff --git a/ccw-litellm/src/ccw_litellm.egg-info/entry_points.txt b/ccw-litellm/src/ccw_litellm.egg-info/entry_points.txt new file mode 100644 index 00000000..aa80309b --- /dev/null +++ b/ccw-litellm/src/ccw_litellm.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +ccw-litellm = ccw_litellm.cli:main diff --git a/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py b/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py index 66d9ebd8..31a86fd0 100644 --- a/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py +++ b/ccw-litellm/src/ccw_litellm/clients/litellm_embedder.py @@ -102,18 +102,75 @@ class LiteLLMEmbedder(AbstractEmbedder): """Embedding vector size.""" return self._model_config.dimensions + def _estimate_tokens(self, text: str) -> int: + """Estimate token count for a text using fast heuristic. 
+ + Args: + text: Text to estimate tokens for + + Returns: + Estimated token count (len/4 is a reasonable approximation) + """ + return len(text) // 4 + + def _create_batches( + self, + texts: list[str], + max_tokens: int = 30000 + ) -> list[list[str]]: + """Split texts into batches that fit within token limits. + + Args: + texts: List of texts to batch + max_tokens: Maximum tokens per batch (default: 30000, safe margin for 40960 limit) + + Returns: + List of text batches + """ + batches = [] + current_batch = [] + current_tokens = 0 + + for text in texts: + text_tokens = self._estimate_tokens(text) + + # If single text exceeds limit, truncate it + if text_tokens > max_tokens: + logger.warning(f"Text with {text_tokens} estimated tokens exceeds limit, truncating") + # Truncate to fit (rough estimate: 4 chars per token) + max_chars = max_tokens * 4 + text = text[:max_chars] + text_tokens = self._estimate_tokens(text) + + # Start new batch if current would exceed limit + if current_tokens + text_tokens > max_tokens and current_batch: + batches.append(current_batch) + current_batch = [] + current_tokens = 0 + + current_batch.append(text) + current_tokens += text_tokens + + # Add final batch + if current_batch: + batches.append(current_batch) + + return batches + def embed( self, texts: str | Sequence[str], *, batch_size: int | None = None, + max_tokens_per_batch: int = 30000, **kwargs: Any, ) -> NDArray[np.floating]: """Embed one or more texts. Args: texts: Single text or sequence of texts - batch_size: Batch size for processing (currently unused, LiteLLM handles batching) + batch_size: Batch size for processing (deprecated, use max_tokens_per_batch) + max_tokens_per_batch: Maximum estimated tokens per API call (default: 30000) **kwargs: Additional arguments for litellm.embedding() Returns: @@ -125,10 +182,8 @@ class LiteLLMEmbedder(AbstractEmbedder): # Normalize input to list if isinstance(texts, str): text_list = [texts] - single_input = True else: text_list = list(texts) - single_input = False if not text_list: # Return empty array with correct shape @@ -137,36 +192,53 @@ class LiteLLMEmbedder(AbstractEmbedder): # Merge kwargs embedding_kwargs = {**self._litellm_kwargs, **kwargs} - try: - # For OpenAI-compatible endpoints, ensure encoding_format is set - if self._provider_config.api_base and "encoding_format" not in embedding_kwargs: - embedding_kwargs["encoding_format"] = "float" + # For OpenAI-compatible endpoints, ensure encoding_format is set + if self._provider_config.api_base and "encoding_format" not in embedding_kwargs: + embedding_kwargs["encoding_format"] = "float" - # Call LiteLLM embedding - response = litellm.embedding( - model=self._format_model_name(), - input=text_list, - **embedding_kwargs, - ) + # Split into token-aware batches + batches = self._create_batches(text_list, max_tokens_per_batch) - # Extract embeddings - embeddings = [item["embedding"] for item in response.data] + if len(batches) > 1: + logger.info(f"Split {len(text_list)} texts into {len(batches)} batches for embedding") - # Convert to numpy array - result = np.array(embeddings, dtype=np.float32) + all_embeddings = [] - # Validate dimensions - if result.shape[1] != self.dimensions: - logger.warning( - f"Expected {self.dimensions} dimensions, got {result.shape[1]}. " - f"Configuration may be incorrect." 
+ for batch_idx, batch in enumerate(batches): + try: + # Build call kwargs with explicit api_base + call_kwargs = {**embedding_kwargs} + if self._provider_config.api_base: + call_kwargs["api_base"] = self._provider_config.api_base + if self._provider_config.api_key: + call_kwargs["api_key"] = self._provider_config.api_key + + # Call LiteLLM embedding for this batch + response = litellm.embedding( + model=self._format_model_name(), + input=batch, + **call_kwargs, ) - return result + # Extract embeddings + batch_embeddings = [item["embedding"] for item in response.data] + all_embeddings.extend(batch_embeddings) - except Exception as e: - logger.error(f"LiteLLM embedding failed: {e}") - raise + except Exception as e: + logger.error(f"LiteLLM embedding failed for batch {batch_idx + 1}/{len(batches)}: {e}") + raise + + # Convert to numpy array + result = np.array(all_embeddings, dtype=np.float32) + + # Validate dimensions + if result.shape[1] != self.dimensions: + logger.warning( + f"Expected {self.dimensions} dimensions, got {result.shape[1]}. " + f"Configuration may be incorrect." + ) + + return result @property def model_name(self) -> str: diff --git a/ccw/src/core/routes/cli-routes.ts b/ccw/src/core/routes/cli-routes.ts index 019566f0..2c9a6b4d 100644 --- a/ccw/src/core/routes/cli-routes.ts +++ b/ccw/src/core/routes/cli-routes.ts @@ -38,7 +38,9 @@ import { saveClaudeCliTools, updateClaudeToolEnabled, updateClaudeCacheSettings, - getClaudeCliToolsInfo + getClaudeCliToolsInfo, + addClaudeCustomEndpoint, + removeClaudeCustomEndpoint } from '../../tools/claude-cli-tools.js'; export interface RouteContext { @@ -211,6 +213,93 @@ export async function handleCliRoutes(ctx: RouteContext): Promise { } } + // API: Get all custom endpoints + if (pathname === '/api/cli/endpoints' && req.method === 'GET') { + try { + const config = loadClaudeCliTools(initialPath); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ endpoints: config.customEndpoints || [] })); + } catch (err) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: (err as Error).message })); + } + return true; + } + + // API: Add/Update custom endpoint + if (pathname === '/api/cli/endpoints' && req.method === 'POST') { + handlePostRequest(req, res, async (body: unknown) => { + try { + const { id, name, enabled } = body as { id: string; name: string; enabled: boolean }; + if (!id || !name) { + return { error: 'id and name are required', status: 400 }; + } + const config = addClaudeCustomEndpoint(initialPath, { id, name, enabled: enabled !== false }); + + broadcastToClients({ + type: 'CLI_ENDPOINT_UPDATED', + payload: { endpoint: { id, name, enabled }, timestamp: new Date().toISOString() } + }); + + return { success: true, endpoints: config.customEndpoints }; + } catch (err) { + return { error: (err as Error).message, status: 500 }; + } + }); + return true; + } + + // API: Update custom endpoint enabled status + if (pathname.match(/^\/api\/cli\/endpoints\/[^/]+$/) && req.method === 'PUT') { + const endpointId = pathname.split('/').pop() || ''; + handlePostRequest(req, res, async (body: unknown) => { + try { + const { enabled, name } = body as { enabled?: boolean; name?: string }; + const config = loadClaudeCliTools(initialPath); + const endpoint = config.customEndpoints.find(e => e.id === endpointId); + + if (!endpoint) { + return { error: 'Endpoint not found', status: 404 }; + } + + if (typeof enabled === 'boolean') endpoint.enabled = enabled; + if (name) 
endpoint.name = name; + + saveClaudeCliTools(initialPath, config); + + broadcastToClients({ + type: 'CLI_ENDPOINT_UPDATED', + payload: { endpoint, timestamp: new Date().toISOString() } + }); + + return { success: true, endpoint }; + } catch (err) { + return { error: (err as Error).message, status: 500 }; + } + }); + return true; + } + + // API: Delete custom endpoint + if (pathname.match(/^\/api\/cli\/endpoints\/[^/]+$/) && req.method === 'DELETE') { + const endpointId = pathname.split('/').pop() || ''; + try { + const config = removeClaudeCustomEndpoint(initialPath, endpointId); + + broadcastToClients({ + type: 'CLI_ENDPOINT_DELETED', + payload: { endpointId, timestamp: new Date().toISOString() } + }); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, endpoints: config.customEndpoints })); + } catch (err) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: (err as Error).message })); + } + return true; + } + // API: CLI Execution History if (pathname === '/api/cli/history') { const projectPath = url.searchParams.get('path') || initialPath; diff --git a/ccw/src/core/routes/litellm-api-routes.ts b/ccw/src/core/routes/litellm-api-routes.ts index 7470c6eb..9ce331c3 100644 --- a/ccw/src/core/routes/litellm-api-routes.ts +++ b/ccw/src/core/routes/litellm-api-routes.ts @@ -529,27 +529,38 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise((resolve) => { - const proc = spawn('python', ['-c', 'import ccw_litellm; print(ccw_litellm.__version__ if hasattr(ccw_litellm, "__version__") else "installed")'], { - shell: true, - timeout: 10000 - }); + const { execSync } = await import('child_process'); - let output = ''; - proc.stdout?.on('data', (data) => { output += data.toString(); }); - proc.on('close', (code) => { - if (code === 0) { - resolve({ installed: true, version: output.trim() || 'unknown' }); - } else { - resolve({ installed: false }); + // Try multiple Python executables + const pythonExecutables = ['python', 'python3', 'py']; + // Use single quotes inside Python code for Windows compatibility + const pythonCode = "import ccw_litellm; print(getattr(ccw_litellm, '__version__', 'installed'))"; + + let installed = false; + let version = ''; + let lastError = ''; + + for (const pythonExe of pythonExecutables) { + try { + const output = execSync(`${pythonExe} -c "${pythonCode}"`, { + encoding: 'utf-8', + timeout: 10000, + windowsHide: true + }); + version = output.trim(); + if (version) { + installed = true; + console.log(`[ccw-litellm status] Found with ${pythonExe}: ${version}`); + break; } - }); - proc.on('error', () => resolve({ installed: false })); - }); + } catch (err) { + lastError = (err as Error).message; + console.log(`[ccw-litellm status] ${pythonExe} failed:`, lastError.substring(0, 100)); + } + } res.writeHead(200, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify(result)); + res.end(JSON.stringify(installed ? 
{ installed: true, version } : { installed: false, error: lastError })); } catch (err) { res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ installed: false, error: (err as Error).message })); diff --git a/ccw/src/templates/dashboard-js/components/cli-history.js b/ccw/src/templates/dashboard-js/components/cli-history.js index fc5fa83f..9ca4a516 100644 --- a/ccw/src/templates/dashboard-js/components/cli-history.js +++ b/ccw/src/templates/dashboard-js/components/cli-history.js @@ -33,9 +33,13 @@ async function loadCliHistory(options = {}) { } // Load native session content for a specific execution -async function loadNativeSessionContent(executionId) { +async function loadNativeSessionContent(executionId, sourceDir) { try { - const url = `/api/cli/native-session?path=${encodeURIComponent(projectPath)}&id=${encodeURIComponent(executionId)}`; + // If sourceDir provided, use it to build the correct path + const basePath = sourceDir && sourceDir !== '.' + ? projectPath + '/' + sourceDir + : projectPath; + const url = `/api/cli/native-session?path=${encodeURIComponent(basePath)}&id=${encodeURIComponent(executionId)}`; const response = await fetch(url); if (!response.ok) return null; return await response.json(); @@ -133,9 +137,12 @@ function renderCliHistory() { ` : ''; + // Escape sourceDir for use in onclick + const sourceDirEscaped = exec.sourceDir ? exec.sourceDir.replace(/'/g, "\\'") : ''; + return `
-
+
${exec.tool.toUpperCase()} ${exec.mode || 'analysis'} @@ -154,14 +161,14 @@ function renderCliHistory() {
${hasNative ? ` - ` : ''} - -
@@ -650,9 +657,9 @@ async function copyConcatenatedPrompt(executionId) { /** * Show native session detail modal with full conversation content */ -async function showNativeSessionDetail(executionId) { +async function showNativeSessionDetail(executionId, sourceDir) { // Load native session content - const nativeSession = await loadNativeSessionContent(executionId); + const nativeSession = await loadNativeSessionContent(executionId, sourceDir); if (!nativeSession) { showRefreshToast('Native session not found', 'error'); diff --git a/ccw/src/templates/dashboard-js/i18n.js b/ccw/src/templates/dashboard-js/i18n.js index 6460f39c..178baf99 100644 --- a/ccw/src/templates/dashboard-js/i18n.js +++ b/ccw/src/templates/dashboard-js/i18n.js @@ -228,6 +228,11 @@ const i18n = { 'cli.codexLensDescFull': 'Full-text code search engine', 'cli.semanticDesc': 'AI-powered code understanding', 'cli.semanticDescFull': 'Natural language code search', + 'cli.apiEndpoints': 'API Endpoints', + 'cli.configured': 'configured', + 'cli.addToCli': 'Add to CLI', + 'cli.enabled': 'Enabled', + 'cli.disabled': 'Disabled', // CodexLens Configuration 'codexlens.config': 'CodexLens Configuration', @@ -378,6 +383,8 @@ const i18n = { 'codexlens.indexComplete': 'Index complete', 'codexlens.indexSuccess': 'Index created successfully', 'codexlens.indexFailed': 'Indexing failed', + 'codexlens.embeddingsFailed': 'Embeddings generation failed', + 'codexlens.ftsSuccessEmbeddingsFailed': 'FTS index created, but embeddings failed', // CodexLens Install 'codexlens.installDesc': 'Python-based code indexing engine', @@ -1880,6 +1887,11 @@ const i18n = { 'cli.codexLensDescFull': '全文代码搜索引擎', 'cli.semanticDesc': 'AI 驱动的代码理解', 'cli.semanticDescFull': '自然语言代码搜索', + 'cli.apiEndpoints': 'API 端点', + 'cli.configured': '已配置', + 'cli.addToCli': '添加到 CLI', + 'cli.enabled': '已启用', + 'cli.disabled': '已禁用', // CodexLens 配置 'codexlens.config': 'CodexLens 配置', @@ -2031,6 +2043,8 @@ const i18n = { 'codexlens.indexComplete': '索引完成', 'codexlens.indexSuccess': '索引创建成功', 'codexlens.indexFailed': '索引失败', + 'codexlens.embeddingsFailed': '嵌入生成失败', + 'codexlens.ftsSuccessEmbeddingsFailed': 'FTS 索引已创建,但嵌入生成失败', // CodexLens 安装 'codexlens.installDesc': '基于 Python 的代码索引引擎', diff --git a/ccw/src/templates/dashboard-js/views/api-settings.js b/ccw/src/templates/dashboard-js/views/api-settings.js index 0e293ca1..5f07eee7 100644 --- a/ccw/src/templates/dashboard-js/views/api-settings.js +++ b/ccw/src/templates/dashboard-js/views/api-settings.js @@ -2739,8 +2739,11 @@ function toggleKeyVisibility(btn) { */ async function checkCcwLitellmStatus() { try { + console.log('[API Settings] Checking ccw-litellm status...'); var response = await fetch('/api/litellm-api/ccw-litellm/status'); + console.log('[API Settings] Status response:', response.status); var status = await response.json(); + console.log('[API Settings] ccw-litellm status:', status); window.ccwLitellmStatus = status; return status; } catch (e) { diff --git a/ccw/src/templates/dashboard-js/views/cli-manager.js b/ccw/src/templates/dashboard-js/views/cli-manager.js index 93a87330..a9d66c3c 100644 --- a/ccw/src/templates/dashboard-js/views/cli-manager.js +++ b/ccw/src/templates/dashboard-js/views/cli-manager.js @@ -59,6 +59,91 @@ async function loadCcwEndpointTools() { } } +// ========== LiteLLM API Endpoints ========== +var litellmApiEndpoints = []; +var cliCustomEndpoints = []; + +async function loadLitellmApiEndpoints() { + try { + var response = await fetch('/api/litellm-api/config'); + if (!response.ok) throw new 
Error('Failed to load LiteLLM endpoints'); + var data = await response.json(); + litellmApiEndpoints = data.endpoints || []; + window.litellmApiConfig = data; + return litellmApiEndpoints; + } catch (err) { + console.error('Failed to load LiteLLM endpoints:', err); + litellmApiEndpoints = []; + return []; + } +} + +async function loadCliCustomEndpoints() { + try { + var response = await fetch('/api/cli/endpoints'); + if (!response.ok) throw new Error('Failed to load CLI custom endpoints'); + var data = await response.json(); + cliCustomEndpoints = data.endpoints || []; + return cliCustomEndpoints; + } catch (err) { + console.error('Failed to load CLI custom endpoints:', err); + cliCustomEndpoints = []; + return []; + } +} + +async function toggleEndpointEnabled(endpointId, enabled) { + try { + var response = await fetch('/api/cli/endpoints/' + endpointId, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ enabled: enabled }) + }); + if (!response.ok) throw new Error('Failed to update endpoint'); + var data = await response.json(); + if (data.success) { + // Update local state + var idx = cliCustomEndpoints.findIndex(function(e) { return e.id === endpointId; }); + if (idx >= 0) { + cliCustomEndpoints[idx].enabled = enabled; + } + showRefreshToast((enabled ? 'Enabled' : 'Disabled') + ' endpoint: ' + endpointId, 'success'); + } + return data; + } catch (err) { + showRefreshToast('Failed to update endpoint: ' + err.message, 'error'); + throw err; + } +} + +async function syncEndpointToCliTools(endpoint) { + try { + var response = await fetch('/api/cli/endpoints', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + id: endpoint.id, + name: endpoint.name, + enabled: true + }) + }); + if (!response.ok) throw new Error('Failed to sync endpoint'); + var data = await response.json(); + if (data.success) { + cliCustomEndpoints = data.endpoints; + showRefreshToast('Endpoint synced to CLI tools: ' + endpoint.id, 'success'); + renderToolsSection(); + } + return data; + } catch (err) { + showRefreshToast('Failed to sync endpoint: ' + err.message, 'error'); + throw err; + } +} + +window.toggleEndpointEnabled = toggleEndpointEnabled; +window.syncEndpointToCliTools = syncEndpointToCliTools; + // ========== CLI Tool Configuration ========== async function loadCliToolConfig() { try { @@ -322,7 +407,9 @@ async function renderCliManager() { loadCliToolStatus(), loadCodexLensStatus(), loadCcwInstallations(), - loadCcwEndpointTools() + loadCcwEndpointTools(), + loadLitellmApiEndpoints(), + loadCliCustomEndpoints() ]); container.innerHTML = '
' + @@ -487,6 +574,51 @@ function renderToolsSection() { '
'; } + // API Endpoints section + var apiEndpointsHtml = ''; + if (litellmApiEndpoints.length > 0) { + var endpointItems = litellmApiEndpoints.map(function(endpoint) { + // Check if endpoint is synced to CLI tools + var cliEndpoint = cliCustomEndpoints.find(function(e) { return e.id === endpoint.id; }); + var isSynced = !!cliEndpoint; + var isEnabled = cliEndpoint ? cliEndpoint.enabled : false; + + // Find provider info + var provider = (window.litellmApiConfig?.providers || []).find(function(p) { return p.id === endpoint.providerId; }); + var providerName = provider ? provider.name : endpoint.providerId; + + return '
' + + '
' + + '' + + '
' + + '
' + endpoint.id + ' API
' + + '
' + endpoint.model + ' (' + providerName + ')
' + + '
' + + '
' + + '
' + + (isSynced + ? '' + : '') + + '
' + + '
'; + }).join(''); + + apiEndpointsHtml = '
' + + '
' + + '

' + + ' ' + (t('cli.apiEndpoints') || 'API Endpoints') + + '

' + + '' + litellmApiEndpoints.length + ' ' + (t('cli.configured') || 'configured') + '' + + '
' + + '
' + endpointItems + '
' + + '
'; + } + container.innerHTML = '
' + '
' + '

' + t('cli.tools') + '

' + @@ -500,7 +632,8 @@ function renderToolsSection() { toolsHtml + codexLensHtml + semanticHtml + - '
'; + '
' + + apiEndpointsHtml; if (window.lucide) lucide.createIcons(); } diff --git a/ccw/src/templates/dashboard-js/views/codexlens-manager.js b/ccw/src/templates/dashboard-js/views/codexlens-manager.js index 913aa2db..ed1de360 100644 --- a/ccw/src/templates/dashboard-js/views/codexlens-manager.js +++ b/ccw/src/templates/dashboard-js/views/codexlens-manager.js @@ -383,7 +383,7 @@ async function loadSemanticDepsStatus() { acceleratorIcon = 'zap'; acceleratorClass = 'bg-green-500/20 text-green-600'; } else if (accelerator === 'DirectML') { - acceleratorIcon = 'gpu-card'; + acceleratorIcon = 'cpu'; acceleratorClass = 'bg-blue-500/20 text-blue-600'; } else if (accelerator === 'ROCm') { acceleratorIcon = 'flame'; @@ -450,7 +450,7 @@ function buildGpuModeSelector(gpuInfo) { id: 'directml', label: 'DirectML', desc: t('codexlens.directmlModeDesc') || 'Windows GPU (NVIDIA/AMD/Intel)', - icon: 'gpu-card', + icon: 'cpu', available: gpuInfo.available.includes('directml'), recommended: gpuInfo.mode === 'directml' }, @@ -1331,7 +1331,15 @@ async function startCodexLensIndexing(indexType, embeddingModel, embeddingBacken // Check if completed successfully (WebSocket might have already reported) if (result.success) { - handleIndexComplete(true, t('codexlens.indexComplete')); + // For vector index, check if embeddings were actually generated + var embeddingsResult = result.result && result.result.embeddings; + if (indexType === 'vector' && embeddingsResult && !embeddingsResult.generated) { + // FTS succeeded but embeddings failed - show partial success + var errorMsg = embeddingsResult.error || t('codexlens.embeddingsFailed'); + handleIndexComplete(false, t('codexlens.ftsSuccessEmbeddingsFailed') || 'FTS index created, but embeddings failed: ' + errorMsg); + } else { + handleIndexComplete(true, t('codexlens.indexComplete')); + } } else if (!result.success) { handleIndexComplete(false, result.error || t('common.unknownError')); } diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts index 931ee25e..5e9276a1 100644 --- a/ccw/src/tools/smart-search.ts +++ b/ccw/src/tools/smart-search.ts @@ -275,11 +275,22 @@ interface SearchResult { message?: string; } +interface ModelInfo { + model_profile?: string; + model_name?: string; + embedding_dim?: number; + backend?: string; + created_at?: string; + updated_at?: string; +} + interface IndexStatus { indexed: boolean; has_embeddings: boolean; file_count?: number; embeddings_coverage_percent?: number; + total_chunks?: number; + model_info?: ModelInfo; warning?: string; } @@ -320,6 +331,18 @@ async function checkIndexStatus(path: string = '.'): Promise { const embeddingsData = status.embeddings || {}; const embeddingsCoverage = embeddingsData.coverage_percent || 0; const has_embeddings = embeddingsCoverage >= 50; // Threshold: 50% + const totalChunks = embeddingsData.total_chunks || 0; + + // Extract model info if available + const modelInfoData = embeddingsData.model_info; + const modelInfo: ModelInfo | undefined = modelInfoData ? 
{ + model_profile: modelInfoData.model_profile, + model_name: modelInfoData.model_name, + embedding_dim: modelInfoData.embedding_dim, + backend: modelInfoData.backend, + created_at: modelInfoData.created_at, + updated_at: modelInfoData.updated_at, + } : undefined; let warning: string | undefined; if (!indexed) { @@ -335,6 +358,8 @@ async function checkIndexStatus(path: string = '.'): Promise { has_embeddings, file_count: status.total_files, embeddings_coverage_percent: embeddingsCoverage, + total_chunks: totalChunks, + model_info: modelInfo, warning, }; } catch { diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py index 3d2f37d1..b7cc01fc 100644 --- a/codex-lens/src/codexlens/cli/commands.py +++ b/codex-lens/src/codexlens/cli/commands.py @@ -1831,6 +1831,14 @@ def embeddings_generate( "-r", help="Recursively process all _index.db files in directory tree.", ), + max_workers: int = typer.Option( + 1, + "--max-workers", + "-w", + min=1, + max=16, + help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).", + ), json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."), ) -> None: @@ -1927,7 +1935,10 @@ def embeddings_generate( else: console.print(f"Index: [dim]{index_path}[/dim]") console.print(f"Backend: [cyan]{backend}[/cyan]") - console.print(f"Model: [cyan]{model}[/cyan]\n") + console.print(f"Model: [cyan]{model}[/cyan]") + if max_workers > 1: + console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]") + console.print() if use_recursive: result = generate_embeddings_recursive( @@ -1937,6 +1948,7 @@ def embeddings_generate( force=force, chunk_size=chunk_size, progress_callback=progress_update, + max_workers=max_workers, ) else: result = generate_embeddings( @@ -1946,6 +1958,7 @@ def embeddings_generate( force=force, chunk_size=chunk_size, progress_callback=progress_update, + max_workers=max_workers, ) if json_mode: diff --git a/codex-lens/src/codexlens/cli/embedding_manager.py b/codex-lens/src/codexlens/cli/embedding_manager.py index 3090ef46..0bb31507 100644 --- a/codex-lens/src/codexlens/cli/embedding_manager.py +++ b/codex-lens/src/codexlens/cli/embedding_manager.py @@ -7,7 +7,6 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed from itertools import islice from pathlib import Path -from threading import Lock from typing import Dict, Generator, List, Optional, Tuple try: @@ -441,82 +440,133 @@ def generate_embeddings( batch_number = 0 files_seen = set() - # Thread-safe counters for concurrent processing - counter_lock = Lock() - - def process_batch(batch_data: Tuple[int, List[Tuple]]) -> Tuple[int, set, Optional[str]]: - """Process a single batch: generate embeddings and store. + def compute_embeddings_only(batch_data: Tuple[int, List[Tuple]]): + """Compute embeddings for a batch (no DB write). 
Args: batch_data: Tuple of (batch_number, chunk_batch) Returns: - Tuple of (chunks_created, files_in_batch, error_message) + Tuple of (batch_num, chunk_batch, embeddings_numpy, batch_files, error) """ batch_num, chunk_batch = batch_data batch_files = set() try: - # Track files in this batch for _, file_path in chunk_batch: batch_files.add(file_path) - # Generate embeddings batch_contents = [chunk.content for chunk, _ in chunk_batch] embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE) - # Store embeddings (thread-safe via SQLite's serialized mode) - vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy) - - return len(chunk_batch), batch_files, None + return batch_num, chunk_batch, embeddings_numpy, batch_files, None except Exception as e: error_msg = f"Batch {batch_num}: {str(e)}" - logger.error(f"Failed to process embedding batch {batch_num}: {str(e)}") - return 0, batch_files, error_msg + logger.error(f"Failed to compute embeddings for batch {batch_num}: {str(e)}") + return batch_num, chunk_batch, None, batch_files, error_msg - # Collect batches for concurrent processing - all_batches = [] - for chunk_batch in batch_generator: - batch_number += 1 - all_batches.append((batch_number, chunk_batch)) - - # Process batches (sequential or concurrent based on max_workers) + # Process batches based on max_workers setting if max_workers <= 1: - # Sequential processing (original behavior) - for batch_num, chunk_batch in all_batches: - chunks_created, batch_files, error = process_batch((batch_num, chunk_batch)) - files_seen.update(batch_files) - total_chunks_created += chunks_created - total_files_processed = len(files_seen) + # Sequential processing - stream directly from generator (no pre-materialization) + for chunk_batch in batch_generator: + batch_number += 1 - if progress_callback and batch_num % 10 == 0: - progress_callback(f" Batch {batch_num}: {total_chunks_created} chunks, {total_files_processed} files") + # Track files in this batch + batch_files = set() + for _, file_path in chunk_batch: + batch_files.add(file_path) + + try: + # Generate embeddings + batch_contents = [chunk.content for chunk, _ in chunk_batch] + embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE) + + # Store embeddings + vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy) + + files_seen.update(batch_files) + total_chunks_created += len(chunk_batch) + total_files_processed = len(files_seen) + + if progress_callback and batch_number % 10 == 0: + progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files") + + except Exception as e: + logger.error(f"Failed to process batch {batch_number}: {str(e)}") + files_seen.update(batch_files) else: - # Concurrent processing for API backends + # Concurrent processing with producer-consumer pattern + # Workers compute embeddings (parallel), main thread writes to DB (serial) + from queue import Queue + from threading import Thread + + result_queue = Queue(maxsize=max_workers * 2) # Bounded queue to limit memory + batch_counter = [0] # Mutable counter for producer thread + producer_done = [False] + + def producer(): + """Submit batches to executor, put results in queue.""" + with ThreadPoolExecutor(max_workers=max_workers) as executor: + pending_futures = [] + + for chunk_batch in batch_generator: + batch_counter[0] += 1 + batch_num = batch_counter[0] + + # Submit compute task + future = executor.submit(compute_embeddings_only, (batch_num, 
chunk_batch)) + pending_futures.append(future) + + # Check for completed futures and add to queue + for f in list(pending_futures): + if f.done(): + try: + result_queue.put(f.result()) + except Exception as e: + logger.error(f"Future raised exception: {e}") + pending_futures.remove(f) + + # Wait for remaining futures + for future in as_completed(pending_futures): + try: + result_queue.put(future.result()) + except Exception as e: + logger.error(f"Future raised exception: {e}") + + producer_done[0] = True + result_queue.put(None) # Sentinel to signal completion + + # Start producer thread + producer_thread = Thread(target=producer, daemon=True) + producer_thread.start() + if progress_callback: - progress_callback(f"Processing {len(all_batches)} batches with {max_workers} concurrent workers...") + progress_callback(f"Processing with {max_workers} concurrent embedding workers...") - with ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = {executor.submit(process_batch, batch): batch[0] for batch in all_batches} + # Consumer: main thread writes to DB (serial, no contention) + completed = 0 + while True: + result = result_queue.get() + if result is None: # Sentinel + break - completed = 0 - for future in as_completed(futures): - batch_num = futures[future] - try: - chunks_created, batch_files, error = future.result() + batch_num, chunk_batch, embeddings_numpy, batch_files, error = result - with counter_lock: - files_seen.update(batch_files) - total_chunks_created += chunks_created - total_files_processed = len(files_seen) - completed += 1 + if embeddings_numpy is not None and error is None: + # Write to DB in main thread (no contention) + vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy) + total_chunks_created += len(chunk_batch) - if progress_callback and completed % 10 == 0: - progress_callback(f" Completed {completed}/{len(all_batches)} batches: {total_chunks_created} chunks") + files_seen.update(batch_files) + total_files_processed = len(files_seen) + completed += 1 - except Exception as e: - logger.error(f"Batch {batch_num} raised exception: {str(e)}") + if progress_callback and completed % 10 == 0: + progress_callback(f" Completed {completed} batches: {total_chunks_created} chunks") + + producer_thread.join() + batch_number = batch_counter[0] # Notify before ANN index finalization (happens when bulk_insert context exits) if progress_callback: @@ -718,7 +768,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]: index_root: Root index directory Returns: - Aggregated status with coverage statistics + Aggregated status with coverage statistics, model info, and timestamps """ index_files = discover_all_index_dbs(index_root) @@ -734,6 +784,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]: "coverage_percent": 0.0, "indexes_with_embeddings": 0, "indexes_without_embeddings": 0, + "model_info": None, }, } @@ -741,6 +792,8 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]: files_with_embeddings = 0 total_chunks = 0 indexes_with_embeddings = 0 + model_info = None + latest_updated_at = None for index_path in index_files: status = check_index_embeddings(index_path) @@ -752,6 +805,40 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]: if result["has_embeddings"]: indexes_with_embeddings += 1 + # Get model config from first index with embeddings (they should all match) + if model_info is None: + try: + from codexlens.semantic.vector_store import VectorStore + with VectorStore(index_path) as vs: + config = 
vs.get_model_config() + if config: + model_info = { + "model_profile": config.get("model_profile"), + "model_name": config.get("model_name"), + "embedding_dim": config.get("embedding_dim"), + "backend": config.get("backend"), + "created_at": config.get("created_at"), + "updated_at": config.get("updated_at"), + } + latest_updated_at = config.get("updated_at") + except Exception: + pass + else: + # Track the latest updated_at across all indexes + try: + from codexlens.semantic.vector_store import VectorStore + with VectorStore(index_path) as vs: + config = vs.get_model_config() + if config and config.get("updated_at"): + if latest_updated_at is None or config["updated_at"] > latest_updated_at: + latest_updated_at = config["updated_at"] + except Exception: + pass + + # Update model_info with latest timestamp + if model_info and latest_updated_at: + model_info["updated_at"] = latest_updated_at + return { "success": True, "result": { @@ -763,6 +850,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]: "coverage_percent": round((files_with_embeddings / total_files * 100) if total_files > 0 else 0, 1), "indexes_with_embeddings": indexes_with_embeddings, "indexes_without_embeddings": len(index_files) - indexes_with_embeddings, + "model_info": model_info, }, }
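
Note: a minimal standalone sketch of the token-aware batching heuristic introduced in LiteLLMEmbedder above (token estimate = len(text) // 4, 30000-token default ceiling, oversized texts truncated). The sample texts are hypothetical; embed() applies the same splitting via _create_batches() before each litellm.embedding() call.

    # Standalone sketch, not the embedder's actual API surface.
    def estimate_tokens(text: str) -> int:
        # Fast heuristic: roughly 4 characters per token.
        return len(text) // 4

    def create_batches(texts: list[str], max_tokens: int = 30000) -> list[list[str]]:
        batches: list[list[str]] = []
        current: list[str] = []
        current_tokens = 0
        for text in texts:
            tokens = estimate_tokens(text)
            if tokens > max_tokens:
                # Oversized single text: truncate to fit the per-batch budget.
                text = text[: max_tokens * 4]
                tokens = estimate_tokens(text)
            if current and current_tokens + tokens > max_tokens:
                # Close the current batch before it would overflow.
                batches.append(current)
                current, current_tokens = [], 0
            current.append(text)
            current_tokens += tokens
        if current:
            batches.append(current)
        return batches

    if __name__ == "__main__":
        texts = ["def foo(): ..." * 500, "short snippet", "x = 1\n" * 20000]
        for i, batch in enumerate(create_batches(texts), 1):
            print(f"batch {i}: {len(batch)} text(s), "
                  f"~{sum(estimate_tokens(t) for t in batch)} tokens")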
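
Note: a hedged sketch of the producer/consumer shape generate_embeddings uses when the new --max-workers option (codexlens embeddings generate -w N) is greater than 1: worker threads compute embeddings concurrently while the main thread performs the serial vector-store writes. embed_batch() and write_batch() below are hypothetical stand-ins for embedder.embed_to_numpy() and vector_store.add_chunks_batch_numpy(), and the toy data is illustrative only.

    import time
    from concurrent.futures import ThreadPoolExecutor
    from queue import Queue
    from threading import Thread

    def embed_batch(batch: list[str]) -> list[list[float]]:
        time.sleep(0.1)  # stand-in for a remote embedding API call
        return [[float(len(t))] for t in batch]

    def write_batch(batch: list[str], vectors: list[list[float]]) -> None:
        print(f"wrote {len(vectors)} vectors")  # serial, single-writer step

    def run(batches: list[list[str]], max_workers: int = 4) -> None:
        # Bounded queue so the result hand-off applies backpressure to producers.
        results: Queue = Queue(maxsize=max_workers * 2)

        def producer() -> None:
            with ThreadPoolExecutor(max_workers=max_workers) as pool:
                futures = [pool.submit(embed_batch, b) for b in batches]
                for batch, fut in zip(batches, futures):
                    results.put((batch, fut.result()))
            results.put(None)  # sentinel: no more work

        Thread(target=producer, daemon=True).start()
        # Main thread owns the database: writes stay serial, so no lock contention.
        while (item := results.get()) is not None:
            write_batch(*item)

    run([["a" * 40] * 8 for _ in range(5)])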