feat: Enhance LiteLLM integration and CLI management

- Added token estimation and batching functionality in LiteLLMEmbedder to handle large text inputs efficiently.
- Updated the embed method with a max_tokens_per_batch parameter so oversized requests are split across multiple API calls.
- Introduced new API routes for managing custom CLI endpoints, including GET, POST, PUT, and DELETE methods.
- Enhanced CLI history component to support source directory context for native session content.
- Improved error handling and logging in various components for better debugging and user feedback.
- Added internationalization support for new API endpoint features in the i18n module.
- Updated the CodexLens embeddings CLI command with a --max-workers option for concurrent API calls.
- Enhanced embedding manager to track model information and handle embeddings generation more robustly.
- Added entry points for CLI commands in the package configuration.
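
A minimal usage sketch of the reworked embedding call (illustrative only; the import path and constructor arguments are assumptions, since only the embed signature appears in this commit):

    from ccw_litellm import LiteLLMEmbedder  # import path assumed

    # Hypothetical construction; real provider/model configuration lives elsewhere in ccw_litellm.
    embedder = LiteLLMEmbedder()

    chunks = ["def add(a, b):\n    return a + b"] * 5000  # large input set

    # Each underlying litellm.embedding() call stays under ~30000 estimated tokens
    # (len(text) // 4 heuristic); batch_size is kept only for backward compatibility.
    vectors = embedder.embed(chunks, max_tokens_per_batch=30000)
    print(vectors.shape)  # (5000, embedder.dimensions)

On the CodexLens side this pairs with the new --max-workers option on the embeddings-generate command, whose help text recommends 4-8 workers for the litellm backend.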
catlog22
2025-12-24 18:01:26 +08:00
parent dfca4d60ee
commit e3e61bcae9
13 changed files with 575 additions and 107 deletions

View File

@@ -1,8 +1,11 @@
README.md
pyproject.toml
src/ccw_litellm/__init__.py
src/ccw_litellm/cli.py
src/ccw_litellm.egg-info/PKG-INFO
src/ccw_litellm.egg-info/SOURCES.txt
src/ccw_litellm.egg-info/dependency_links.txt
src/ccw_litellm.egg-info/entry_points.txt
src/ccw_litellm.egg-info/requires.txt
src/ccw_litellm.egg-info/top_level.txt
src/ccw_litellm/clients/__init__.py

View File

@@ -0,0 +1,2 @@
[console_scripts]
ccw-litellm = ccw_litellm.cli:main

View File

@@ -102,18 +102,75 @@ class LiteLLMEmbedder(AbstractEmbedder):
"""Embedding vector size.""" """Embedding vector size."""
return self._model_config.dimensions return self._model_config.dimensions
def _estimate_tokens(self, text: str) -> int:
"""Estimate token count for a text using fast heuristic.
Args:
text: Text to estimate tokens for
Returns:
Estimated token count (len/4 is a reasonable approximation)
"""
return len(text) // 4
def _create_batches(
self,
texts: list[str],
max_tokens: int = 30000
) -> list[list[str]]:
"""Split texts into batches that fit within token limits.
Args:
texts: List of texts to batch
max_tokens: Maximum tokens per batch (default: 30000, safe margin for 40960 limit)
Returns:
List of text batches
"""
batches = []
current_batch = []
current_tokens = 0
for text in texts:
text_tokens = self._estimate_tokens(text)
# If single text exceeds limit, truncate it
if text_tokens > max_tokens:
logger.warning(f"Text with {text_tokens} estimated tokens exceeds limit, truncating")
# Truncate to fit (rough estimate: 4 chars per token)
max_chars = max_tokens * 4
text = text[:max_chars]
text_tokens = self._estimate_tokens(text)
# Start new batch if current would exceed limit
if current_tokens + text_tokens > max_tokens and current_batch:
batches.append(current_batch)
current_batch = []
current_tokens = 0
current_batch.append(text)
current_tokens += text_tokens
# Add final batch
if current_batch:
batches.append(current_batch)
return batches
def embed( def embed(
self, self,
texts: str | Sequence[str], texts: str | Sequence[str],
*, *,
batch_size: int | None = None, batch_size: int | None = None,
max_tokens_per_batch: int = 30000,
**kwargs: Any, **kwargs: Any,
) -> NDArray[np.floating]: ) -> NDArray[np.floating]:
"""Embed one or more texts. """Embed one or more texts.
Args: Args:
texts: Single text or sequence of texts texts: Single text or sequence of texts
batch_size: Batch size for processing (currently unused, LiteLLM handles batching) batch_size: Batch size for processing (deprecated, use max_tokens_per_batch)
max_tokens_per_batch: Maximum estimated tokens per API call (default: 30000)
**kwargs: Additional arguments for litellm.embedding() **kwargs: Additional arguments for litellm.embedding()
Returns: Returns:
@@ -125,10 +182,8 @@ class LiteLLMEmbedder(AbstractEmbedder):
        # Normalize input to list
        if isinstance(texts, str):
            text_list = [texts]
-           single_input = True
        else:
            text_list = list(texts)
-           single_input = False

        if not text_list:
            # Return empty array with correct shape
@@ -137,36 +192,53 @@ class LiteLLMEmbedder(AbstractEmbedder):
        # Merge kwargs
        embedding_kwargs = {**self._litellm_kwargs, **kwargs}

-       try:
-           # For OpenAI-compatible endpoints, ensure encoding_format is set
-           if self._provider_config.api_base and "encoding_format" not in embedding_kwargs:
-               embedding_kwargs["encoding_format"] = "float"
-           # Call LiteLLM embedding
-           response = litellm.embedding(
-               model=self._format_model_name(),
-               input=text_list,
-               **embedding_kwargs,
-           )
-           # Extract embeddings
-           embeddings = [item["embedding"] for item in response.data]
-           # Convert to numpy array
-           result = np.array(embeddings, dtype=np.float32)
-           # Validate dimensions
-           if result.shape[1] != self.dimensions:
-               logger.warning(
-                   f"Expected {self.dimensions} dimensions, got {result.shape[1]}. "
-                   f"Configuration may be incorrect."
-               )
-           return result
-       except Exception as e:
-           logger.error(f"LiteLLM embedding failed: {e}")
-           raise
        # For OpenAI-compatible endpoints, ensure encoding_format is set
        if self._provider_config.api_base and "encoding_format" not in embedding_kwargs:
            embedding_kwargs["encoding_format"] = "float"

        # Split into token-aware batches
        batches = self._create_batches(text_list, max_tokens_per_batch)

        if len(batches) > 1:
            logger.info(f"Split {len(text_list)} texts into {len(batches)} batches for embedding")

        all_embeddings = []

        for batch_idx, batch in enumerate(batches):
            try:
                # Build call kwargs with explicit api_base
                call_kwargs = {**embedding_kwargs}
                if self._provider_config.api_base:
                    call_kwargs["api_base"] = self._provider_config.api_base
                if self._provider_config.api_key:
                    call_kwargs["api_key"] = self._provider_config.api_key

                # Call LiteLLM embedding for this batch
                response = litellm.embedding(
                    model=self._format_model_name(),
                    input=batch,
                    **call_kwargs,
                )

                # Extract embeddings
                batch_embeddings = [item["embedding"] for item in response.data]
                all_embeddings.extend(batch_embeddings)
            except Exception as e:
                logger.error(f"LiteLLM embedding failed for batch {batch_idx + 1}/{len(batches)}: {e}")
                raise

        # Convert to numpy array
        result = np.array(all_embeddings, dtype=np.float32)

        # Validate dimensions
        if result.shape[1] != self.dimensions:
            logger.warning(
                f"Expected {self.dimensions} dimensions, got {result.shape[1]}. "
                f"Configuration may be incorrect."
            )

        return result

    @property
    def model_name(self) -> str:
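
For a sense of scale, a quick worked example of the len // 4 heuristic above (numbers are illustrative, not from the commit):

    # How many 500-character chunks fit in one API call under the defaults above?
    chunk_chars = 500
    est_tokens_per_chunk = chunk_chars // 4          # 125 estimated tokens
    chunks_per_call = 30000 // est_tokens_per_chunk  # 240 chunks per litellm.embedding() call
    print(est_tokens_per_chunk, chunks_per_call)     # 125 240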

View File

@@ -38,7 +38,9 @@ import {
  saveClaudeCliTools,
  updateClaudeToolEnabled,
  updateClaudeCacheSettings,
- getClaudeCliToolsInfo
  getClaudeCliToolsInfo,
  addClaudeCustomEndpoint,
  removeClaudeCustomEndpoint
} from '../../tools/claude-cli-tools.js';

export interface RouteContext {
@@ -211,6 +213,93 @@ export async function handleCliRoutes(ctx: RouteContext): Promise<boolean> {
    }
  }
// API: Get all custom endpoints
if (pathname === '/api/cli/endpoints' && req.method === 'GET') {
try {
const config = loadClaudeCliTools(initialPath);
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ endpoints: config.customEndpoints || [] }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
// API: Add/Update custom endpoint
if (pathname === '/api/cli/endpoints' && req.method === 'POST') {
handlePostRequest(req, res, async (body: unknown) => {
try {
const { id, name, enabled } = body as { id: string; name: string; enabled: boolean };
if (!id || !name) {
return { error: 'id and name are required', status: 400 };
}
const config = addClaudeCustomEndpoint(initialPath, { id, name, enabled: enabled !== false });
broadcastToClients({
type: 'CLI_ENDPOINT_UPDATED',
payload: { endpoint: { id, name, enabled }, timestamp: new Date().toISOString() }
});
return { success: true, endpoints: config.customEndpoints };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// API: Update custom endpoint enabled status
if (pathname.match(/^\/api\/cli\/endpoints\/[^/]+$/) && req.method === 'PUT') {
const endpointId = pathname.split('/').pop() || '';
handlePostRequest(req, res, async (body: unknown) => {
try {
const { enabled, name } = body as { enabled?: boolean; name?: string };
const config = loadClaudeCliTools(initialPath);
const endpoint = config.customEndpoints.find(e => e.id === endpointId);
if (!endpoint) {
return { error: 'Endpoint not found', status: 404 };
}
if (typeof enabled === 'boolean') endpoint.enabled = enabled;
if (name) endpoint.name = name;
saveClaudeCliTools(initialPath, config);
broadcastToClients({
type: 'CLI_ENDPOINT_UPDATED',
payload: { endpoint, timestamp: new Date().toISOString() }
});
return { success: true, endpoint };
} catch (err) {
return { error: (err as Error).message, status: 500 };
}
});
return true;
}
// API: Delete custom endpoint
if (pathname.match(/^\/api\/cli\/endpoints\/[^/]+$/) && req.method === 'DELETE') {
const endpointId = pathname.split('/').pop() || '';
try {
const config = removeClaudeCustomEndpoint(initialPath, endpointId);
broadcastToClients({
type: 'CLI_ENDPOINT_DELETED',
payload: { endpointId, timestamp: new Date().toISOString() }
});
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ success: true, endpoints: config.customEndpoints }));
} catch (err) {
res.writeHead(500, { 'Content-Type': 'application/json' });
res.end(JSON.stringify({ error: (err as Error).message }));
}
return true;
}
  // API: CLI Execution History
  if (pathname === '/api/cli/history') {
    const projectPath = url.searchParams.get('path') || initialPath;
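
To exercise the new endpoint routes by hand, a small sketch like the following works; the host/port and a running ccw server are assumptions, and only the paths and payload fields come from the handlers above:

    import json
    import urllib.request

    BASE = "http://localhost:3000"  # assumed dev-server address

    def call(method: str, path: str, body: dict | None = None):
        data = json.dumps(body).encode() if body is not None else None
        req = urllib.request.Request(BASE + path, data=data, method=method,
                                     headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(req) as resp:
            return json.loads(resp.read())

    # Register (or update) a custom endpoint, toggle it, then remove it.
    print(call("POST", "/api/cli/endpoints", {"id": "my-embedder", "name": "My Embedder"}))
    print(call("PUT", "/api/cli/endpoints/my-embedder", {"enabled": False}))
    print(call("DELETE", "/api/cli/endpoints/my-embedder"))
    print(call("GET", "/api/cli/endpoints"))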

View File

@@ -529,27 +529,38 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise<boolean
    // GET /api/litellm-api/ccw-litellm/status - Check ccw-litellm installation status
    if (pathname === '/api/litellm-api/ccw-litellm/status' && req.method === 'GET') {
      try {
-       const { spawn } = await import('child_process');
-       const result = await new Promise<{ installed: boolean; version?: string }>((resolve) => {
-         const proc = spawn('python', ['-c', 'import ccw_litellm; print(ccw_litellm.__version__ if hasattr(ccw_litellm, "__version__") else "installed")'], {
-           shell: true,
-           timeout: 10000
-         });
-         let output = '';
-         proc.stdout?.on('data', (data) => { output += data.toString(); });
-         proc.on('close', (code) => {
-           if (code === 0) {
-             resolve({ installed: true, version: output.trim() || 'unknown' });
-           } else {
-             resolve({ installed: false });
-           }
-         });
-         proc.on('error', () => resolve({ installed: false }));
-       });
        const { execSync } = await import('child_process');

        // Try multiple Python executables
        const pythonExecutables = ['python', 'python3', 'py'];
        // Use single quotes inside Python code for Windows compatibility
        const pythonCode = "import ccw_litellm; print(getattr(ccw_litellm, '__version__', 'installed'))";

        let installed = false;
        let version = '';
        let lastError = '';

        for (const pythonExe of pythonExecutables) {
          try {
            const output = execSync(`${pythonExe} -c "${pythonCode}"`, {
              encoding: 'utf-8',
              timeout: 10000,
              windowsHide: true
            });
            version = output.trim();
            if (version) {
              installed = true;
              console.log(`[ccw-litellm status] Found with ${pythonExe}: ${version}`);
              break;
            }
          } catch (err) {
            lastError = (err as Error).message;
            console.log(`[ccw-litellm status] ${pythonExe} failed:`, lastError.substring(0, 100));
          }
        }

        res.writeHead(200, { 'Content-Type': 'application/json' });
-       res.end(JSON.stringify(result));
        res.end(JSON.stringify(installed ? { installed: true, version } : { installed: false, error: lastError }));
      } catch (err) {
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ installed: false, error: (err as Error).message }));

View File

@@ -33,9 +33,13 @@ async function loadCliHistory(options = {}) {
}

// Load native session content for a specific execution
-async function loadNativeSessionContent(executionId) {
async function loadNativeSessionContent(executionId, sourceDir) {
  try {
-   const url = `/api/cli/native-session?path=${encodeURIComponent(projectPath)}&id=${encodeURIComponent(executionId)}`;
    // If sourceDir provided, use it to build the correct path
    const basePath = sourceDir && sourceDir !== '.'
      ? projectPath + '/' + sourceDir
      : projectPath;
    const url = `/api/cli/native-session?path=${encodeURIComponent(basePath)}&id=${encodeURIComponent(executionId)}`;
    const response = await fetch(url);
    if (!response.ok) return null;
    return await response.json();
@@ -133,9 +137,12 @@ function renderCliHistory() {
          </span>`
        : '';

      // Escape sourceDir for use in onclick
      const sourceDirEscaped = exec.sourceDir ? exec.sourceDir.replace(/'/g, "\\'") : '';

      return `
        <div class="cli-history-item ${hasNative ? 'has-native' : ''}">
-         <div class="cli-history-item-content" onclick="showExecutionDetail('${exec.id}')">
          <div class="cli-history-item-content" onclick="showExecutionDetail('${exec.id}', '${sourceDirEscaped}')">
            <div class="cli-history-item-header">
              <span class="cli-tool-tag cli-tool-${exec.tool}">${exec.tool.toUpperCase()}</span>
              <span class="cli-mode-tag">${exec.mode || 'analysis'}</span>
@@ -154,14 +161,14 @@ function renderCliHistory() {
          </div>

          <div class="cli-history-actions">
            ${hasNative ? `
-             <button class="btn-icon" onclick="event.stopPropagation(); showNativeSessionDetail('${exec.id}')" title="View Native Session">
              <button class="btn-icon" onclick="event.stopPropagation(); showNativeSessionDetail('${exec.id}', '${sourceDirEscaped}')" title="View Native Session">
                <i data-lucide="file-json" class="w-3.5 h-3.5"></i>
              </button>
            ` : ''}
-           <button class="btn-icon" onclick="event.stopPropagation(); showExecutionDetail('${exec.id}')" title="View Details">
            <button class="btn-icon" onclick="event.stopPropagation(); showExecutionDetail('${exec.id}', '${sourceDirEscaped}')" title="View Details">
              <i data-lucide="eye" class="w-3.5 h-3.5"></i>
            </button>
-           <button class="btn-icon btn-danger" onclick="event.stopPropagation(); confirmDeleteExecution('${exec.id}')" title="Delete">
            <button class="btn-icon btn-danger" onclick="event.stopPropagation(); confirmDeleteExecution('${exec.id}', '${sourceDirEscaped}')" title="Delete">
              <i data-lucide="trash-2" class="w-3.5 h-3.5"></i>
            </button>
          </div>
@@ -650,9 +657,9 @@ async function copyConcatenatedPrompt(executionId) {
/**
 * Show native session detail modal with full conversation content
 */
-async function showNativeSessionDetail(executionId) {
async function showNativeSessionDetail(executionId, sourceDir) {
  // Load native session content
- const nativeSession = await loadNativeSessionContent(executionId);
  const nativeSession = await loadNativeSessionContent(executionId, sourceDir);
  if (!nativeSession) {
    showRefreshToast('Native session not found', 'error');

View File

@@ -228,6 +228,11 @@ const i18n = {
  'cli.codexLensDescFull': 'Full-text code search engine',
  'cli.semanticDesc': 'AI-powered code understanding',
  'cli.semanticDescFull': 'Natural language code search',
  'cli.apiEndpoints': 'API Endpoints',
  'cli.configured': 'configured',
  'cli.addToCli': 'Add to CLI',
  'cli.enabled': 'Enabled',
  'cli.disabled': 'Disabled',

  // CodexLens Configuration
  'codexlens.config': 'CodexLens Configuration',
@@ -378,6 +383,8 @@ const i18n = {
  'codexlens.indexComplete': 'Index complete',
  'codexlens.indexSuccess': 'Index created successfully',
  'codexlens.indexFailed': 'Indexing failed',
  'codexlens.embeddingsFailed': 'Embeddings generation failed',
  'codexlens.ftsSuccessEmbeddingsFailed': 'FTS index created, but embeddings failed',

  // CodexLens Install
  'codexlens.installDesc': 'Python-based code indexing engine',
@@ -1880,6 +1887,11 @@ const i18n = {
  'cli.codexLensDescFull': '全文代码搜索引擎',
  'cli.semanticDesc': 'AI 驱动的代码理解',
  'cli.semanticDescFull': '自然语言代码搜索',
  'cli.apiEndpoints': 'API 端点',
  'cli.configured': '已配置',
  'cli.addToCli': '添加到 CLI',
  'cli.enabled': '已启用',
  'cli.disabled': '已禁用',

  // CodexLens 配置
  'codexlens.config': 'CodexLens 配置',
@@ -2031,6 +2043,8 @@ const i18n = {
  'codexlens.indexComplete': '索引完成',
  'codexlens.indexSuccess': '索引创建成功',
  'codexlens.indexFailed': '索引失败',
  'codexlens.embeddingsFailed': '嵌入生成失败',
  'codexlens.ftsSuccessEmbeddingsFailed': 'FTS 索引已创建,但嵌入生成失败',

  // CodexLens 安装
  'codexlens.installDesc': '基于 Python 的代码索引引擎',

View File

@@ -2739,8 +2739,11 @@ function toggleKeyVisibility(btn) {
 */
async function checkCcwLitellmStatus() {
  try {
    console.log('[API Settings] Checking ccw-litellm status...');
    var response = await fetch('/api/litellm-api/ccw-litellm/status');
    console.log('[API Settings] Status response:', response.status);
    var status = await response.json();
    console.log('[API Settings] ccw-litellm status:', status);
    window.ccwLitellmStatus = status;
    return status;
  } catch (e) {

View File

@@ -59,6 +59,91 @@ async function loadCcwEndpointTools() {
  }
}
// ========== LiteLLM API Endpoints ==========
var litellmApiEndpoints = [];
var cliCustomEndpoints = [];
async function loadLitellmApiEndpoints() {
try {
var response = await fetch('/api/litellm-api/config');
if (!response.ok) throw new Error('Failed to load LiteLLM endpoints');
var data = await response.json();
litellmApiEndpoints = data.endpoints || [];
window.litellmApiConfig = data;
return litellmApiEndpoints;
} catch (err) {
console.error('Failed to load LiteLLM endpoints:', err);
litellmApiEndpoints = [];
return [];
}
}
async function loadCliCustomEndpoints() {
try {
var response = await fetch('/api/cli/endpoints');
if (!response.ok) throw new Error('Failed to load CLI custom endpoints');
var data = await response.json();
cliCustomEndpoints = data.endpoints || [];
return cliCustomEndpoints;
} catch (err) {
console.error('Failed to load CLI custom endpoints:', err);
cliCustomEndpoints = [];
return [];
}
}
async function toggleEndpointEnabled(endpointId, enabled) {
try {
var response = await fetch('/api/cli/endpoints/' + endpointId, {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ enabled: enabled })
});
if (!response.ok) throw new Error('Failed to update endpoint');
var data = await response.json();
if (data.success) {
// Update local state
var idx = cliCustomEndpoints.findIndex(function(e) { return e.id === endpointId; });
if (idx >= 0) {
cliCustomEndpoints[idx].enabled = enabled;
}
showRefreshToast((enabled ? 'Enabled' : 'Disabled') + ' endpoint: ' + endpointId, 'success');
}
return data;
} catch (err) {
showRefreshToast('Failed to update endpoint: ' + err.message, 'error');
throw err;
}
}
async function syncEndpointToCliTools(endpoint) {
try {
var response = await fetch('/api/cli/endpoints', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
id: endpoint.id,
name: endpoint.name,
enabled: true
})
});
if (!response.ok) throw new Error('Failed to sync endpoint');
var data = await response.json();
if (data.success) {
cliCustomEndpoints = data.endpoints;
showRefreshToast('Endpoint synced to CLI tools: ' + endpoint.id, 'success');
renderToolsSection();
}
return data;
} catch (err) {
showRefreshToast('Failed to sync endpoint: ' + err.message, 'error');
throw err;
}
}
window.toggleEndpointEnabled = toggleEndpointEnabled;
window.syncEndpointToCliTools = syncEndpointToCliTools;

// ========== CLI Tool Configuration ==========
async function loadCliToolConfig() {
  try {
@@ -322,7 +407,9 @@ async function renderCliManager() {
    loadCliToolStatus(),
    loadCodexLensStatus(),
    loadCcwInstallations(),
-   loadCcwEndpointTools()
    loadCcwEndpointTools(),
    loadLitellmApiEndpoints(),
    loadCliCustomEndpoints()
  ]);

  container.innerHTML = '<div class="status-manager">' +
@@ -487,6 +574,51 @@ function renderToolsSection() {
      '</div>';
  }
// API Endpoints section
var apiEndpointsHtml = '';
if (litellmApiEndpoints.length > 0) {
var endpointItems = litellmApiEndpoints.map(function(endpoint) {
// Check if endpoint is synced to CLI tools
var cliEndpoint = cliCustomEndpoints.find(function(e) { return e.id === endpoint.id; });
var isSynced = !!cliEndpoint;
var isEnabled = cliEndpoint ? cliEndpoint.enabled : false;
// Find provider info
var provider = (window.litellmApiConfig?.providers || []).find(function(p) { return p.id === endpoint.providerId; });
var providerName = provider ? provider.name : endpoint.providerId;
return '<div class="tool-item ' + (isSynced && isEnabled ? 'available' : 'unavailable') + '">' +
'<div class="tool-item-left">' +
'<span class="tool-status-dot ' + (isSynced && isEnabled ? 'status-available' : 'status-unavailable') + '"></span>' +
'<div class="tool-item-info">' +
'<div class="tool-item-name">' + endpoint.id + ' <span class="tool-type-badge">API</span></div>' +
'<div class="tool-item-desc">' + endpoint.model + ' (' + providerName + ')</div>' +
'</div>' +
'</div>' +
'<div class="tool-item-right">' +
(isSynced
? '<label class="toggle-switch" onclick="event.stopPropagation()">' +
'<input type="checkbox" ' + (isEnabled ? 'checked' : '') + ' onchange="toggleEndpointEnabled(\'' + endpoint.id + '\', this.checked); renderToolsSection();">' +
'<span class="toggle-slider"></span>' +
'</label>'
: '<button class="btn-sm btn-primary" onclick="event.stopPropagation(); syncEndpointToCliTools({id: \'' + endpoint.id + '\', name: \'' + endpoint.name + '\'})">' +
'<i data-lucide="plus" class="w-3 h-3"></i> ' + (t('cli.addToCli') || 'Add to CLI') +
'</button>') +
'</div>' +
'</div>';
}).join('');
apiEndpointsHtml = '<div class="tools-subsection" style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid var(--border);">' +
'<div class="section-header-left" style="margin-bottom: 0.5rem;">' +
'<h4 style="font-size: 0.875rem; font-weight: 600; display: flex; align-items: center; gap: 0.5rem;">' +
'<i data-lucide="cloud" class="w-4 h-4"></i> ' + (t('cli.apiEndpoints') || 'API Endpoints') +
'</h4>' +
'<span class="section-count">' + litellmApiEndpoints.length + ' ' + (t('cli.configured') || 'configured') + '</span>' +
'</div>' +
'<div class="tools-list">' + endpointItems + '</div>' +
'</div>';
}
  container.innerHTML = '<div class="section-header">' +
    '<div class="section-header-left">' +
    '<h3><i data-lucide="terminal" class="w-4 h-4"></i> ' + t('cli.tools') + '</h3>' +
@@ -500,7 +632,8 @@ function renderToolsSection() {
    toolsHtml +
    codexLensHtml +
    semanticHtml +
-   '</div>';
    '</div>' +
    apiEndpointsHtml;

  if (window.lucide) lucide.createIcons();
}

View File

@@ -383,7 +383,7 @@ async function loadSemanticDepsStatus() {
    acceleratorIcon = 'zap';
    acceleratorClass = 'bg-green-500/20 text-green-600';
  } else if (accelerator === 'DirectML') {
-   acceleratorIcon = 'gpu-card';
    acceleratorIcon = 'cpu';
    acceleratorClass = 'bg-blue-500/20 text-blue-600';
  } else if (accelerator === 'ROCm') {
    acceleratorIcon = 'flame';
@@ -450,7 +450,7 @@ function buildGpuModeSelector(gpuInfo) {
      id: 'directml',
      label: 'DirectML',
      desc: t('codexlens.directmlModeDesc') || 'Windows GPU (NVIDIA/AMD/Intel)',
-     icon: 'gpu-card',
      icon: 'cpu',
      available: gpuInfo.available.includes('directml'),
      recommended: gpuInfo.mode === 'directml'
    },
@@ -1331,7 +1331,15 @@ async function startCodexLensIndexing(indexType, embeddingModel, embeddingBacken
  // Check if completed successfully (WebSocket might have already reported)
  if (result.success) {
-   handleIndexComplete(true, t('codexlens.indexComplete'));
    // For vector index, check if embeddings were actually generated
    var embeddingsResult = result.result && result.result.embeddings;
    if (indexType === 'vector' && embeddingsResult && !embeddingsResult.generated) {
      // FTS succeeded but embeddings failed - show partial success
      var errorMsg = embeddingsResult.error || t('codexlens.embeddingsFailed');
      handleIndexComplete(false, t('codexlens.ftsSuccessEmbeddingsFailed') || 'FTS index created, but embeddings failed: ' + errorMsg);
    } else {
      handleIndexComplete(true, t('codexlens.indexComplete'));
    }
  } else if (!result.success) {
    handleIndexComplete(false, result.error || t('common.unknownError'));
  }

View File

@@ -275,11 +275,22 @@ interface SearchResult {
  message?: string;
}

interface ModelInfo {
  model_profile?: string;
  model_name?: string;
  embedding_dim?: number;
  backend?: string;
  created_at?: string;
  updated_at?: string;
}

interface IndexStatus {
  indexed: boolean;
  has_embeddings: boolean;
  file_count?: number;
  embeddings_coverage_percent?: number;
  total_chunks?: number;
  model_info?: ModelInfo;
  warning?: string;
}
@@ -320,6 +331,18 @@ async function checkIndexStatus(path: string = '.'): Promise<IndexStatus> {
    const embeddingsData = status.embeddings || {};
    const embeddingsCoverage = embeddingsData.coverage_percent || 0;
    const has_embeddings = embeddingsCoverage >= 50; // Threshold: 50%
    const totalChunks = embeddingsData.total_chunks || 0;

    // Extract model info if available
    const modelInfoData = embeddingsData.model_info;
    const modelInfo: ModelInfo | undefined = modelInfoData ? {
      model_profile: modelInfoData.model_profile,
      model_name: modelInfoData.model_name,
      embedding_dim: modelInfoData.embedding_dim,
      backend: modelInfoData.backend,
      created_at: modelInfoData.created_at,
      updated_at: modelInfoData.updated_at,
    } : undefined;

    let warning: string | undefined;
    if (!indexed) {
@@ -335,6 +358,8 @@ async function checkIndexStatus(path: string = '.'): Promise<IndexStatus> {
      has_embeddings,
      file_count: status.total_files,
      embeddings_coverage_percent: embeddingsCoverage,
      total_chunks: totalChunks,
      model_info: modelInfo,
      warning,
    };
  } catch {

View File

@@ -1831,6 +1831,14 @@ def embeddings_generate(
"-r", "-r",
help="Recursively process all _index.db files in directory tree.", help="Recursively process all _index.db files in directory tree.",
), ),
max_workers: int = typer.Option(
1,
"--max-workers",
"-w",
min=1,
max=16,
help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).",
),
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."), verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."),
) -> None: ) -> None:
@@ -1927,7 +1935,10 @@ def embeddings_generate(
    else:
        console.print(f"Index: [dim]{index_path}[/dim]")
        console.print(f"Backend: [cyan]{backend}[/cyan]")
-       console.print(f"Model: [cyan]{model}[/cyan]\n")
        console.print(f"Model: [cyan]{model}[/cyan]")
        if max_workers > 1:
            console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]")
        console.print()

    if use_recursive:
result = generate_embeddings_recursive( result = generate_embeddings_recursive(
@@ -1937,6 +1948,7 @@ def embeddings_generate(
force=force, force=force,
chunk_size=chunk_size, chunk_size=chunk_size,
progress_callback=progress_update, progress_callback=progress_update,
max_workers=max_workers,
) )
else: else:
result = generate_embeddings( result = generate_embeddings(
@@ -1946,6 +1958,7 @@ def embeddings_generate(
force=force, force=force,
chunk_size=chunk_size, chunk_size=chunk_size,
progress_callback=progress_update, progress_callback=progress_update,
max_workers=max_workers,
) )
if json_mode: if json_mode:

View File

@@ -7,7 +7,6 @@ import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from itertools import islice
from pathlib import Path
-from threading import Lock
from typing import Dict, Generator, List, Optional, Tuple

try:
@@ -441,82 +440,133 @@ def generate_embeddings(
    batch_number = 0
    files_seen = set()

-   # Thread-safe counters for concurrent processing
-   counter_lock = Lock()
-
-   def process_batch(batch_data: Tuple[int, List[Tuple]]) -> Tuple[int, set, Optional[str]]:
-       """Process a single batch: generate embeddings and store.
-
-       Args:
-           batch_data: Tuple of (batch_number, chunk_batch)
-
-       Returns:
-           Tuple of (chunks_created, files_in_batch, error_message)
-       """
-       batch_num, chunk_batch = batch_data
-       batch_files = set()
-       try:
-           # Track files in this batch
-           for _, file_path in chunk_batch:
-               batch_files.add(file_path)
-
-           # Generate embeddings
-           batch_contents = [chunk.content for chunk, _ in chunk_batch]
-           embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
-
-           # Store embeddings (thread-safe via SQLite's serialized mode)
-           vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
-
-           return len(chunk_batch), batch_files, None
-       except Exception as e:
-           error_msg = f"Batch {batch_num}: {str(e)}"
-           logger.error(f"Failed to process embedding batch {batch_num}: {str(e)}")
-           return 0, batch_files, error_msg
-
-   # Collect batches for concurrent processing
-   all_batches = []
-   for chunk_batch in batch_generator:
-       batch_number += 1
-       all_batches.append((batch_number, chunk_batch))
-
-   # Process batches (sequential or concurrent based on max_workers)
-   if max_workers <= 1:
-       # Sequential processing (original behavior)
-       for batch_num, chunk_batch in all_batches:
-           chunks_created, batch_files, error = process_batch((batch_num, chunk_batch))
-           files_seen.update(batch_files)
-           total_chunks_created += chunks_created
-           total_files_processed = len(files_seen)
-
-           if progress_callback and batch_num % 10 == 0:
-               progress_callback(f"  Batch {batch_num}: {total_chunks_created} chunks, {total_files_processed} files")
-   else:
-       # Concurrent processing for API backends
-       if progress_callback:
-           progress_callback(f"Processing {len(all_batches)} batches with {max_workers} concurrent workers...")
-
-       with ThreadPoolExecutor(max_workers=max_workers) as executor:
-           futures = {executor.submit(process_batch, batch): batch[0] for batch in all_batches}
-
-           completed = 0
-           for future in as_completed(futures):
-               batch_num = futures[future]
-               try:
-                   chunks_created, batch_files, error = future.result()
-                   with counter_lock:
-                       files_seen.update(batch_files)
-                       total_chunks_created += chunks_created
-                       total_files_processed = len(files_seen)
-                       completed += 1
-
-                   if progress_callback and completed % 10 == 0:
-                       progress_callback(f"  Completed {completed}/{len(all_batches)} batches: {total_chunks_created} chunks")
-               except Exception as e:
-                   logger.error(f"Batch {batch_num} raised exception: {str(e)}")

    def compute_embeddings_only(batch_data: Tuple[int, List[Tuple]]):
        """Compute embeddings for a batch (no DB write).

        Args:
            batch_data: Tuple of (batch_number, chunk_batch)

        Returns:
            Tuple of (batch_num, chunk_batch, embeddings_numpy, batch_files, error)
        """
        batch_num, chunk_batch = batch_data
        batch_files = set()
        try:
            for _, file_path in chunk_batch:
                batch_files.add(file_path)

            batch_contents = [chunk.content for chunk, _ in chunk_batch]
            embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)

            return batch_num, chunk_batch, embeddings_numpy, batch_files, None
        except Exception as e:
            error_msg = f"Batch {batch_num}: {str(e)}"
            logger.error(f"Failed to compute embeddings for batch {batch_num}: {str(e)}")
            return batch_num, chunk_batch, None, batch_files, error_msg

    # Process batches based on max_workers setting
    if max_workers <= 1:
        # Sequential processing - stream directly from generator (no pre-materialization)
        for chunk_batch in batch_generator:
            batch_number += 1

            # Track files in this batch
            batch_files = set()
            for _, file_path in chunk_batch:
                batch_files.add(file_path)

            try:
                # Generate embeddings
                batch_contents = [chunk.content for chunk, _ in chunk_batch]
                embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)

                # Store embeddings
                vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)

                files_seen.update(batch_files)
                total_chunks_created += len(chunk_batch)
                total_files_processed = len(files_seen)

                if progress_callback and batch_number % 10 == 0:
                    progress_callback(f"  Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
            except Exception as e:
                logger.error(f"Failed to process batch {batch_number}: {str(e)}")
                files_seen.update(batch_files)
    else:
        # Concurrent processing with producer-consumer pattern
        # Workers compute embeddings (parallel), main thread writes to DB (serial)
        from queue import Queue
        from threading import Thread

        result_queue = Queue(maxsize=max_workers * 2)  # Bounded queue to limit memory
        batch_counter = [0]  # Mutable counter for producer thread
        producer_done = [False]

        def producer():
            """Submit batches to executor, put results in queue."""
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                pending_futures = []
                for chunk_batch in batch_generator:
                    batch_counter[0] += 1
                    batch_num = batch_counter[0]

                    # Submit compute task
                    future = executor.submit(compute_embeddings_only, (batch_num, chunk_batch))
                    pending_futures.append(future)

                    # Check for completed futures and add to queue
                    for f in list(pending_futures):
                        if f.done():
                            try:
                                result_queue.put(f.result())
                            except Exception as e:
                                logger.error(f"Future raised exception: {e}")
                            pending_futures.remove(f)

                # Wait for remaining futures
                for future in as_completed(pending_futures):
                    try:
                        result_queue.put(future.result())
                    except Exception as e:
                        logger.error(f"Future raised exception: {e}")

            producer_done[0] = True
            result_queue.put(None)  # Sentinel to signal completion

        # Start producer thread
        producer_thread = Thread(target=producer, daemon=True)
        producer_thread.start()

        if progress_callback:
            progress_callback(f"Processing with {max_workers} concurrent embedding workers...")

        # Consumer: main thread writes to DB (serial, no contention)
        completed = 0
        while True:
            result = result_queue.get()
            if result is None:  # Sentinel
                break

            batch_num, chunk_batch, embeddings_numpy, batch_files, error = result

            if embeddings_numpy is not None and error is None:
                # Write to DB in main thread (no contention)
                vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
                total_chunks_created += len(chunk_batch)

            files_seen.update(batch_files)
            total_files_processed = len(files_seen)
            completed += 1

            if progress_callback and completed % 10 == 0:
                progress_callback(f"  Completed {completed} batches: {total_chunks_created} chunks")

        producer_thread.join()
        batch_number = batch_counter[0]

    # Notify before ANN index finalization (happens when bulk_insert context exits)
    if progress_callback:
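
The concurrent path above follows a producer-consumer shape: worker threads compute embeddings in parallel while the main thread keeps the SQLite writes serial. A stripped-down sketch of that pattern (not the CodexLens code itself; for simplicity it submits all work up front rather than streaming from a generator):

    # Minimal illustration: parallel compute, serialized "write" step in one consumer.
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from queue import Queue
    from threading import Thread

    def compute(item):              # stands in for compute_embeddings_only()
        return item, item * item    # (input, "embedding")

    def run(items, max_workers=4):
        results = Queue(maxsize=max_workers * 2)   # bounded: limits in-flight memory

        def producer():
            with ThreadPoolExecutor(max_workers=max_workers) as pool:
                for fut in as_completed([pool.submit(compute, i) for i in items]):
                    results.put(fut.result())
            results.put(None)                      # sentinel: no more results

        Thread(target=producer, daemon=True).start()

        written = []
        while (res := results.get()) is not None:  # consumer: serial "DB writes"
            written.append(res)
        return written

    print(run(range(10)))

The bounded queue is the key design choice: it applies backpressure so embedding workers cannot run arbitrarily far ahead of the single database writer.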
@@ -718,7 +768,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
        index_root: Root index directory

    Returns:
-       Aggregated status with coverage statistics
        Aggregated status with coverage statistics, model info, and timestamps
    """
    index_files = discover_all_index_dbs(index_root)
@@ -734,6 +784,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
"coverage_percent": 0.0, "coverage_percent": 0.0,
"indexes_with_embeddings": 0, "indexes_with_embeddings": 0,
"indexes_without_embeddings": 0, "indexes_without_embeddings": 0,
"model_info": None,
}, },
} }
@@ -741,6 +792,8 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
    files_with_embeddings = 0
    total_chunks = 0
    indexes_with_embeddings = 0
    model_info = None
    latest_updated_at = None

    for index_path in index_files:
        status = check_index_embeddings(index_path)
@@ -752,6 +805,40 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
if result["has_embeddings"]: if result["has_embeddings"]:
indexes_with_embeddings += 1 indexes_with_embeddings += 1
# Get model config from first index with embeddings (they should all match)
if model_info is None:
try:
from codexlens.semantic.vector_store import VectorStore
with VectorStore(index_path) as vs:
config = vs.get_model_config()
if config:
model_info = {
"model_profile": config.get("model_profile"),
"model_name": config.get("model_name"),
"embedding_dim": config.get("embedding_dim"),
"backend": config.get("backend"),
"created_at": config.get("created_at"),
"updated_at": config.get("updated_at"),
}
latest_updated_at = config.get("updated_at")
except Exception:
pass
else:
# Track the latest updated_at across all indexes
try:
from codexlens.semantic.vector_store import VectorStore
with VectorStore(index_path) as vs:
config = vs.get_model_config()
if config and config.get("updated_at"):
if latest_updated_at is None or config["updated_at"] > latest_updated_at:
latest_updated_at = config["updated_at"]
except Exception:
pass
# Update model_info with latest timestamp
if model_info and latest_updated_at:
model_info["updated_at"] = latest_updated_at
return { return {
"success": True, "success": True,
"result": { "result": {
@@ -763,6 +850,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
"coverage_percent": round((files_with_embeddings / total_files * 100) if total_files > 0 else 0, 1), "coverage_percent": round((files_with_embeddings / total_files * 100) if total_files > 0 else 0, 1),
"indexes_with_embeddings": indexes_with_embeddings, "indexes_with_embeddings": indexes_with_embeddings,
"indexes_without_embeddings": len(index_files) - indexes_with_embeddings, "indexes_without_embeddings": len(index_files) - indexes_with_embeddings,
"model_info": model_info,
}, },
} }
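
A hypothetical consumer of this aggregated status; the import path and index-root location are assumptions, only the returned dict shape is taken from this diff:

    from pathlib import Path
    from codexlens.semantic.embeddings_manager import get_embeddings_status  # module path assumed

    status = get_embeddings_status(Path(".codexlens"))   # index root location assumed
    result = status["result"]
    info = result.get("model_info") or {}
    print(f'coverage: {result["coverage_percent"]}%')
    print(f'model: {info.get("model_name")} ({info.get("backend")}), dim={info.get("embedding_dim")}')
    print(f'last updated: {info.get("updated_at")}')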