Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-05 01:50:27 +08:00
feat: Enhance LiteLLM integration and CLI management
- Added token estimation and batching functionality in LiteLLMEmbedder to handle large text inputs efficiently.
- Updated embed method to support max_tokens_per_batch parameter for better API call management.
- Introduced new API routes for managing custom CLI endpoints, including GET, POST, PUT, and DELETE methods.
- Enhanced CLI history component to support source directory context for native session content.
- Improved error handling and logging in various components for better debugging and user feedback.
- Added internationalization support for new API endpoint features in the i18n module.
- Updated CodexLens CLI commands to allow for concurrent API calls with a max_workers option.
- Enhanced embedding manager to track model information and handle embeddings generation more robustly.
- Added entry points for CLI commands in the package configuration.
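The first two bullets describe a token-estimation heuristic (roughly one token per four characters) and greedy packing of texts under a per-request budget. A standalone sketch of that idea, for illustration only — the 30000-token default and the truncation rule come from the embedder diff further down; the function names and example input here are assumed:

# Illustrative sketch only; the shipped logic lives in LiteLLMEmbedder below.
def estimate_tokens(text: str) -> int:
    # Fast approximation used by the embedder: ~4 characters per token
    return len(text) // 4

def create_batches(texts: list[str], max_tokens: int = 30000) -> list[list[str]]:
    batches, current, current_tokens = [], [], 0
    for text in texts:
        tokens = estimate_tokens(text)
        if tokens > max_tokens:
            # A single oversized text is truncated to fit the budget
            text = text[: max_tokens * 4]
            tokens = estimate_tokens(text)
        if current and current_tokens + tokens > max_tokens:
            batches.append(current)
            current, current_tokens = [], 0
        current.append(text)
        current_tokens += tokens
    if current:
        batches.append(current)
    return batches

# Two ~20000-token texts land in separate API calls under the 30000-token budget.
print([len(b) for b in create_batches(["a" * 80_000, "b" * 80_000])])

The diff below keeps this packing inside the embedder and exposes it through the new max_tokens_per_batch argument on embed().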
ccw-litellm/src/ccw_litellm.egg-info/SOURCES.txt
@@ -1,8 +1,11 @@
README.md
pyproject.toml
src/ccw_litellm/__init__.py
src/ccw_litellm/cli.py
src/ccw_litellm.egg-info/PKG-INFO
src/ccw_litellm.egg-info/SOURCES.txt
src/ccw_litellm.egg-info/dependency_links.txt
src/ccw_litellm.egg-info/entry_points.txt
src/ccw_litellm.egg-info/requires.txt
src/ccw_litellm.egg-info/top_level.txt
src/ccw_litellm/clients/__init__.py
ccw-litellm/src/ccw_litellm.egg-info/entry_points.txt (new file, +2)
@@ -0,0 +1,2 @@
[console_scripts]
ccw-litellm = ccw_litellm.cli:main
@@ -102,18 +102,75 @@ class LiteLLMEmbedder(AbstractEmbedder):
        """Embedding vector size."""
        return self._model_config.dimensions

    def _estimate_tokens(self, text: str) -> int:
        """Estimate token count for a text using fast heuristic.

        Args:
            text: Text to estimate tokens for

        Returns:
            Estimated token count (len/4 is a reasonable approximation)
        """
        return len(text) // 4

    def _create_batches(
        self,
        texts: list[str],
        max_tokens: int = 30000
    ) -> list[list[str]]:
        """Split texts into batches that fit within token limits.

        Args:
            texts: List of texts to batch
            max_tokens: Maximum tokens per batch (default: 30000, safe margin for 40960 limit)

        Returns:
            List of text batches
        """
        batches = []
        current_batch = []
        current_tokens = 0

        for text in texts:
            text_tokens = self._estimate_tokens(text)

            # If single text exceeds limit, truncate it
            if text_tokens > max_tokens:
                logger.warning(f"Text with {text_tokens} estimated tokens exceeds limit, truncating")
                # Truncate to fit (rough estimate: 4 chars per token)
                max_chars = max_tokens * 4
                text = text[:max_chars]
                text_tokens = self._estimate_tokens(text)

            # Start new batch if current would exceed limit
            if current_tokens + text_tokens > max_tokens and current_batch:
                batches.append(current_batch)
                current_batch = []
                current_tokens = 0

            current_batch.append(text)
            current_tokens += text_tokens

        # Add final batch
        if current_batch:
            batches.append(current_batch)

        return batches

    def embed(
        self,
        texts: str | Sequence[str],
        *,
        batch_size: int | None = None,
        max_tokens_per_batch: int = 30000,
        **kwargs: Any,
    ) -> NDArray[np.floating]:
        """Embed one or more texts.

        Args:
            texts: Single text or sequence of texts
-           batch_size: Batch size for processing (currently unused, LiteLLM handles batching)
            batch_size: Batch size for processing (deprecated, use max_tokens_per_batch)
            max_tokens_per_batch: Maximum estimated tokens per API call (default: 30000)
            **kwargs: Additional arguments for litellm.embedding()

        Returns:
@@ -125,10 +182,8 @@ class LiteLLMEmbedder(AbstractEmbedder):
        # Normalize input to list
        if isinstance(texts, str):
            text_list = [texts]
            single_input = True
        else:
            text_list = list(texts)
            single_input = False

        if not text_list:
            # Return empty array with correct shape
@@ -137,36 +192,53 @@ class LiteLLMEmbedder(AbstractEmbedder):
        # Merge kwargs
        embedding_kwargs = {**self._litellm_kwargs, **kwargs}

-       try:
-           # For OpenAI-compatible endpoints, ensure encoding_format is set
-           if self._provider_config.api_base and "encoding_format" not in embedding_kwargs:
-               embedding_kwargs["encoding_format"] = "float"
        # For OpenAI-compatible endpoints, ensure encoding_format is set
        if self._provider_config.api_base and "encoding_format" not in embedding_kwargs:
            embedding_kwargs["encoding_format"] = "float"

-           # Call LiteLLM embedding
-           response = litellm.embedding(
-               model=self._format_model_name(),
-               input=text_list,
-               **embedding_kwargs,
-           )
        # Split into token-aware batches
        batches = self._create_batches(text_list, max_tokens_per_batch)

-           # Extract embeddings
-           embeddings = [item["embedding"] for item in response.data]
        if len(batches) > 1:
            logger.info(f"Split {len(text_list)} texts into {len(batches)} batches for embedding")

-           # Convert to numpy array
-           result = np.array(embeddings, dtype=np.float32)
        all_embeddings = []

-           # Validate dimensions
-           if result.shape[1] != self.dimensions:
-               logger.warning(
-                   f"Expected {self.dimensions} dimensions, got {result.shape[1]}. "
-                   f"Configuration may be incorrect."
        for batch_idx, batch in enumerate(batches):
            try:
                # Build call kwargs with explicit api_base
                call_kwargs = {**embedding_kwargs}
                if self._provider_config.api_base:
                    call_kwargs["api_base"] = self._provider_config.api_base
                if self._provider_config.api_key:
                    call_kwargs["api_key"] = self._provider_config.api_key

                # Call LiteLLM embedding for this batch
                response = litellm.embedding(
                    model=self._format_model_name(),
                    input=batch,
                    **call_kwargs,
                )

-           return result
                # Extract embeddings
                batch_embeddings = [item["embedding"] for item in response.data]
                all_embeddings.extend(batch_embeddings)

-       except Exception as e:
-           logger.error(f"LiteLLM embedding failed: {e}")
-           raise
            except Exception as e:
                logger.error(f"LiteLLM embedding failed for batch {batch_idx + 1}/{len(batches)}: {e}")
                raise

        # Convert to numpy array
        result = np.array(all_embeddings, dtype=np.float32)

        # Validate dimensions
        if result.shape[1] != self.dimensions:
            logger.warning(
                f"Expected {self.dimensions} dimensions, got {result.shape[1]}. "
                f"Configuration may be incorrect."
            )

        return result

    @property
    def model_name(self) -> str:
@@ -38,7 +38,9 @@ import {
  saveClaudeCliTools,
  updateClaudeToolEnabled,
  updateClaudeCacheSettings,
- getClaudeCliToolsInfo
  getClaudeCliToolsInfo,
  addClaudeCustomEndpoint,
  removeClaudeCustomEndpoint
} from '../../tools/claude-cli-tools.js';

export interface RouteContext {
@@ -211,6 +213,93 @@ export async function handleCliRoutes(ctx: RouteContext): Promise<boolean> {
    }
  }

  // API: Get all custom endpoints
  if (pathname === '/api/cli/endpoints' && req.method === 'GET') {
    try {
      const config = loadClaudeCliTools(initialPath);
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ endpoints: config.customEndpoints || [] }));
    } catch (err) {
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ error: (err as Error).message }));
    }
    return true;
  }

  // API: Add/Update custom endpoint
  if (pathname === '/api/cli/endpoints' && req.method === 'POST') {
    handlePostRequest(req, res, async (body: unknown) => {
      try {
        const { id, name, enabled } = body as { id: string; name: string; enabled: boolean };
        if (!id || !name) {
          return { error: 'id and name are required', status: 400 };
        }
        const config = addClaudeCustomEndpoint(initialPath, { id, name, enabled: enabled !== false });

        broadcastToClients({
          type: 'CLI_ENDPOINT_UPDATED',
          payload: { endpoint: { id, name, enabled }, timestamp: new Date().toISOString() }
        });

        return { success: true, endpoints: config.customEndpoints };
      } catch (err) {
        return { error: (err as Error).message, status: 500 };
      }
    });
    return true;
  }

  // API: Update custom endpoint enabled status
  if (pathname.match(/^\/api\/cli\/endpoints\/[^/]+$/) && req.method === 'PUT') {
    const endpointId = pathname.split('/').pop() || '';
    handlePostRequest(req, res, async (body: unknown) => {
      try {
        const { enabled, name } = body as { enabled?: boolean; name?: string };
        const config = loadClaudeCliTools(initialPath);
        const endpoint = config.customEndpoints.find(e => e.id === endpointId);

        if (!endpoint) {
          return { error: 'Endpoint not found', status: 404 };
        }

        if (typeof enabled === 'boolean') endpoint.enabled = enabled;
        if (name) endpoint.name = name;

        saveClaudeCliTools(initialPath, config);

        broadcastToClients({
          type: 'CLI_ENDPOINT_UPDATED',
          payload: { endpoint, timestamp: new Date().toISOString() }
        });

        return { success: true, endpoint };
      } catch (err) {
        return { error: (err as Error).message, status: 500 };
      }
    });
    return true;
  }

  // API: Delete custom endpoint
  if (pathname.match(/^\/api\/cli\/endpoints\/[^/]+$/) && req.method === 'DELETE') {
    const endpointId = pathname.split('/').pop() || '';
    try {
      const config = removeClaudeCustomEndpoint(initialPath, endpointId);

      broadcastToClients({
        type: 'CLI_ENDPOINT_DELETED',
        payload: { endpointId, timestamp: new Date().toISOString() }
      });

      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ success: true, endpoints: config.customEndpoints }));
    } catch (err) {
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ error: (err as Error).message }));
    }
    return true;
  }

  // API: CLI Execution History
  if (pathname === '/api/cli/history') {
    const projectPath = url.searchParams.get('path') || initialPath;
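The four routes above add a small CRUD surface for custom CLI endpoints. A hedged client-side sketch of the request and response shapes, assuming the dashboard server is reachable locally — the base URL, port, and the "my-endpoint" id are illustrative; only the paths and JSON bodies are taken from the handlers above:

# Illustrative client for the /api/cli/endpoints routes; requires `pip install requests`.
import requests

BASE = "http://localhost:3000"  # assumed local dashboard address, not from the diff

# Register (or update) a custom endpoint: POST expects { id, name, enabled }
requests.post(f"{BASE}/api/cli/endpoints",
              json={"id": "my-endpoint", "name": "My Endpoint", "enabled": True})

# List configured endpoints: the handler responds with { "endpoints": [...] }
print(requests.get(f"{BASE}/api/cli/endpoints").json())

# Toggle or rename one endpoint: PUT accepts { enabled?, name? } and returns 404 for unknown ids
requests.put(f"{BASE}/api/cli/endpoints/my-endpoint", json={"enabled": False})

# Remove it again: DELETE responds with the remaining endpoints
requests.delete(f"{BASE}/api/cli/endpoints/my-endpoint")

Each mutation is also pushed to connected clients via broadcastToClients as CLI_ENDPOINT_UPDATED or CLI_ENDPOINT_DELETED, so the UI can react without re-polling.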
@@ -529,27 +529,38 @@ export async function handleLiteLLMApiRoutes(ctx: RouteContext): Promise<boolean
  // GET /api/litellm-api/ccw-litellm/status - Check ccw-litellm installation status
  if (pathname === '/api/litellm-api/ccw-litellm/status' && req.method === 'GET') {
    try {
-     const { spawn } = await import('child_process');
-     const result = await new Promise<{ installed: boolean; version?: string }>((resolve) => {
-       const proc = spawn('python', ['-c', 'import ccw_litellm; print(ccw_litellm.__version__ if hasattr(ccw_litellm, "__version__") else "installed")'], {
-         shell: true,
-         timeout: 10000
-       });
      const { execSync } = await import('child_process');

-       let output = '';
-       proc.stdout?.on('data', (data) => { output += data.toString(); });
-       proc.on('close', (code) => {
-         if (code === 0) {
-           resolve({ installed: true, version: output.trim() || 'unknown' });
-         } else {
-           resolve({ installed: false });
      // Try multiple Python executables
      const pythonExecutables = ['python', 'python3', 'py'];
      // Use single quotes inside Python code for Windows compatibility
      const pythonCode = "import ccw_litellm; print(getattr(ccw_litellm, '__version__', 'installed'))";

      let installed = false;
      let version = '';
      let lastError = '';

      for (const pythonExe of pythonExecutables) {
        try {
          const output = execSync(`${pythonExe} -c "${pythonCode}"`, {
            encoding: 'utf-8',
            timeout: 10000,
            windowsHide: true
          });
          version = output.trim();
          if (version) {
            installed = true;
            console.log(`[ccw-litellm status] Found with ${pythonExe}: ${version}`);
            break;
          }
-       });
-       proc.on('error', () => resolve({ installed: false }));
-     });
        } catch (err) {
          lastError = (err as Error).message;
          console.log(`[ccw-litellm status] ${pythonExe} failed:`, lastError.substring(0, 100));
        }
      }

      res.writeHead(200, { 'Content-Type': 'application/json' });
-     res.end(JSON.stringify(result));
      res.end(JSON.stringify(installed ? { installed: true, version } : { installed: false, error: lastError }));
    } catch (err) {
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ installed: false, error: (err as Error).message }));
@@ -33,9 +33,13 @@ async function loadCliHistory(options = {}) {
  }
}

// Load native session content for a specific execution
- async function loadNativeSessionContent(executionId) {
async function loadNativeSessionContent(executionId, sourceDir) {
  try {
-   const url = `/api/cli/native-session?path=${encodeURIComponent(projectPath)}&id=${encodeURIComponent(executionId)}`;
    // If sourceDir provided, use it to build the correct path
    const basePath = sourceDir && sourceDir !== '.'
      ? projectPath + '/' + sourceDir
      : projectPath;
    const url = `/api/cli/native-session?path=${encodeURIComponent(basePath)}&id=${encodeURIComponent(executionId)}`;
    const response = await fetch(url);
    if (!response.ok) return null;
    return await response.json();

@@ -133,9 +137,12 @@ function renderCliHistory() {
        </span>`
      : '';

    // Escape sourceDir for use in onclick
    const sourceDirEscaped = exec.sourceDir ? exec.sourceDir.replace(/'/g, "\\'") : '';

    return `
      <div class="cli-history-item ${hasNative ? 'has-native' : ''}">
-       <div class="cli-history-item-content" onclick="showExecutionDetail('${exec.id}')">
        <div class="cli-history-item-content" onclick="showExecutionDetail('${exec.id}', '${sourceDirEscaped}')">
          <div class="cli-history-item-header">
            <span class="cli-tool-tag cli-tool-${exec.tool}">${exec.tool.toUpperCase()}</span>
            <span class="cli-mode-tag">${exec.mode || 'analysis'}</span>

@@ -154,14 +161,14 @@ function renderCliHistory() {
        </div>
        <div class="cli-history-actions">
          ${hasNative ? `
-           <button class="btn-icon" onclick="event.stopPropagation(); showNativeSessionDetail('${exec.id}')" title="View Native Session">
            <button class="btn-icon" onclick="event.stopPropagation(); showNativeSessionDetail('${exec.id}', '${sourceDirEscaped}')" title="View Native Session">
              <i data-lucide="file-json" class="w-3.5 h-3.5"></i>
            </button>
          ` : ''}
-         <button class="btn-icon" onclick="event.stopPropagation(); showExecutionDetail('${exec.id}')" title="View Details">
          <button class="btn-icon" onclick="event.stopPropagation(); showExecutionDetail('${exec.id}', '${sourceDirEscaped}')" title="View Details">
            <i data-lucide="eye" class="w-3.5 h-3.5"></i>
          </button>
-         <button class="btn-icon btn-danger" onclick="event.stopPropagation(); confirmDeleteExecution('${exec.id}')" title="Delete">
          <button class="btn-icon btn-danger" onclick="event.stopPropagation(); confirmDeleteExecution('${exec.id}', '${sourceDirEscaped}')" title="Delete">
            <i data-lucide="trash-2" class="w-3.5 h-3.5"></i>
          </button>
        </div>

@@ -650,9 +657,9 @@ async function copyConcatenatedPrompt(executionId) {
/**
 * Show native session detail modal with full conversation content
 */
- async function showNativeSessionDetail(executionId) {
async function showNativeSessionDetail(executionId, sourceDir) {
  // Load native session content
- const nativeSession = await loadNativeSessionContent(executionId);
  const nativeSession = await loadNativeSessionContent(executionId, sourceDir);

  if (!nativeSession) {
    showRefreshToast('Native session not found', 'error');
@@ -228,6 +228,11 @@ const i18n = {
    'cli.codexLensDescFull': 'Full-text code search engine',
    'cli.semanticDesc': 'AI-powered code understanding',
    'cli.semanticDescFull': 'Natural language code search',
    'cli.apiEndpoints': 'API Endpoints',
    'cli.configured': 'configured',
    'cli.addToCli': 'Add to CLI',
    'cli.enabled': 'Enabled',
    'cli.disabled': 'Disabled',

    // CodexLens Configuration
    'codexlens.config': 'CodexLens Configuration',

@@ -378,6 +383,8 @@ const i18n = {
    'codexlens.indexComplete': 'Index complete',
    'codexlens.indexSuccess': 'Index created successfully',
    'codexlens.indexFailed': 'Indexing failed',
    'codexlens.embeddingsFailed': 'Embeddings generation failed',
    'codexlens.ftsSuccessEmbeddingsFailed': 'FTS index created, but embeddings failed',

    // CodexLens Install
    'codexlens.installDesc': 'Python-based code indexing engine',

@@ -1880,6 +1887,11 @@ const i18n = {
    'cli.codexLensDescFull': '全文代码搜索引擎',
    'cli.semanticDesc': 'AI 驱动的代码理解',
    'cli.semanticDescFull': '自然语言代码搜索',
    'cli.apiEndpoints': 'API 端点',
    'cli.configured': '已配置',
    'cli.addToCli': '添加到 CLI',
    'cli.enabled': '已启用',
    'cli.disabled': '已禁用',

    // CodexLens 配置
    'codexlens.config': 'CodexLens 配置',

@@ -2031,6 +2043,8 @@ const i18n = {
    'codexlens.indexComplete': '索引完成',
    'codexlens.indexSuccess': '索引创建成功',
    'codexlens.indexFailed': '索引失败',
    'codexlens.embeddingsFailed': '嵌入生成失败',
    'codexlens.ftsSuccessEmbeddingsFailed': 'FTS 索引已创建,但嵌入生成失败',

    // CodexLens 安装
    'codexlens.installDesc': '基于 Python 的代码索引引擎',
@@ -2739,8 +2739,11 @@ function toggleKeyVisibility(btn) {
 */
async function checkCcwLitellmStatus() {
  try {
    console.log('[API Settings] Checking ccw-litellm status...');
    var response = await fetch('/api/litellm-api/ccw-litellm/status');
    console.log('[API Settings] Status response:', response.status);
    var status = await response.json();
    console.log('[API Settings] ccw-litellm status:', status);
    window.ccwLitellmStatus = status;
    return status;
  } catch (e) {
@@ -59,6 +59,91 @@ async function loadCcwEndpointTools() {
  }
}

// ========== LiteLLM API Endpoints ==========
var litellmApiEndpoints = [];
var cliCustomEndpoints = [];

async function loadLitellmApiEndpoints() {
  try {
    var response = await fetch('/api/litellm-api/config');
    if (!response.ok) throw new Error('Failed to load LiteLLM endpoints');
    var data = await response.json();
    litellmApiEndpoints = data.endpoints || [];
    window.litellmApiConfig = data;
    return litellmApiEndpoints;
  } catch (err) {
    console.error('Failed to load LiteLLM endpoints:', err);
    litellmApiEndpoints = [];
    return [];
  }
}

async function loadCliCustomEndpoints() {
  try {
    var response = await fetch('/api/cli/endpoints');
    if (!response.ok) throw new Error('Failed to load CLI custom endpoints');
    var data = await response.json();
    cliCustomEndpoints = data.endpoints || [];
    return cliCustomEndpoints;
  } catch (err) {
    console.error('Failed to load CLI custom endpoints:', err);
    cliCustomEndpoints = [];
    return [];
  }
}

async function toggleEndpointEnabled(endpointId, enabled) {
  try {
    var response = await fetch('/api/cli/endpoints/' + endpointId, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ enabled: enabled })
    });
    if (!response.ok) throw new Error('Failed to update endpoint');
    var data = await response.json();
    if (data.success) {
      // Update local state
      var idx = cliCustomEndpoints.findIndex(function(e) { return e.id === endpointId; });
      if (idx >= 0) {
        cliCustomEndpoints[idx].enabled = enabled;
      }
      showRefreshToast((enabled ? 'Enabled' : 'Disabled') + ' endpoint: ' + endpointId, 'success');
    }
    return data;
  } catch (err) {
    showRefreshToast('Failed to update endpoint: ' + err.message, 'error');
    throw err;
  }
}

async function syncEndpointToCliTools(endpoint) {
  try {
    var response = await fetch('/api/cli/endpoints', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        id: endpoint.id,
        name: endpoint.name,
        enabled: true
      })
    });
    if (!response.ok) throw new Error('Failed to sync endpoint');
    var data = await response.json();
    if (data.success) {
      cliCustomEndpoints = data.endpoints;
      showRefreshToast('Endpoint synced to CLI tools: ' + endpoint.id, 'success');
      renderToolsSection();
    }
    return data;
  } catch (err) {
    showRefreshToast('Failed to sync endpoint: ' + err.message, 'error');
    throw err;
  }
}

window.toggleEndpointEnabled = toggleEndpointEnabled;
window.syncEndpointToCliTools = syncEndpointToCliTools;

// ========== CLI Tool Configuration ==========
async function loadCliToolConfig() {
  try {

@@ -322,7 +407,9 @@ async function renderCliManager() {
    loadCliToolStatus(),
    loadCodexLensStatus(),
    loadCcwInstallations(),
-   loadCcwEndpointTools()
    loadCcwEndpointTools(),
    loadLitellmApiEndpoints(),
    loadCliCustomEndpoints()
  ]);

  container.innerHTML = '<div class="status-manager">' +

@@ -487,6 +574,51 @@ function renderToolsSection() {
      '</div>';
  }

  // API Endpoints section
  var apiEndpointsHtml = '';
  if (litellmApiEndpoints.length > 0) {
    var endpointItems = litellmApiEndpoints.map(function(endpoint) {
      // Check if endpoint is synced to CLI tools
      var cliEndpoint = cliCustomEndpoints.find(function(e) { return e.id === endpoint.id; });
      var isSynced = !!cliEndpoint;
      var isEnabled = cliEndpoint ? cliEndpoint.enabled : false;

      // Find provider info
      var provider = (window.litellmApiConfig?.providers || []).find(function(p) { return p.id === endpoint.providerId; });
      var providerName = provider ? provider.name : endpoint.providerId;

      return '<div class="tool-item ' + (isSynced && isEnabled ? 'available' : 'unavailable') + '">' +
        '<div class="tool-item-left">' +
          '<span class="tool-status-dot ' + (isSynced && isEnabled ? 'status-available' : 'status-unavailable') + '"></span>' +
          '<div class="tool-item-info">' +
            '<div class="tool-item-name">' + endpoint.id + ' <span class="tool-type-badge">API</span></div>' +
            '<div class="tool-item-desc">' + endpoint.model + ' (' + providerName + ')</div>' +
          '</div>' +
        '</div>' +
        '<div class="tool-item-right">' +
          (isSynced
            ? '<label class="toggle-switch" onclick="event.stopPropagation()">' +
                '<input type="checkbox" ' + (isEnabled ? 'checked' : '') + ' onchange="toggleEndpointEnabled(\'' + endpoint.id + '\', this.checked); renderToolsSection();">' +
                '<span class="toggle-slider"></span>' +
              '</label>'
            : '<button class="btn-sm btn-primary" onclick="event.stopPropagation(); syncEndpointToCliTools({id: \'' + endpoint.id + '\', name: \'' + endpoint.name + '\'})">' +
                '<i data-lucide="plus" class="w-3 h-3"></i> ' + (t('cli.addToCli') || 'Add to CLI') +
              '</button>') +
        '</div>' +
      '</div>';
    }).join('');

    apiEndpointsHtml = '<div class="tools-subsection" style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid var(--border);">' +
      '<div class="section-header-left" style="margin-bottom: 0.5rem;">' +
        '<h4 style="font-size: 0.875rem; font-weight: 600; display: flex; align-items: center; gap: 0.5rem;">' +
          '<i data-lucide="cloud" class="w-4 h-4"></i> ' + (t('cli.apiEndpoints') || 'API Endpoints') +
        '</h4>' +
        '<span class="section-count">' + litellmApiEndpoints.length + ' ' + (t('cli.configured') || 'configured') + '</span>' +
      '</div>' +
      '<div class="tools-list">' + endpointItems + '</div>' +
    '</div>';
  }

  container.innerHTML = '<div class="section-header">' +
    '<div class="section-header-left">' +
      '<h3><i data-lucide="terminal" class="w-4 h-4"></i> ' + t('cli.tools') + '</h3>' +

@@ -500,7 +632,8 @@ function renderToolsSection() {
    toolsHtml +
    codexLensHtml +
    semanticHtml +
-   '</div>';
    '</div>' +
    apiEndpointsHtml;

  if (window.lucide) lucide.createIcons();
}
@@ -383,7 +383,7 @@ async function loadSemanticDepsStatus() {
    acceleratorIcon = 'zap';
    acceleratorClass = 'bg-green-500/20 text-green-600';
  } else if (accelerator === 'DirectML') {
-   acceleratorIcon = 'gpu-card';
    acceleratorIcon = 'cpu';
    acceleratorClass = 'bg-blue-500/20 text-blue-600';
  } else if (accelerator === 'ROCm') {
    acceleratorIcon = 'flame';

@@ -450,7 +450,7 @@ function buildGpuModeSelector(gpuInfo) {
      id: 'directml',
      label: 'DirectML',
      desc: t('codexlens.directmlModeDesc') || 'Windows GPU (NVIDIA/AMD/Intel)',
-     icon: 'gpu-card',
      icon: 'cpu',
      available: gpuInfo.available.includes('directml'),
      recommended: gpuInfo.mode === 'directml'
    },
@@ -1331,7 +1331,15 @@ async function startCodexLensIndexing(indexType, embeddingModel, embeddingBacken

  // Check if completed successfully (WebSocket might have already reported)
  if (result.success) {
-   handleIndexComplete(true, t('codexlens.indexComplete'));
    // For vector index, check if embeddings were actually generated
    var embeddingsResult = result.result && result.result.embeddings;
    if (indexType === 'vector' && embeddingsResult && !embeddingsResult.generated) {
      // FTS succeeded but embeddings failed - show partial success
      var errorMsg = embeddingsResult.error || t('codexlens.embeddingsFailed');
      handleIndexComplete(false, t('codexlens.ftsSuccessEmbeddingsFailed') || 'FTS index created, but embeddings failed: ' + errorMsg);
    } else {
      handleIndexComplete(true, t('codexlens.indexComplete'));
    }
  } else if (!result.success) {
    handleIndexComplete(false, result.error || t('common.unknownError'));
  }
@@ -275,11 +275,22 @@ interface SearchResult {
  message?: string;
}

interface ModelInfo {
  model_profile?: string;
  model_name?: string;
  embedding_dim?: number;
  backend?: string;
  created_at?: string;
  updated_at?: string;
}

interface IndexStatus {
  indexed: boolean;
  has_embeddings: boolean;
  file_count?: number;
  embeddings_coverage_percent?: number;
  total_chunks?: number;
  model_info?: ModelInfo;
  warning?: string;
}

@@ -320,6 +331,18 @@ async function checkIndexStatus(path: string = '.'): Promise<IndexStatus> {
    const embeddingsData = status.embeddings || {};
    const embeddingsCoverage = embeddingsData.coverage_percent || 0;
    const has_embeddings = embeddingsCoverage >= 50; // Threshold: 50%
    const totalChunks = embeddingsData.total_chunks || 0;

    // Extract model info if available
    const modelInfoData = embeddingsData.model_info;
    const modelInfo: ModelInfo | undefined = modelInfoData ? {
      model_profile: modelInfoData.model_profile,
      model_name: modelInfoData.model_name,
      embedding_dim: modelInfoData.embedding_dim,
      backend: modelInfoData.backend,
      created_at: modelInfoData.created_at,
      updated_at: modelInfoData.updated_at,
    } : undefined;

    let warning: string | undefined;
    if (!indexed) {

@@ -335,6 +358,8 @@ async function checkIndexStatus(path: string = '.'): Promise<IndexStatus> {
      has_embeddings,
      file_count: status.total_files,
      embeddings_coverage_percent: embeddingsCoverage,
      total_chunks: totalChunks,
      model_info: modelInfo,
      warning,
    };
  } catch {
@@ -1831,6 +1831,14 @@ def embeddings_generate(
        "-r",
        help="Recursively process all _index.db files in directory tree.",
    ),
    max_workers: int = typer.Option(
        1,
        "--max-workers",
        "-w",
        min=1,
        max=16,
        help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).",
    ),
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."),
) -> None:

@@ -1927,7 +1935,10 @@ def embeddings_generate(
    else:
        console.print(f"Index: [dim]{index_path}[/dim]")
    console.print(f"Backend: [cyan]{backend}[/cyan]")
-   console.print(f"Model: [cyan]{model}[/cyan]\n")
    console.print(f"Model: [cyan]{model}[/cyan]")
    if max_workers > 1:
        console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]")
    console.print()

    if use_recursive:
        result = generate_embeddings_recursive(

@@ -1937,6 +1948,7 @@ def embeddings_generate(
            force=force,
            chunk_size=chunk_size,
            progress_callback=progress_update,
            max_workers=max_workers,
        )
    else:
        result = generate_embeddings(

@@ -1946,6 +1958,7 @@ def embeddings_generate(
            force=force,
            chunk_size=chunk_size,
            progress_callback=progress_update,
            max_workers=max_workers,
        )

    if json_mode:
@@ -7,7 +7,6 @@ import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from itertools import islice
from pathlib import Path
from threading import Lock
from typing import Dict, Generator, List, Optional, Tuple

try:
@@ -441,82 +440,133 @@ def generate_embeddings(
    batch_number = 0
    files_seen = set()

    # Thread-safe counters for concurrent processing
    counter_lock = Lock()

-   def process_batch(batch_data: Tuple[int, List[Tuple]]) -> Tuple[int, set, Optional[str]]:
-       """Process a single batch: generate embeddings and store.
    def compute_embeddings_only(batch_data: Tuple[int, List[Tuple]]):
        """Compute embeddings for a batch (no DB write).

        Args:
            batch_data: Tuple of (batch_number, chunk_batch)

        Returns:
-           Tuple of (chunks_created, files_in_batch, error_message)
            Tuple of (batch_num, chunk_batch, embeddings_numpy, batch_files, error)
        """
        batch_num, chunk_batch = batch_data
        batch_files = set()

        try:
            # Track files in this batch
            for _, file_path in chunk_batch:
                batch_files.add(file_path)

            # Generate embeddings
            batch_contents = [chunk.content for chunk, _ in chunk_batch]
            embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)

-           # Store embeddings (thread-safe via SQLite's serialized mode)
-           vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
-           return len(chunk_batch), batch_files, None
            return batch_num, chunk_batch, embeddings_numpy, batch_files, None

        except Exception as e:
            error_msg = f"Batch {batch_num}: {str(e)}"
-           logger.error(f"Failed to process embedding batch {batch_num}: {str(e)}")
-           return 0, batch_files, error_msg
            logger.error(f"Failed to compute embeddings for batch {batch_num}: {str(e)}")
            return batch_num, chunk_batch, None, batch_files, error_msg

-   # Collect batches for concurrent processing
-   all_batches = []
-   for chunk_batch in batch_generator:
-       batch_number += 1
-       all_batches.append((batch_number, chunk_batch))
-
-   # Process batches (sequential or concurrent based on max_workers)
    # Process batches based on max_workers setting
    if max_workers <= 1:
-       # Sequential processing (original behavior)
-       for batch_num, chunk_batch in all_batches:
-           chunks_created, batch_files, error = process_batch((batch_num, chunk_batch))
-           files_seen.update(batch_files)
-           total_chunks_created += chunks_created
-           total_files_processed = len(files_seen)
        # Sequential processing - stream directly from generator (no pre-materialization)
        for chunk_batch in batch_generator:
            batch_number += 1

-           if progress_callback and batch_num % 10 == 0:
-               progress_callback(f"  Batch {batch_num}: {total_chunks_created} chunks, {total_files_processed} files")
            # Track files in this batch
            batch_files = set()
            for _, file_path in chunk_batch:
                batch_files.add(file_path)

            try:
                # Generate embeddings
                batch_contents = [chunk.content for chunk, _ in chunk_batch]
                embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)

                # Store embeddings
                vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)

                files_seen.update(batch_files)
                total_chunks_created += len(chunk_batch)
                total_files_processed = len(files_seen)

                if progress_callback and batch_number % 10 == 0:
                    progress_callback(f"  Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")

            except Exception as e:
                logger.error(f"Failed to process batch {batch_number}: {str(e)}")
                files_seen.update(batch_files)
    else:
-       # Concurrent processing for API backends
        # Concurrent processing with producer-consumer pattern
        # Workers compute embeddings (parallel), main thread writes to DB (serial)
        from queue import Queue
        from threading import Thread

        result_queue = Queue(maxsize=max_workers * 2)  # Bounded queue to limit memory
        batch_counter = [0]  # Mutable counter for producer thread
        producer_done = [False]

        def producer():
            """Submit batches to executor, put results in queue."""
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                pending_futures = []

                for chunk_batch in batch_generator:
                    batch_counter[0] += 1
                    batch_num = batch_counter[0]

                    # Submit compute task
                    future = executor.submit(compute_embeddings_only, (batch_num, chunk_batch))
                    pending_futures.append(future)

                    # Check for completed futures and add to queue
                    for f in list(pending_futures):
                        if f.done():
                            try:
                                result_queue.put(f.result())
                            except Exception as e:
                                logger.error(f"Future raised exception: {e}")
                            pending_futures.remove(f)

                # Wait for remaining futures
                for future in as_completed(pending_futures):
                    try:
                        result_queue.put(future.result())
                    except Exception as e:
                        logger.error(f"Future raised exception: {e}")

            producer_done[0] = True
            result_queue.put(None)  # Sentinel to signal completion

        # Start producer thread
        producer_thread = Thread(target=producer, daemon=True)
        producer_thread.start()

        if progress_callback:
-           progress_callback(f"Processing {len(all_batches)} batches with {max_workers} concurrent workers...")
            progress_callback(f"Processing with {max_workers} concurrent embedding workers...")

-       with ThreadPoolExecutor(max_workers=max_workers) as executor:
-           futures = {executor.submit(process_batch, batch): batch[0] for batch in all_batches}
        # Consumer: main thread writes to DB (serial, no contention)
        completed = 0
        while True:
            result = result_queue.get()
            if result is None:  # Sentinel
                break

-           completed = 0
-           for future in as_completed(futures):
-               batch_num = futures[future]
-               try:
-                   chunks_created, batch_files, error = future.result()
            batch_num, chunk_batch, embeddings_numpy, batch_files, error = result

-                   with counter_lock:
-                       files_seen.update(batch_files)
-                       total_chunks_created += chunks_created
-                       total_files_processed = len(files_seen)
-                       completed += 1
            if embeddings_numpy is not None and error is None:
                # Write to DB in main thread (no contention)
                vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
                total_chunks_created += len(chunk_batch)

-                   if progress_callback and completed % 10 == 0:
-                       progress_callback(f"  Completed {completed}/{len(all_batches)} batches: {total_chunks_created} chunks")
            files_seen.update(batch_files)
            total_files_processed = len(files_seen)
            completed += 1

-               except Exception as e:
-                   logger.error(f"Batch {batch_num} raised exception: {str(e)}")
            if progress_callback and completed % 10 == 0:
                progress_callback(f"  Completed {completed} batches: {total_chunks_created} chunks")

        producer_thread.join()
        batch_number = batch_counter[0]

    # Notify before ANN index finalization (happens when bulk_insert context exits)
    if progress_callback:
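The concurrent branch above is a producer-consumer split: worker threads only compute embeddings, while the main thread drains a bounded queue and performs every vector-store write serially, so the SQLite database never sees concurrent writers. A minimal, self-contained sketch of that shape — the compute and write functions below are stand-ins for embedder.embed_to_numpy and vector_store.add_chunks_batch_numpy, and the whole snippet is illustrative rather than the shipped code:

# Parallel compute, serial write, bounded hand-off queue (illustrative only).
from concurrent.futures import ThreadPoolExecutor
from queue import Queue
from threading import Thread

def compute(item):            # stand-in for the embedding API call
    return item, item * item

def write(result):            # stand-in for the single-writer DB insert
    print("stored", result)

def run(items, max_workers=4):
    queue = Queue(maxsize=max_workers * 2)  # bounded so results cannot pile up in memory

    def producer():
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            for future in [pool.submit(compute, item) for item in items]:
                queue.put(future.result())
        queue.put(None)  # sentinel: tells the consumer there is nothing left

    Thread(target=producer, daemon=True).start()
    while (result := queue.get()) is not None:   # single consumer = serial writes
        write(result)

run(range(10))

The bounded queue is what keeps memory flat when the embedding API outpaces the database writer; the sentinel mirrors the result_queue.put(None) used above.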
@@ -718,7 +768,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
        index_root: Root index directory

    Returns:
-       Aggregated status with coverage statistics
        Aggregated status with coverage statistics, model info, and timestamps
    """
    index_files = discover_all_index_dbs(index_root)

@@ -734,6 +784,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
                "coverage_percent": 0.0,
                "indexes_with_embeddings": 0,
                "indexes_without_embeddings": 0,
                "model_info": None,
            },
        }

@@ -741,6 +792,8 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
    files_with_embeddings = 0
    total_chunks = 0
    indexes_with_embeddings = 0
    model_info = None
    latest_updated_at = None

    for index_path in index_files:
        status = check_index_embeddings(index_path)

@@ -752,6 +805,40 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
        if result["has_embeddings"]:
            indexes_with_embeddings += 1

            # Get model config from first index with embeddings (they should all match)
            if model_info is None:
                try:
                    from codexlens.semantic.vector_store import VectorStore
                    with VectorStore(index_path) as vs:
                        config = vs.get_model_config()
                        if config:
                            model_info = {
                                "model_profile": config.get("model_profile"),
                                "model_name": config.get("model_name"),
                                "embedding_dim": config.get("embedding_dim"),
                                "backend": config.get("backend"),
                                "created_at": config.get("created_at"),
                                "updated_at": config.get("updated_at"),
                            }
                            latest_updated_at = config.get("updated_at")
                except Exception:
                    pass
            else:
                # Track the latest updated_at across all indexes
                try:
                    from codexlens.semantic.vector_store import VectorStore
                    with VectorStore(index_path) as vs:
                        config = vs.get_model_config()
                        if config and config.get("updated_at"):
                            if latest_updated_at is None or config["updated_at"] > latest_updated_at:
                                latest_updated_at = config["updated_at"]
                except Exception:
                    pass

    # Update model_info with latest timestamp
    if model_info and latest_updated_at:
        model_info["updated_at"] = latest_updated_at

    return {
        "success": True,
        "result": {

@@ -763,6 +850,7 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
            "coverage_percent": round((files_with_embeddings / total_files * 100) if total_files > 0 else 0, 1),
            "indexes_with_embeddings": indexes_with_embeddings,
            "indexes_without_embeddings": len(index_files) - indexes_with_embeddings,
            "model_info": model_info,
        },
    }