mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-11 02:33:51 +08:00
Implement SPLADE sparse encoder and associated database migrations
- Added `splade_encoder.py` for ONNX-optimized SPLADE encoding, including methods for encoding text and batch processing.
- Created `SPLADE_IMPLEMENTATION.md` to document the SPLADE encoder's functionality, design patterns, and integration points.
- Introduced migration script `migration_009_add_splade.py` to add SPLADE metadata and posting list tables to the database.
- Developed `splade_index.py` for managing the SPLADE inverted index, supporting efficient sparse vector retrieval.
- Added verification script `verify_watcher.py` to test FileWatcher event filtering and debouncing functionality.
This commit is contained in:
@@ -17,6 +17,16 @@ import {
|
||||
isIndexingInProgress
|
||||
} from '../../tools/codex-lens.js';
|
||||
import type { ProgressInfo, GpuMode } from '../../tools/codex-lens.js';
|
||||
import { loadLiteLLMApiConfig } from '../../config/litellm-api-config-manager.js';
|
||||
|
||||
// File watcher state. These are module-level bindings, so the values are
// shared by every request handled by this module (persisted across requests).
let watcherProcess: any = null;

// Bookkeeping reported by the watcher status endpoint.
let watcherStats: {
  running: boolean;
  root_path: string;
  events_processed: number;
  start_time: Date | null;
} = {
  running: false,
  root_path: '',
  events_processed: 0,
  start_time: null
};
|
||||
|
||||
export interface RouteContext {
|
||||
pathname: string;
|
||||
@@ -1052,5 +1062,478 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise<boolean>
|
||||
return true;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// RERANKER CONFIGURATION ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
// API: Get Reranker Configuration
// Responds with built-in defaults, overlaid with the LiteLLM endpoint names
// (for the UI dropdown) and, when the CodexLens venv is ready, the values
// reported by `codexlens config --json`.
if (pathname === '/api/codexlens/reranker/config' && req.method === 'GET') {
  try {
    const venvStatus = await checkVenvStatus();

    // Default reranker config
    const rerankerConfig = {
      backend: 'onnx',
      model_name: 'cross-encoder/ms-marco-MiniLM-L-6-v2',
      api_provider: 'siliconflow',
      // The API key is read from this process's environment, so whether it is
      // set does not depend on CodexLens being installed or on its config
      // loading successfully.
      api_key_set: Boolean(process.env.RERANKER_API_KEY),
      available_backends: ['onnx', 'api', 'litellm', 'legacy'],
      api_providers: ['siliconflow', 'cohere', 'jina'],
      litellm_endpoints: [] as string[],
      config_source: 'default'
    };

    // Load LiteLLM endpoints for dropdown
    try {
      const litellmConfig = loadLiteLLMApiConfig(initialPath);
      if (litellmConfig.endpoints && Array.isArray(litellmConfig.endpoints)) {
        rerankerConfig.litellm_endpoints = litellmConfig.endpoints
          .map((ep: any) => ep.alias || ep.name || ep.baseUrl)
          .filter(Boolean);
      }
    } catch (e) {
      // Deliberate best-effort: LiteLLM config not available, continue with
      // empty endpoints.
    }

    // If CodexLens is installed, try to get actual config
    if (venvStatus.ready) {
      try {
        const result = await executeCodexLens(['config', '--json']);
        if (result.success) {
          const config = extractJSON(result.output);
          if (config.success && config.result) {
            // Map config values; only keys that are present override defaults.
            if (config.result.reranker_backend) {
              rerankerConfig.backend = config.result.reranker_backend;
              rerankerConfig.config_source = 'codexlens';
            }
            if (config.result.reranker_model) {
              rerankerConfig.model_name = config.result.reranker_model;
            }
            if (config.result.reranker_api_provider) {
              rerankerConfig.api_provider = config.result.reranker_api_provider;
            }
          }
        }
      } catch (e) {
        // A failing CLI call must not fail the request; defaults are returned.
        console.error('[CodexLens] Failed to get reranker config:', e);
      }
    }

    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ success: true, ...rerankerConfig }));
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    res.writeHead(500, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ success: false, error: message }));
  }
  return true;
}
|
||||
|
||||
// API: Set Reranker Configuration
// Accepts any subset of { backend, model_name, api_provider, api_key,
// litellm_endpoint }. Each provided config value is written through
// `codexlens config set`; the API key is kept in process.env only.
// Returns the list of fields that were applied, and reports an error (with
// `failed_fields`) when any requested `config set` call did not succeed.
if (pathname === '/api/codexlens/reranker/config' && req.method === 'POST') {
  handlePostRequest(req, res, async (body) => {
    const { backend, model_name, api_provider, api_key, litellm_endpoint } = body;

    // Validate backend
    const validBackends = ['onnx', 'api', 'litellm', 'legacy'];
    if (backend && !validBackends.includes(backend)) {
      return { success: false, error: `Invalid backend: ${backend}. Valid options: ${validBackends.join(', ')}`, status: 400 };
    }

    // Validate api_provider
    const validProviders = ['siliconflow', 'cohere', 'jina'];
    if (api_provider && !validProviders.includes(api_provider)) {
      return { success: false, error: `Invalid api_provider: ${api_provider}. Valid options: ${validProviders.join(', ')}`, status: 400 };
    }

    try {
      const updates: string[] = [];
      const failures: string[] = [];

      // [response field name, codexlens config key, requested value], applied
      // in this order. Falsy values mean "not requested" and are skipped.
      const settings: Array<[string, string, unknown]> = [
        ['backend', 'reranker_backend', backend],
        ['model_name', 'reranker_model', model_name],
        ['api_provider', 'reranker_api_provider', api_provider],
        ['litellm_endpoint', 'reranker_litellm_endpoint', litellm_endpoint]
      ];

      for (const [field, configKey, value] of settings) {
        if (!value) continue;
        const result = await executeCodexLens(['config', 'set', configKey, String(value), '--json']);
        (result.success ? updates : failures).push(field);
      }

      // Handle API key - write to .env file or environment
      if (api_key) {
        // For security, we store in process.env for the current session
        // In production, this should be written to a secure .env file
        process.env.RERANKER_API_KEY = api_key;
        updates.push('api_key');
      }

      // Do not claim success when a requested setting was not persisted.
      if (failures.length > 0) {
        return {
          success: false,
          error: `Failed to update: ${failures.join(', ')}`,
          updated_fields: updates,
          failed_fields: failures,
          status: 500
        };
      }

      return {
        success: true,
        message: `Updated: ${updates.join(', ')}`,
        updated_fields: updates
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return { success: false, error: message, status: 500 };
    }
  });
  return true;
}
|
||||
|
||||
// ============================================================
|
||||
// FILE WATCHER CONTROL ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
// API: Get File Watcher Status
// Reports the in-memory watcher bookkeeping; uptime is derived from the
// recorded start time and is 0 when no start time is set.
if (pathname === '/api/codexlens/watch/status') {
  const { running, root_path, events_processed, start_time: startedAt } = watcherStats;

  let uptimeSeconds = 0;
  if (startedAt) {
    uptimeSeconds = Math.floor((Date.now() - startedAt.getTime()) / 1000);
  }

  const statusBody = JSON.stringify({
    success: true,
    running,
    root_path,
    events_processed,
    start_time: startedAt?.toISOString() || null,
    uptime_seconds: uptimeSeconds
  });

  res.writeHead(200, { 'Content-Type': 'application/json' });
  res.end(statusBody);
  return true;
}
|
||||
|
||||
// API: Start File Watcher
// Body: { path?: string, debounce_ms?: number }. Spawns `codexlens watch` for
// the target directory (defaults to initialPath), records it in the
// module-level watcher state and broadcasts CODEXLENS_WATCHER_STATUS.
// Responds 400 for invalid input / already running / CodexLens missing and
// 500 when the process cannot be spawned.
if (pathname === '/api/codexlens/watch/start' && req.method === 'POST') {
  handlePostRequest(req, res, async (body) => {
    const { path: watchPath, debounce_ms = 1000 } = body;
    const targetPath = watchPath || initialPath;

    if (watcherStats.running) {
      return { success: false, error: 'Watcher already running', status: 400 };
    }

    // Reject malformed input up front instead of letting fs/spawn throw.
    if (typeof targetPath !== 'string') {
      return { success: false, error: 'Invalid path: expected a string', status: 400 };
    }
    const debounceMs = typeof debounce_ms === 'number'
      ? debounce_ms
      : Number.parseFloat(String(debounce_ms));
    if (!Number.isFinite(debounceMs) || debounceMs < 0) {
      return { success: false, error: `Invalid debounce_ms: ${debounce_ms}`, status: 400 };
    }

    try {
      const { spawn } = await import('child_process');
      const { existsSync, statSync } = await import('fs');

      // Validate path exists and is a directory
      if (!existsSync(targetPath)) {
        return { success: false, error: `Path does not exist: ${targetPath}`, status: 400 };
      }
      const pathStat = statSync(targetPath);
      if (!pathStat.isDirectory()) {
        return { success: false, error: `Path is not a directory: ${targetPath}`, status: 400 };
      }

      // Get the codexlens CLI path
      const venvStatus = await checkVenvStatus();
      if (!venvStatus.ready) {
        return { success: false, error: 'CodexLens not installed', status: 400 };
      }

      // Re-check after the awaits above: another start request may have won
      // the race while this one was suspended.
      if (watcherStats.running) {
        return { success: false, error: 'Watcher already running', status: 400 };
      }

      // Spawn watch process (no shell: true for security).
      // On Windows the executable is addressed with its .exe extension.
      const isWindows = process.platform === 'win32';
      const codexlensCmd = isWindows ? 'codexlens.exe' : 'codexlens';
      const args = ['watch', targetPath, '--debounce', String(Math.round(debounceMs))];
      const child = spawn(codexlensCmd, args, {
        cwd: targetPath,
        stdio: ['ignore', 'pipe', 'pipe'],
        env: { ...process.env }
      });

      // Without an 'error' listener a failed spawn (e.g. ENOENT) is raised as
      // an uncaught exception and takes the whole server down.
      child.on('error', (spawnErr: Error) => {
        console.error('[CodexLens] Watcher process error:', spawnErr);
      });

      // A child that failed to spawn has no pid; report that to the caller
      // instead of recording a watcher that never ran.
      if (child.pid === undefined) {
        return { success: false, error: `Failed to start watcher: could not spawn ${codexlensCmd}`, status: 500 };
      }

      watcherProcess = child;
      watcherStats = {
        running: true,
        root_path: targetPath,
        events_processed: 0,
        start_time: new Date()
      };

      // Handle process output for event counting
      if (child.stdout) {
        child.stdout.on('data', (data: Buffer) => {
          const output = data.toString();
          // Count processed events from output
          const matches = output.match(/Processed \d+ events?/g);
          if (matches) {
            watcherStats.events_processed += matches.length;
          }
        });
      }

      // stderr is piped, so it must be consumed: an unread pipe eventually
      // fills up and blocks the child on write.
      if (child.stderr) {
        child.stderr.on('data', (data: Buffer) => {
          const text = data.toString().trim();
          if (text) {
            console.error(`[CodexLens] Watcher stderr: ${text}`);
          }
        });
      }

      // Handle process exit. Only touch shared state if it still belongs to
      // this child, so a late exit cannot clobber a newer watcher.
      child.on('exit', (code: number | null) => {
        if (watcherProcess === child) {
          watcherStats.running = false;
          watcherProcess = null;
        }
        console.log(`[CodexLens] Watcher exited with code ${code}`);
      });

      // Broadcast watcher started
      broadcastToClients({
        type: 'CODEXLENS_WATCHER_STATUS',
        payload: { running: true, path: targetPath }
      });

      return {
        success: true,
        message: 'Watcher started',
        path: targetPath,
        pid: child.pid
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return { success: false, error: message, status: 500 };
    }
  });
  return true;
}
|
||||
|
||||
// API: Stop File Watcher
// Sends SIGTERM to the tracked watcher process, waits up to 500 ms for it to
// exit, escalates to SIGKILL if it is still alive, then resets the
// module-level watcher state and broadcasts CODEXLENS_WATCHER_STATUS.
// Responds with the final event count and uptime.
if (pathname === '/api/codexlens/watch/stop' && req.method === 'POST') {
  handlePostRequest(req, res, async () => {
    if (!watcherStats.running || !watcherProcess) {
      return { success: false, error: 'Watcher not running', status: 400 };
    }

    try {
      // Keep a local reference: the process's own exit handler may null the
      // module-level variable while this request is waiting.
      const child = watcherProcess;

      // `child.killed` only says that a signal was sent, not that the process
      // has terminated; exitCode/signalCode stay null until it really exits.
      const hasExited = (): boolean => child.exitCode !== null || child.signalCode !== null;

      // Snapshot before shutting down so the numbers describe this watcher.
      const finalStats = {
        events_processed: watcherStats.events_processed,
        uptime_seconds: watcherStats.start_time
          ? Math.floor((Date.now() - watcherStats.start_time.getTime()) / 1000)
          : 0
      };

      // Send SIGTERM to gracefully stop the watcher
      child.kill('SIGTERM');

      // Wait a moment for graceful shutdown (returns early on exit)
      if (!hasExited()) {
        await new Promise<void>((resolve) => {
          const timer = setTimeout(resolve, 500);
          child.once('exit', () => {
            clearTimeout(timer);
            resolve();
          });
        });
      }

      // Force kill if still running
      if (!hasExited()) {
        child.kill('SIGKILL');
      }

      // Only reset shared state if it still refers to the watcher we stopped
      // (or was already cleared by its exit handler).
      if (watcherProcess === null || watcherProcess === child) {
        watcherStats = {
          running: false,
          root_path: '',
          events_processed: 0,
          start_time: null
        };
        watcherProcess = null;

        // Broadcast watcher stopped
        broadcastToClients({
          type: 'CODEXLENS_WATCHER_STATUS',
          payload: { running: false }
        });
      }

      return {
        success: true,
        message: 'Watcher stopped',
        ...finalStats
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return { success: false, error: message, status: 500 };
    }
  });
  return true;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// SPLADE ENDPOINTS
|
||||
// ============================================================
|
||||
|
||||
// API: SPLADE Status - Check if SPLADE is available and installed
// Always answers HTTP 200 with { available, installed, model, error };
// failures are reported through `available: false` plus an error string.
if (pathname === '/api/codexlens/splade/status') {
  const spladeModel = 'naver/splade-cocondenser-ensembledistil';

  // All branches share one payload shape; `installed` mirrors `available`.
  const sendStatus = (available: boolean, error: string | null): void => {
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      available,
      installed: available,
      model: spladeModel,
      error
    }));
  };

  try {
    // Check if CodexLens is installed first
    const venvStatus = await checkVenvStatus();
    if (!venvStatus.ready) {
      sendStatus(false, 'CodexLens not installed');
      return true;
    }

    // Check SPLADE availability using Python check
    const result = await executeCodexLens(['python', '-c',
      'from codexlens.semantic.splade_encoder import check_splade_available; ok, err = check_splade_available(); print("OK" if ok else err)'
    ]);

    // The script prints exactly "OK" on success and the error text otherwise.
    // Match a whole line: a substring test would treat any error message that
    // merely contains "OK" as success.
    const output = String(result.output ?? '').trim();
    const available = output.split(/\r?\n/).some((line) => line.trim() === 'OK');

    sendStatus(
      available,
      available ? null : (output || result.error || 'SPLADE is not available')
    );
  } catch (err) {
    sendStatus(false, err instanceof Error ? err.message : String(err));
  }
  return true;
}
|
||||
|
||||
// API: SPLADE Install - Install SPLADE dependencies
// Body: { gpu?: boolean }. Runs `pip install codex-lens[splade]` (or the
// `splade-gpu` extra) and returns pip's stdout; on failure responds 500 with
// the error message and pip's stderr.
// NOTE(review): this invokes whichever `pip` is first on PATH; confirm that
// it targets the same environment CodexLens runs in.
if (pathname === '/api/codexlens/splade/install' && req.method === 'POST') {
  handlePostRequest(req, res, async (body) => {
    try {
      const gpu = Boolean(body?.gpu);
      const packageName = gpu ? 'codex-lens[splade-gpu]' : 'codex-lens[splade]';

      // Use pip to install the SPLADE extras.
      // Dynamic import instead of require(): this file is an ES module, where
      // `require` is not defined.
      const { execFile } = await import('child_process');
      const { promisify } = await import('util');
      const execFilePromise = promisify(execFile);

      const result = await execFilePromise('pip', ['install', packageName], {
        timeout: 600000, // 10 minutes
        // pip can be chatty; avoid failing the install on the default buffer cap.
        maxBuffer: 16 * 1024 * 1024
      });

      return {
        success: true,
        message: `SPLADE installed successfully (${gpu ? 'GPU' : 'CPU'} mode)`,
        output: result.stdout
      };
    } catch (err) {
      // execFile rejections carry the captured stderr on the error object.
      const stderr = (err as { stderr?: unknown } | null)?.stderr;
      return {
        success: false,
        error: err instanceof Error ? err.message : String(err),
        stderr: typeof stderr === 'string' ? stderr : undefined,
        status: 500
      };
    }
  });
  return true;
}
|
||||
|
||||
// API: SPLADE Index Status - Check if SPLADE index exists for a project
// Query: ?path=<project dir>. Responds 400 when `path` is missing; otherwise
// 200 with { exists, chunks?, postings?, model?, error? } derived from a
// small Python probe against <path>/.codexlens/_index.db.
if (pathname === '/api/codexlens/splade/index-status') {
  try {
    const projectPath = url.searchParams.get('path');
    if (!projectPath) {
      res.writeHead(400, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ success: false, error: 'Missing path parameter' }));
      return true;
    }

    // Check if CodexLens is installed first
    const venvStatus = await checkVenvStatus();
    if (!venvStatus.ready) {
      res.writeHead(200, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ exists: false, error: 'CodexLens not installed' }));
      return true;
    }

    const { join } = await import('path');
    const indexDb = join(projectPath, '.codexlens', '_index.db');

    // Use Python to check SPLADE index status.
    // The path comes from the request, so it is embedded as a fully escaped
    // string literal (JSON string escapes are valid Python string escapes)
    // rather than pasted between quotes, where a `"` could break out of the
    // literal and inject code.
    const pythonCode = [
      'from codexlens.storage.splade_index import SpladeIndex',
      'from pathlib import Path',
      'try:',
      `    idx = SpladeIndex(Path(${JSON.stringify(indexDb)}))`,
      '    if idx.has_index():',
      '        stats = idx.get_stats()',
      '        meta = idx.get_metadata()',
      `        model = meta.get('model_name', '') if meta else ''`,
      `        print(f"OK|{stats['unique_chunks']}|{stats['total_postings']}|{model}")`,
      '    else:',
      '        print("NO_INDEX")',
      'except Exception as e:',
      '    print(f"ERROR|{str(e)}")'
    ].join('\n');

    const result = await executeCodexLens(['python', '-c', pythonCode]);

    // Tolerate leading whitespace or unrelated lines before the marker line.
    const output = String(result.output ?? '').trim();
    const okLine = output.split(/\r?\n/).find((line) => line.startsWith('OK|'));
    const errorAt = output.indexOf('ERROR|');

    res.writeHead(200, { 'Content-Type': 'application/json' });
    if (okLine !== undefined) {
      // Format: OK|<chunks>|<postings>|<model>; re-join in case the model
      // name itself contains a '|'.
      const [, chunks, postings, ...modelParts] = okLine.trim().split('|');
      res.end(JSON.stringify({
        exists: true,
        chunks: parseInt(chunks, 10),
        postings: parseInt(postings, 10),
        model: modelParts.join('|')
      }));
    } else if (errorAt !== -1) {
      const errorMsg = output.substring(errorAt + 'ERROR|'.length).trim();
      res.end(JSON.stringify({ exists: false, error: errorMsg }));
    } else {
      // NO_INDEX or unrecognised output: report "no index" without an error.
      res.end(JSON.stringify({ exists: false }));
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ exists: false, error: message }));
  }
  return true;
}
|
||||
|
||||
// API: SPLADE Index Rebuild - Rebuild SPLADE index for a project
// Body: { path }. Runs `codexlens splade-index <path> --rebuild` inside the
// project directory and relays the CLI output; 400 without a path, 500 when
// the CLI reports failure or throws.
if (pathname === '/api/codexlens/splade/rebuild' && req.method === 'POST') {
  handlePostRequest(req, res, async (body) => {
    const projectPath = body.path;
    if (!projectPath) {
      return { success: false, error: 'Missing path parameter', status: 400 };
    }

    try {
      const rebuild = await executeCodexLens(
        ['splade-index', projectPath, '--rebuild'],
        {
          cwd: projectPath,
          timeout: 30 * 60 * 1000 // 30 minutes for large codebases
        }
      );

      if (!rebuild.success) {
        return {
          success: false,
          error: rebuild.error || 'Failed to rebuild SPLADE index',
          output: rebuild.output,
          status: 500
        };
      }

      return {
        success: true,
        message: 'SPLADE index rebuilt successfully',
        output: rebuild.output
      };
    } catch (err) {
      return { success: false, error: err.message, status: 500 };
    }
  });
  return true;
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user