Add comprehensive tests for semantic chunking and search functionality

- Implemented tests for the ChunkConfig and Chunker classes, covering default and custom configurations.
- Added tests for symbol-based chunking, including single and multiple symbols, handling of empty symbols, and preservation of line numbers.
- Developed tests for sliding window chunking, ensuring correct chunking behavior with various content sizes and configurations.
- Created integration tests for semantic search, validating embedding generation, vector storage, and search accuracy across a complex codebase.
- Included performance tests for embedding generation and search operations.
- Established tests for chunking strategies, comparing symbol-based and sliding window approaches.
- Enhanced test coverage for edge cases, including handling of Unicode characters and out-of-bounds symbol ranges.
This commit is contained in:
catlog22
2025-12-12 19:55:35 +08:00
parent c42f91a7fe
commit 4faa5f1c95
27 changed files with 4812 additions and 129 deletions

View File

@@ -148,7 +148,7 @@ except Exception as e:
}
/**
* Install semantic search dependencies
* Install semantic search dependencies (fastembed, ONNX-based, ~200MB)
* @returns {Promise<{success: boolean, error?: string}>}
*/
async function installSemantic() {
@@ -163,12 +163,12 @@ async function installSemantic() {
: join(CODEXLENS_VENV, 'bin', 'pip');
return new Promise((resolve) => {
console.log('[CodexLens] Installing semantic search dependencies...');
console.log('[CodexLens] Installing semantic search dependencies (fastembed)...');
console.log('[CodexLens] Using ONNX-based fastembed backend (~200MB)');
// Install numpy and the embedding backend package
const child = spawn(pipPath, ['install', 'numpy>=1.24', 'sentence-transformers>=2.2'], {
const child = spawn(pipPath, ['install', 'numpy>=1.24', 'fastembed>=0.2'], {
stdio: ['ignore', 'pipe', 'pipe'],
timeout: 300000 // 5 minutes for model download
timeout: 600000 // 10 minutes for potential model download
});
let stdout = '';
@@ -178,7 +178,7 @@ async function installSemantic() {
stdout += data.toString();
// Log progress
const line = data.toString().trim();
if (line.includes('Downloading') || line.includes('Installing')) {
if (line.includes('Downloading') || line.includes('Installing') || line.includes('Collecting')) {
console.log(`[CodexLens] ${line}`);
}
});