Refactor search modes and optimize embedding generation

- Updated the dashboard template to hide the Code Graph Explorer feature.
- Enhanced the `executeCodexLens` function to use `exec`, improving cross-platform compatibility and reliability of command execution.
- Changed the default `maxResults` and `limit` parameters in the smart search tool to 10 for better performance.
- Introduced a new `priority` search mode in the smart search tool, replacing the previous `parallel` mode; instead of running every backend at once, it falls back through hybrid -> exact -> ripgrep until one returns results (see the sketch after this list).
- Optimized the embedding generation process in the embedding manager by batching operations and using a cached embedder instance to reduce model loading overhead.
- Implemented a thread-safe singleton pattern for the embedder to improve performance across multiple searches.
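
The `priority` mode reduces to a first-success fallback chain. Below is a minimal sketch, assuming hypothetical backend callables; the actual tool wires these to its own hybrid, exact, and ripgrep engines:

```python
# Hedged sketch of the priority fallback chain; the backends list holds
# hypothetical callables standing in for the tool's hybrid, exact, and
# ripgrep searches (names are illustrative, not the real API).
from typing import Callable, List


def priority_search(query: str, backends: List[Callable[[str], list]]) -> list:
    """Try each backend in order and return the first non-empty result set."""
    for backend in backends:
        try:
            results = backend(query)
        except Exception:
            continue  # a failing backend falls through to the next one
        if results:
            return results
    return []


# Usage: priority_search("parse_config", [hybrid_search, exact_search, ripgrep_search])
```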
Author: catlog22
Date: 2025-12-20 11:08:34 +08:00
Parent: 7adde91e9f
Commit: e1cac5dd50
16 changed files with 852 additions and 284 deletions

View File

@@ -9,7 +9,7 @@ from typing import Dict, List, Optional
 try:
     from codexlens.semantic import SEMANTIC_AVAILABLE
     if SEMANTIC_AVAILABLE:
-        from codexlens.semantic.embedder import Embedder
+        from codexlens.semantic.embedder import Embedder, get_embedder
         from codexlens.semantic.vector_store import VectorStore
         from codexlens.semantic.chunker import Chunker, ChunkConfig
 except ImportError:
@@ -167,7 +167,8 @@ def generate_embeddings(
     # Initialize components
     try:
-        embedder = Embedder(profile=model_profile)
+        # Use cached embedder (singleton) for performance
+        embedder = get_embedder(profile=model_profile)
         vector_store = VectorStore(index_path)
         chunker = Chunker(config=ChunkConfig(max_chunk_size=chunk_size))
@@ -201,10 +202,16 @@ def generate_embeddings(
     if progress_callback:
         progress_callback(f"Processing {len(files)} files...")
-    # Process each file
-    total_chunks = 0
-    failed_files = []
+    # Process all files using batch operations for optimal performance
+    start_time = time.time()
+    failed_files = []
+
+    # --- OPTIMIZATION Step 1: Collect all chunks from all files ---
+    if progress_callback:
+        progress_callback(f"Step 1/4: Chunking {len(files)} files...")
+    all_chunks_with_paths = []  # List of (chunk, file_path) tuples
+    files_with_chunks = set()
     for idx, file_row in enumerate(files, 1):
         file_path = file_row["full_path"]
@@ -212,39 +219,88 @@
         language = file_row["language"] or "python"
         try:
             # Create chunks
             chunks = chunker.chunk_sliding_window(
                 content,
                 file_path=file_path,
                 language=language
             )
-            if not chunks:
-                continue
-            # Generate embeddings
-            for chunk in chunks:
-                embedding = embedder.embed_single(chunk.content)
-                chunk.embedding = embedding
-            # Store chunks
-            vector_store.add_chunks(chunks, file_path)
-            total_chunks += len(chunks)
-            if progress_callback:
-                progress_callback(f"[{idx}/{len(files)}] {file_path}: {len(chunks)} chunks")
+            if chunks:
+                for chunk in chunks:
+                    all_chunks_with_paths.append((chunk, file_path))
+                files_with_chunks.add(file_path)
         except Exception as e:
-            logger.error(f"Failed to process {file_path}: {e}")
+            logger.error(f"Failed to chunk {file_path}: {e}")
             failed_files.append((file_path, str(e)))
+
+    if not all_chunks_with_paths:
+        elapsed_time = time.time() - start_time
+        return {
+            "success": True,
+            "result": {
+                "chunks_created": 0,
+                "files_processed": len(files) - len(failed_files),
+                "files_failed": len(failed_files),
+                "elapsed_time": elapsed_time,
+                "model_profile": model_profile,
+                "model_name": embedder.model_name,
+                "failed_files": failed_files[:5],
+                "index_path": str(index_path),
+            },
+        }
+
+    total_chunks = len(all_chunks_with_paths)
+
+    # --- OPTIMIZATION Step 2: Batch generate embeddings with memory-safe batching ---
+    # Use smaller batches to avoid OOM errors while still benefiting from batch processing
+    # jina-embeddings-v2-base-code with long chunks needs small batches
+    BATCH_SIZE = 8  # Conservative batch size for memory efficiency
+    if progress_callback:
+        num_batches = (total_chunks + BATCH_SIZE - 1) // BATCH_SIZE
+        progress_callback(f"Step 2/4: Generating embeddings for {total_chunks} chunks ({num_batches} batches)...")
+    try:
+        all_embeddings = []
+        for batch_start in range(0, total_chunks, BATCH_SIZE):
+            batch_end = min(batch_start + BATCH_SIZE, total_chunks)
+            batch_contents = [chunk.content for chunk, _ in all_chunks_with_paths[batch_start:batch_end]]
+            batch_embeddings = embedder.embed(batch_contents)
+            all_embeddings.extend(batch_embeddings)
+            if progress_callback and total_chunks > BATCH_SIZE:
+                progress_callback(f"  Batch {batch_start // BATCH_SIZE + 1}/{(total_chunks + BATCH_SIZE - 1) // BATCH_SIZE}: {len(batch_embeddings)} embeddings")
+    except Exception as e:
+        return {
+            "success": False,
+            "error": f"Failed to generate embeddings: {str(e)}",
+        }
+
+    # --- OPTIMIZATION Step 3: Assign embeddings back to chunks ---
+    if progress_callback:
+        progress_callback(f"Step 3/4: Assigning {len(all_embeddings)} embeddings...")
+    for (chunk, _), embedding in zip(all_chunks_with_paths, all_embeddings):
+        chunk.embedding = embedding
+
+    # --- OPTIMIZATION Step 4: Batch store all chunks in single transaction ---
+    if progress_callback:
+        progress_callback(f"Step 4/4: Storing {total_chunks} chunks to database...")
+    try:
+        vector_store.add_chunks_batch(all_chunks_with_paths)
+    except Exception as e:
+        return {
+            "success": False,
+            "error": f"Failed to store chunks: {str(e)}",
+        }
+
     elapsed_time = time.time() - start_time
     return {
         "success": True,
         "result": {
             "chunks_created": total_chunks,
-            "files_processed": len(files) - len(failed_files),
+            "files_processed": len(files_with_chunks),
             "files_failed": len(failed_files),
             "elapsed_time": elapsed_time,
             "model_profile": model_profile,

View File

@@ -257,7 +257,7 @@ class HybridSearchEngine:
             return []
         # Initialize embedder and vector store
-        from codexlens.semantic.embedder import Embedder
+        from codexlens.semantic.embedder import get_embedder
         from codexlens.semantic.vector_store import VectorStore
         vector_store = VectorStore(index_path)
@@ -285,7 +285,8 @@ class HybridSearchEngine:
         else:
             profile = "code"  # Default fallback
-        embedder = Embedder(profile=profile)
+        # Use cached embedder (singleton) for performance
+        embedder = get_embedder(profile=profile)
         # Generate query embedding
         query_embedding = embedder.embed_single(query)
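
Because `get_embedder` caches instances by profile, repeated searches skip the model load. A quick check using only the API introduced in this commit (the ~0.8s figure comes from the docstring below):

```python
import time

from codexlens.semantic.embedder import get_embedder

t0 = time.time()
e1 = get_embedder(profile="code")   # first call loads the model (~0.8s)
t1 = time.time()
e2 = get_embedder(profile="code")   # second call returns the cached instance
t2 = time.time()

assert e1 is e2  # the singleton is reused across searches
print(f"first load: {t1 - t0:.2f}s, cached: {t2 - t1:.4f}s")
```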

View File

@@ -2,11 +2,57 @@
 from __future__ import annotations

-from typing import Iterable, List
+import threading
+from typing import Dict, Iterable, List, Optional

 from . import SEMANTIC_AVAILABLE

+# Global embedder cache for singleton pattern
+_embedder_cache: Dict[str, "Embedder"] = {}
+_cache_lock = threading.Lock()
+
+
+def get_embedder(profile: str = "code") -> "Embedder":
+    """Get or create a cached Embedder instance (thread-safe singleton).
+
+    This function provides a significant performance improvement by reusing
+    Embedder instances across multiple searches, avoiding repeated model
+    loading overhead (~0.8s per load).
+
+    Args:
+        profile: Model profile ("fast", "code", "multilingual", "balanced")
+
+    Returns:
+        Cached Embedder instance for the given profile
+    """
+    global _embedder_cache
+
+    # Fast path: check cache without lock
+    if profile in _embedder_cache:
+        return _embedder_cache[profile]
+
+    # Slow path: acquire lock for initialization
+    with _cache_lock:
+        # Double-check after acquiring lock
+        if profile in _embedder_cache:
+            return _embedder_cache[profile]
+
+        # Create new embedder and cache it
+        embedder = Embedder(profile=profile)
+        # Pre-load model to ensure it's ready
+        embedder._load_model()
+        _embedder_cache[profile] = embedder
+        return embedder
+
+
+def clear_embedder_cache() -> None:
+    """Clear the embedder cache (useful for testing or memory management)."""
+    global _embedder_cache
+    with _cache_lock:
+        _embedder_cache.clear()
+
+
 class Embedder:
     """Generate embeddings for code chunks using fastembed (ONNX-based).