Refactor search modes and optimize embedding generation

- Updated the dashboard template to hide the Code Graph Explorer feature.
- Enhanced the `executeCodexLens` function to use `exec`, improving cross-platform compatibility and reliability of command execution.
- Changed the default `maxResults` and `limit` parameters in the smart search tool to 10 for better performance.
- Introduced a new `priority` search mode in the smart search tool, replacing the previous `parallel` mode; instead of running every backend at once, it falls back through hybrid -> exact -> ripgrep until one returns results (see the sketch after this list).
- Optimized the embedding generation process in the embedding manager by batching operations and using a cached embedder instance to reduce model loading overhead.
- Implemented a thread-safe singleton pattern for the embedder to improve performance across multiple searches.
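
The `priority` mode reduces to a first-success fallback chain. Below is a minimal sketch, assuming hypothetical backend callables; the actual tool wires these to its own hybrid, exact, and ripgrep engines:

```python
# Hedged sketch of the priority fallback chain; the backends list holds
# hypothetical callables standing in for the tool's hybrid, exact, and
# ripgrep searches (names are illustrative, not the real API).
from typing import Callable, List


def priority_search(query: str, backends: List[Callable[[str], list]]) -> list:
    """Try each backend in order and return the first non-empty result set."""
    for backend in backends:
        try:
            results = backend(query)
        except Exception:
            continue  # a failing backend falls through to the next one
        if results:
            return results
    return []


# Usage: priority_search("parse_config", [hybrid_search, exact_search, ripgrep_search])
```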
Author: catlog22
Date: 2025-12-20 11:08:34 +08:00
Parent: 7adde91e9f
Commit: e1cac5dd50
16 changed files with 852 additions and 284 deletions

View File

@@ -9,7 +9,7 @@ from typing import Dict, List, Optional
 try:
     from codexlens.semantic import SEMANTIC_AVAILABLE
     if SEMANTIC_AVAILABLE:
-        from codexlens.semantic.embedder import Embedder
+        from codexlens.semantic.embedder import Embedder, get_embedder
         from codexlens.semantic.vector_store import VectorStore
         from codexlens.semantic.chunker import Chunker, ChunkConfig
 except ImportError:
@@ -167,7 +167,8 @@ def generate_embeddings(
     # Initialize components
     try:
-        embedder = Embedder(profile=model_profile)
+        # Use cached embedder (singleton) for performance
+        embedder = get_embedder(profile=model_profile)
         vector_store = VectorStore(index_path)
         chunker = Chunker(config=ChunkConfig(max_chunk_size=chunk_size))
@@ -201,10 +202,16 @@ def generate_embeddings(
     if progress_callback:
         progress_callback(f"Processing {len(files)} files...")
-    # Process each file
-    total_chunks = 0
-    failed_files = []
+    # Process all files using batch operations for optimal performance
+    start_time = time.time()
+    failed_files = []
+
+    # --- OPTIMIZATION Step 1: Collect all chunks from all files ---
+    if progress_callback:
+        progress_callback(f"Step 1/4: Chunking {len(files)} files...")
+    all_chunks_with_paths = []  # List of (chunk, file_path) tuples
+    files_with_chunks = set()
     for idx, file_row in enumerate(files, 1):
         file_path = file_row["full_path"]
@@ -212,39 +219,88 @@
         language = file_row["language"] or "python"
         try:
             # Create chunks
             chunks = chunker.chunk_sliding_window(
                 content,
                 file_path=file_path,
                 language=language
             )
-            if not chunks:
-                continue
-            # Generate embeddings
-            for chunk in chunks:
-                embedding = embedder.embed_single(chunk.content)
-                chunk.embedding = embedding
-            # Store chunks
-            vector_store.add_chunks(chunks, file_path)
-            total_chunks += len(chunks)
-            if progress_callback:
-                progress_callback(f"[{idx}/{len(files)}] {file_path}: {len(chunks)} chunks")
+            if chunks:
+                for chunk in chunks:
+                    all_chunks_with_paths.append((chunk, file_path))
+                files_with_chunks.add(file_path)
         except Exception as e:
-            logger.error(f"Failed to process {file_path}: {e}")
+            logger.error(f"Failed to chunk {file_path}: {e}")
             failed_files.append((file_path, str(e)))
+
+    if not all_chunks_with_paths:
+        elapsed_time = time.time() - start_time
+        return {
+            "success": True,
+            "result": {
+                "chunks_created": 0,
+                "files_processed": len(files) - len(failed_files),
+                "files_failed": len(failed_files),
+                "elapsed_time": elapsed_time,
+                "model_profile": model_profile,
+                "model_name": embedder.model_name,
+                "failed_files": failed_files[:5],
+                "index_path": str(index_path),
+            },
+        }
+
+    total_chunks = len(all_chunks_with_paths)
+
+    # --- OPTIMIZATION Step 2: Batch generate embeddings with memory-safe batching ---
+    # Use smaller batches to avoid OOM errors while still benefiting from batch processing
+    # jina-embeddings-v2-base-code with long chunks needs small batches
+    BATCH_SIZE = 8  # Conservative batch size for memory efficiency
+    if progress_callback:
+        num_batches = (total_chunks + BATCH_SIZE - 1) // BATCH_SIZE
+        progress_callback(f"Step 2/4: Generating embeddings for {total_chunks} chunks ({num_batches} batches)...")
+    try:
+        all_embeddings = []
+        for batch_start in range(0, total_chunks, BATCH_SIZE):
+            batch_end = min(batch_start + BATCH_SIZE, total_chunks)
+            batch_contents = [chunk.content for chunk, _ in all_chunks_with_paths[batch_start:batch_end]]
+            batch_embeddings = embedder.embed(batch_contents)
+            all_embeddings.extend(batch_embeddings)
+            if progress_callback and total_chunks > BATCH_SIZE:
+                progress_callback(f"  Batch {batch_start // BATCH_SIZE + 1}/{(total_chunks + BATCH_SIZE - 1) // BATCH_SIZE}: {len(batch_embeddings)} embeddings")
+    except Exception as e:
+        return {
+            "success": False,
+            "error": f"Failed to generate embeddings: {str(e)}",
+        }
+
+    # --- OPTIMIZATION Step 3: Assign embeddings back to chunks ---
+    if progress_callback:
+        progress_callback(f"Step 3/4: Assigning {len(all_embeddings)} embeddings...")
+    for (chunk, _), embedding in zip(all_chunks_with_paths, all_embeddings):
+        chunk.embedding = embedding
+
+    # --- OPTIMIZATION Step 4: Batch store all chunks in single transaction ---
+    if progress_callback:
+        progress_callback(f"Step 4/4: Storing {total_chunks} chunks to database...")
+    try:
+        vector_store.add_chunks_batch(all_chunks_with_paths)
+    except Exception as e:
+        return {
+            "success": False,
+            "error": f"Failed to store chunks: {str(e)}",
+        }
+
     elapsed_time = time.time() - start_time
     return {
         "success": True,
         "result": {
             "chunks_created": total_chunks,
-            "files_processed": len(files) - len(failed_files),
+            "files_processed": len(files_with_chunks),
             "files_failed": len(failed_files),
             "elapsed_time": elapsed_time,
             "model_profile": model_profile,

View File

@@ -257,7 +257,7 @@ class HybridSearchEngine:
             return []
         # Initialize embedder and vector store
-        from codexlens.semantic.embedder import Embedder
+        from codexlens.semantic.embedder import get_embedder
         from codexlens.semantic.vector_store import VectorStore
         vector_store = VectorStore(index_path)
@@ -285,7 +285,8 @@ class HybridSearchEngine:
         else:
             profile = "code"  # Default fallback
-        embedder = Embedder(profile=profile)
+        # Use cached embedder (singleton) for performance
+        embedder = get_embedder(profile=profile)
         # Generate query embedding
         query_embedding = embedder.embed_single(query)
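
Because `get_embedder` caches instances by profile, repeated searches skip the model load. A quick check using only the API introduced in this commit (the ~0.8s figure comes from the docstring below):

```python
import time

from codexlens.semantic.embedder import get_embedder

t0 = time.time()
e1 = get_embedder(profile="code")   # first call loads the model (~0.8s)
t1 = time.time()
e2 = get_embedder(profile="code")   # second call returns the cached instance
t2 = time.time()

assert e1 is e2  # the singleton is reused across searches
print(f"first load: {t1 - t0:.2f}s, cached: {t2 - t1:.4f}s")
```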

View File

@@ -2,11 +2,57 @@
 from __future__ import annotations

-from typing import Iterable, List
+import threading
+from typing import Dict, Iterable, List, Optional

 from . import SEMANTIC_AVAILABLE

+# Global embedder cache for singleton pattern
+_embedder_cache: Dict[str, "Embedder"] = {}
+_cache_lock = threading.Lock()
+
+
+def get_embedder(profile: str = "code") -> "Embedder":
+    """Get or create a cached Embedder instance (thread-safe singleton).
+
+    This function provides a significant performance improvement by reusing
+    Embedder instances across multiple searches, avoiding repeated model
+    loading overhead (~0.8s per load).
+
+    Args:
+        profile: Model profile ("fast", "code", "multilingual", "balanced")
+
+    Returns:
+        Cached Embedder instance for the given profile
+    """
+    global _embedder_cache
+
+    # Fast path: check cache without lock
+    if profile in _embedder_cache:
+        return _embedder_cache[profile]
+
+    # Slow path: acquire lock for initialization
+    with _cache_lock:
+        # Double-check after acquiring lock
+        if profile in _embedder_cache:
+            return _embedder_cache[profile]
+
+        # Create new embedder and cache it
+        embedder = Embedder(profile=profile)
+        # Pre-load model to ensure it's ready
+        embedder._load_model()
+        _embedder_cache[profile] = embedder
+        return embedder
+
+
+def clear_embedder_cache() -> None:
+    """Clear the embedder cache (useful for testing or memory management)."""
+    global _embedder_cache
+    with _cache_lock:
+        _embedder_cache.clear()
+
+
 class Embedder:
     """Generate embeddings for code chunks using fastembed (ONNX-based).