mirror of https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00

feat: Implement cascade indexing command and benchmark script for performance evaluation

codex-lens/benchmarks/cascade_benchmark.py (new file, 402 lines)
@@ -0,0 +1,402 @@
#!/usr/bin/env python
"""Benchmark script for comparing cascade search strategies.

Compares:
- binary: 256-dim binary coarse ranking + 2048-dim dense fine ranking
- hybrid: FTS+SPLADE+Vector coarse ranking + CrossEncoder fine ranking

Usage:
    python benchmarks/cascade_benchmark.py [--source PATH] [--queries N] [--warmup N]
"""

from __future__ import annotations

import argparse
import gc
import json
import os
import statistics
import sys
import time
import traceback
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import List, Optional, Dict, Any

# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
from codexlens.config import Config
from codexlens.storage.registry import RegistryStore
from codexlens.storage.path_mapper import PathMapper


@dataclass
class BenchmarkResult:
    """Result from a single benchmark run."""
    strategy: str
    query: str
    latency_ms: float
    num_results: int
    top_result: Optional[str]
    error: Optional[str] = None


@dataclass
class BenchmarkSummary:
    """Aggregated benchmark statistics."""
    strategy: str
    total_queries: int
    successful_queries: int
    avg_latency_ms: float
    min_latency_ms: float
    max_latency_ms: float
    p50_latency_ms: float
    p95_latency_ms: float
    p99_latency_ms: float
    avg_results: float
    errors: List[str]


# Default test queries covering different scenarios
DEFAULT_QUERIES = [
    # Code patterns
    "def search",
    "class Engine",
    "import numpy",
    "async def",
    "raise ValueError",
    # Semantic queries
    "how to parse json",
    "database connection",
    "error handling",
    "authentication logic",
    "file read write",
    # Technical terms
    "embedding vector",
    "cosine similarity",
    "binary quantization",
    "hamming distance",
    "reranking",
]


def percentile(data: List[float], p: float) -> float:
    """Calculate percentile of sorted data."""
    if not data:
        return 0.0
    sorted_data = sorted(data)
    k = (len(sorted_data) - 1) * (p / 100)
    f = int(k)
    c = f + 1 if f + 1 < len(sorted_data) else f
    return sorted_data[f] + (k - f) * (sorted_data[c] - sorted_data[f])


def run_single_benchmark(
    engine: ChainSearchEngine,
    query: str,
    source_path: Path,
    strategy: str,
    options: Optional[SearchOptions] = None,
) -> BenchmarkResult:
    """Run a single benchmark query."""
    gc.collect()

    start_time = time.perf_counter()
    try:
        result = engine.cascade_search(
            query=query,
            source_path=source_path,
            k=10,
            coarse_k=100,
            options=options,
            strategy=strategy,
        )
        elapsed_ms = (time.perf_counter() - start_time) * 1000

        top_result = None
        if result.results:
            r = result.results[0]
            line = r.start_line or 0
            top_result = f"{r.path}:{line}"

        return BenchmarkResult(
            strategy=strategy,
            query=query,
            latency_ms=elapsed_ms,
            num_results=len(result.results),
            top_result=top_result,
        )
    except Exception as e:
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        return BenchmarkResult(
            strategy=strategy,
            query=query,
            latency_ms=elapsed_ms,
            num_results=0,
            top_result=None,
            error=str(e),
        )


def run_benchmarks(
    source_path: Path,
    queries: List[str],
    strategies: List[str],
    warmup_runs: int = 2,
    options: Optional[SearchOptions] = None,
) -> Dict[str, List[BenchmarkResult]]:
    """Run benchmarks for all queries and strategies."""

    print(f"\n{'='*60}")
    print("Cascade Search Benchmark")
    print(f"{'='*60}")
    print(f"Source: {source_path}")
    print(f"Queries: {len(queries)}")
    print(f"Strategies: {strategies}")
    print(f"Warmup runs: {warmup_runs}")
    print(f"{'='*60}\n")

    # Initialize engine
    config = Config()
    registry = RegistryStore()  # Uses default path
    registry.initialize()
    mapper = PathMapper()  # Uses default path
    engine = ChainSearchEngine(registry=registry, mapper=mapper, config=config)

    results: Dict[str, List[BenchmarkResult]] = {s: [] for s in strategies}

    # Warmup phase
    if warmup_runs > 0:
        print(f"Running {warmup_runs} warmup queries...")
        warmup_query = queries[0] if queries else "test"
        for strategy in strategies:
            for _ in range(warmup_runs):
                try:
                    run_single_benchmark(engine, warmup_query, source_path, strategy, options)
                except Exception:
                    pass
        print("Warmup complete.\n")

    # Benchmark phase
    total_runs = len(queries) * len(strategies)
    current_run = 0

    for query in queries:
        for strategy in strategies:
            current_run += 1
            print(f"[{current_run}/{total_runs}] {strategy}: '{query[:40]}...' ", end="", flush=True)

            result = run_single_benchmark(engine, query, source_path, strategy, options)
            results[strategy].append(result)

            if result.error:
                print(f"ERROR: {result.error[:50]}")
            else:
                print(f"{result.latency_ms:.1f}ms, {result.num_results} results")

    return results


def summarize_results(results: Dict[str, List[BenchmarkResult]]) -> Dict[str, BenchmarkSummary]:
    """Generate summary statistics for each strategy."""
    summaries = {}

    for strategy, benchmark_results in results.items():
        latencies = [r.latency_ms for r in benchmark_results if r.error is None]
        result_counts = [r.num_results for r in benchmark_results if r.error is None]
        errors = [r.error for r in benchmark_results if r.error is not None]

        if latencies:
            summary = BenchmarkSummary(
                strategy=strategy,
                total_queries=len(benchmark_results),
                successful_queries=len(latencies),
                avg_latency_ms=statistics.mean(latencies),
                min_latency_ms=min(latencies),
                max_latency_ms=max(latencies),
                p50_latency_ms=percentile(latencies, 50),
                p95_latency_ms=percentile(latencies, 95),
                p99_latency_ms=percentile(latencies, 99),
                avg_results=statistics.mean(result_counts) if result_counts else 0,
                errors=errors,
            )
        else:
            summary = BenchmarkSummary(
                strategy=strategy,
                total_queries=len(benchmark_results),
                successful_queries=0,
                avg_latency_ms=0,
                min_latency_ms=0,
                max_latency_ms=0,
                p50_latency_ms=0,
                p95_latency_ms=0,
                p99_latency_ms=0,
                avg_results=0,
                errors=errors,
            )

        summaries[strategy] = summary

    return summaries


def print_comparison_table(summaries: Dict[str, BenchmarkSummary]) -> None:
    """Print formatted comparison table."""
    print(f"\n{'='*80}")
    print("BENCHMARK RESULTS COMPARISON")
    print(f"{'='*80}\n")

    # Header
    print(f"{'Metric':<25} {'Binary':>15} {'Hybrid':>15} {'Diff':>15}")
    print(f"{'-'*25} {'-'*15} {'-'*15} {'-'*15}")

    binary = summaries.get("binary")
    hybrid = summaries.get("hybrid")

    if not binary or not hybrid:
        print("Missing results for comparison")
        return

    metrics = [
        ("Total Queries", binary.total_queries, hybrid.total_queries),
        ("Successful", binary.successful_queries, hybrid.successful_queries),
        ("Avg Latency (ms)", binary.avg_latency_ms, hybrid.avg_latency_ms),
        ("Min Latency (ms)", binary.min_latency_ms, hybrid.min_latency_ms),
        ("Max Latency (ms)", binary.max_latency_ms, hybrid.max_latency_ms),
        ("P50 Latency (ms)", binary.p50_latency_ms, hybrid.p50_latency_ms),
        ("P95 Latency (ms)", binary.p95_latency_ms, hybrid.p95_latency_ms),
        ("P99 Latency (ms)", binary.p99_latency_ms, hybrid.p99_latency_ms),
        ("Avg Results", binary.avg_results, hybrid.avg_results),
    ]

    for name, b_val, h_val in metrics:
        if isinstance(b_val, float):
            diff = b_val - h_val
            diff_str = f"{diff:+.2f}" if diff != 0 else "0.00"
            speedup = h_val / b_val if b_val > 0 else 0
            if "Latency" in name and speedup > 1:
                diff_str += f" ({speedup:.1f}x faster)"
            print(f"{name:<25} {b_val:>15.2f} {h_val:>15.2f} {diff_str:>15}")
        else:
            diff = b_val - h_val
            print(f"{name:<25} {b_val:>15} {h_val:>15} {diff:>+15}")

    # Errors
    print(f"\n{'Errors:':<25}")
    print(f"  Binary: {len(binary.errors)}")
    for err in binary.errors[:3]:
        print(f"    - {err[:60]}...")
    print(f"  Hybrid: {len(hybrid.errors)}")
    for err in hybrid.errors[:3]:
        print(f"    - {err[:60]}...")

    # Winner
    print(f"\n{'='*80}")
    if binary.avg_latency_ms < hybrid.avg_latency_ms and binary.successful_queries > 0:
        speedup = hybrid.avg_latency_ms / binary.avg_latency_ms
        print(f"[WINNER] Binary ({speedup:.2f}x faster average latency)")
    elif hybrid.avg_latency_ms < binary.avg_latency_ms and hybrid.successful_queries > 0:
        speedup = binary.avg_latency_ms / hybrid.avg_latency_ms
        print(f"[WINNER] Hybrid ({speedup:.2f}x faster average latency)")
    else:
        print("No clear winner (check errors)")
    print(f"{'='*80}\n")


def save_results(
    results: Dict[str, List[BenchmarkResult]],
    summaries: Dict[str, BenchmarkSummary],
    output_path: Path,
) -> None:
    """Save benchmark results to JSON file."""
    data = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "summaries": {k: asdict(v) for k, v in summaries.items()},
        "details": {
            k: [asdict(r) for r in v]
            for k, v in results.items()
        },
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)

    print(f"Results saved to: {output_path}")


def main():
    parser = argparse.ArgumentParser(description="Benchmark cascade search strategies")
    parser.add_argument(
        "--source", "-s",
        type=Path,
        default=Path(__file__).parent.parent / "src",
        help="Source directory to search (default: ./src)",
    )
    parser.add_argument(
        "--queries", "-q",
        type=int,
        default=len(DEFAULT_QUERIES),
        help=f"Number of queries to run (default: {len(DEFAULT_QUERIES)})",
    )
    parser.add_argument(
        "--warmup", "-w",
        type=int,
        default=2,
        help="Number of warmup runs (default: 2)",
    )
    parser.add_argument(
        "--output", "-o",
        type=Path,
        default=Path(__file__).parent / "results" / "cascade_benchmark.json",
        help="Output file for results (default: benchmarks/results/cascade_benchmark.json)",
    )
    parser.add_argument(
        "--strategies",
        nargs="+",
        default=["binary", "hybrid"],
        choices=["binary", "hybrid"],
        help="Strategies to benchmark (default: both)",
    )

    args = parser.parse_args()

    # Validate source path
    if not args.source.exists():
        print(f"Error: Source path does not exist: {args.source}")
        sys.exit(1)

    # Select queries
    queries = DEFAULT_QUERIES[:args.queries]

    # Run benchmarks
    try:
        results = run_benchmarks(
            source_path=args.source,
            queries=queries,
            strategies=args.strategies,
            warmup_runs=args.warmup,
        )

        # Generate summaries
        summaries = summarize_results(results)

        # Print comparison
        print_comparison_table(summaries)

        # Save results
        save_results(results, summaries, args.output)

    except KeyboardInterrupt:
        print("\nBenchmark interrupted.")
        sys.exit(1)
    except Exception as e:
        print(f"\nBenchmark failed: {e}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
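Note: the percentile helper above uses linear interpolation between adjacent sorted samples. A small worked example, illustrative only and not part of the commit:

# Worked example of the linear-interpolation percentile used above:
# for latencies [700, 800, 900, 1000] and p = 95,
# k = (4 - 1) * 0.95 = 2.85, f = 2, c = 3,
# so the value is 900 + 0.85 * (1000 - 900) = 985.0.
sorted_data = [700.0, 800.0, 900.0, 1000.0]
k = (len(sorted_data) - 1) * (95 / 100)
f = int(k)
c = f + 1 if f + 1 < len(sorted_data) else f
value = sorted_data[f] + (k - f) * (sorted_data[c] - sorted_data[f])
assert abs(value - 985.0) < 1e-9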
codex-lens/benchmarks/results/cascade_benchmark.json (new file, 277 lines)
@@ -0,0 +1,277 @@
{
  "timestamp": "2026-01-02 11:22:34",
  "summaries": {
    "binary": {
      "strategy": "binary",
      "total_queries": 15,
      "successful_queries": 15,
      "avg_latency_ms": 850.328753333209,
      "min_latency_ms": 750.9617999967304,
      "max_latency_ms": 1015.733200001705,
      "p50_latency_ms": 847.9711999971187,
      "p95_latency_ms": 976.768470002571,
      "p99_latency_ms": 1007.9402540018782,
      "avg_results": 0,
      "errors": []
    },
    "hybrid": {
      "strategy": "hybrid",
      "total_queries": 15,
      "successful_queries": 15,
      "avg_latency_ms": 821.3745733330143,
      "min_latency_ms": 720.5589000004693,
      "max_latency_ms": 943.0299999949057,
      "p50_latency_ms": 819.5875000019441,
      "p95_latency_ms": 916.3381599981221,
      "p99_latency_ms": 937.691631995549,
      "avg_results": 0,
      "errors": []
    }
  },
  "details": {
    "binary": [
      {"strategy": "binary", "query": "def search", "latency_ms": 862.7266999974381, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "class Engine", "latency_ms": 773.8472999990336, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "import numpy", "latency_ms": 858.1023000006098, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "async def", "latency_ms": 877.2815999982413, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "raise ValueError", "latency_ms": 824.3320999972639, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "how to parse json", "latency_ms": 948.0362000031164, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "database connection", "latency_ms": 789.3126000053599, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "error handling", "latency_ms": 960.0693000029423, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "authentication logic", "latency_ms": 757.247900000948, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "file read write", "latency_ms": 750.9617999967304, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "embedding vector", "latency_ms": 871.1426000008942, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "cosine similarity", "latency_ms": 817.1380999992834, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "binary quantization", "latency_ms": 1015.733200001705, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "hamming distance", "latency_ms": 847.9711999971187, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "binary", "query": "reranking", "latency_ms": 801.028399997449, "num_results": 0, "top_result": null, "error": null}
    ],
    "hybrid": [
      {"strategy": "hybrid", "query": "def search", "latency_ms": 720.5589000004693, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "class Engine", "latency_ms": 792.9914000051212, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "import numpy", "latency_ms": 943.0299999949057, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "async def", "latency_ms": 819.5875000019441, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "raise ValueError", "latency_ms": 835.5114000005415, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "how to parse json", "latency_ms": 867.8118999960134, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "database connection", "latency_ms": 824.6361999990768, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "error handling", "latency_ms": 742.638600000646, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "authentication logic", "latency_ms": 840.4286999939359, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "file read write", "latency_ms": 810.9049000049708, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "embedding vector", "latency_ms": 876.5335000061896, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "cosine similarity", "latency_ms": 797.3090999948909, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "binary quantization", "latency_ms": 767.9803999999422, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "hamming distance", "latency_ms": 775.7972999970661, "num_results": 0, "top_result": null, "error": null},
      {"strategy": "hybrid", "query": "reranking", "latency_ms": 904.8987999995006, "num_results": 0, "top_result": null, "error": null}
    ]
  }
}
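For reference, a minimal sketch (illustrative only, assuming the layout written by save_results in the benchmark script above) of reading the saved results back and printing the headline numbers:

import json
from pathlib import Path

# Load the committed results file and print per-strategy headline latencies.
data = json.loads(Path("benchmarks/results/cascade_benchmark.json").read_text(encoding="utf-8"))
for name, summary in data["summaries"].items():
    print(f"{name}: avg={summary['avg_latency_ms']:.1f}ms p95={summary['p95_latency_ms']:.1f}ms")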
@@ -7,7 +7,7 @@ import logging
 import os
 import shutil
 from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional
+from typing import Annotated, Any, Dict, Iterable, List, Optional

 import typer
 from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
@@ -2721,3 +2721,305 @@ def _display_index_result(result) -> None:
            console.print(f"    [red]Error:[/red] {error}")
        if len(result.errors) > 3:
            console.print(f"    [dim]... and {len(result.errors) - 3} more errors[/dim]")


# ==================== Cascade Index Commands ====================


def get_binary_index_path(db_path: Path) -> Path:
    """Get the path for binary ANN index file.

    Args:
        db_path: Path to the _index.db file

    Returns:
        Path to the binary index file (_index_binary.bin)
    """
    return db_path.parent / f"{db_path.stem}_binary.bin"


@app.command("cascade-index")
def cascade_index(
    path: Annotated[Path, typer.Argument(help="Directory to index")],
    force: Annotated[bool, typer.Option("--force", "-f", help="Force regenerate")] = False,
    batch_size: Annotated[int, typer.Option("--batch-size", "-b", help="Batch size for embedding")] = 32,
    json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False,
    verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False,
) -> None:
    """Generate cascade embeddings (binary + dense) for two-stage retrieval.

    Cascade retrieval uses a two-stage approach:
    1. Binary search (fast, 32 bytes/vector) -> coarse filtering
    2. Dense rerank (precise, 8KB/vector) -> final results

    This command:
    - Finds all _index.db files in the directory
    - Generates binary (256-dim) and dense (2048-dim) embeddings for each chunk
    - Stores embeddings in the database (embedding_binary, embedding_dense columns)
    - Creates a BinaryANNIndex file for fast coarse retrieval

    Examples:
        codexlens cascade-index ~/projects/my-app
        codexlens cascade-index . --force
        codexlens cascade-index . --batch-size 64 --verbose
    """
    _configure_logging(verbose, json_mode)

    target_path = path.expanduser().resolve()

    # Find index database(s)
    if target_path.is_file() and target_path.name == "_index.db":
        index_dbs = [target_path]
    elif target_path.is_dir():
        # Check local .codexlens/_index.db first
        local_index = target_path / ".codexlens" / "_index.db"
        if local_index.exists():
            index_dbs = [local_index]
        else:
            # Find via registry
            registry = RegistryStore()
            try:
                registry.initialize()
                mapper = PathMapper()
                index_db = mapper.source_to_index_db(target_path)
                if not index_db.exists():
                    if json_mode:
                        print_json(success=False, error=f"No index found for {target_path}")
                    else:
                        console.print(f"[red]Error:[/red] No index found for {target_path}")
                        console.print("Run 'codexlens init' first to create an index")
                    raise typer.Exit(code=1)
                # Find all _index.db files under the index root
                index_root = index_db.parent
                index_dbs = list(index_root.rglob("_index.db"))
            finally:
                registry.close()
    else:
        if json_mode:
            print_json(success=False, error="Path must be _index.db file or indexed directory")
        else:
            console.print("[red]Error:[/red] Path must be _index.db file or indexed directory")
        raise typer.Exit(code=1)

    if not index_dbs:
        if json_mode:
            print_json(success=False, error="No index databases found")
        else:
            console.print("[yellow]No index databases found[/yellow]")
        raise typer.Exit(code=1)

    # Import cascade embedding backend
    try:
        from codexlens.indexing.embedding import CascadeEmbeddingBackend
        from codexlens.semantic.ann_index import BinaryANNIndex
        from codexlens.indexing.embedding import pack_binary_embedding
    except ImportError as e:
        error_msg = f"Cascade embedding dependencies not available: {e}"
        if json_mode:
            print_json(success=False, error=error_msg)
        else:
            console.print(f"[red]Error:[/red] {error_msg}")
            console.print("[dim]Install with: pip install codexlens[semantic][/dim]")
        raise typer.Exit(code=1)

    if not json_mode:
        console.print(f"[bold]Generating cascade embeddings[/bold]")
        console.print(f"Path: [dim]{target_path}[/dim]")
        console.print(f"Index databases: [cyan]{len(index_dbs)}[/cyan]")
        console.print(f"Batch size: [cyan]{batch_size}[/cyan]")
        console.print()

    # Initialize cascade embedding backend
    try:
        cascade_backend = CascadeEmbeddingBackend()
    except Exception as e:
        error_msg = f"Failed to initialize cascade embedding backend: {e}"
        if json_mode:
            print_json(success=False, error=error_msg)
        else:
            console.print(f"[red]Error:[/red] {error_msg}")
        raise typer.Exit(code=1)

    # Process statistics
    total_chunks_processed = 0
    total_indexes_processed = 0
    total_indexes_successful = 0
    total_binary_indexes_created = 0
    errors_list: List[str] = []

    # Process each index database
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TextColumn("({task.completed}/{task.total})"),
        TimeElapsedColumn(),
        console=console,
        disable=json_mode,
    ) as progress:
        db_task = progress.add_task("Processing indexes...", total=len(index_dbs))

        for db_path in index_dbs:
            total_indexes_processed += 1
            index_name = db_path.parent.name

            try:
                # Open the index store
                store = DirIndexStore(db_path)
                store.initialize()

                # Get connection for direct queries
                conn = store._get_connection()

                # Ensure cascade columns exist in semantic_chunks table
                try:
                    conn.execute("ALTER TABLE semantic_chunks ADD COLUMN embedding_binary BLOB")
                except Exception:
                    pass  # Column already exists
                try:
                    conn.execute("ALTER TABLE semantic_chunks ADD COLUMN embedding_dense BLOB")
                except Exception:
                    pass  # Column already exists
                conn.commit()

                # Check if semantic_chunks table exists and has data
                try:
                    cursor = conn.execute("SELECT COUNT(*) FROM semantic_chunks")
                    chunk_count = cursor.fetchone()[0]
                except Exception:
                    # semantic_chunks table doesn't exist or is empty
                    chunk_count = 0

                if chunk_count == 0:
                    if verbose and not json_mode:
                        console.print(f"  [dim]Skipping {index_name}: no chunks found[/dim]")
                    progress.advance(db_task)
                    store.close()
                    continue

                # Check if embeddings already exist (unless force)
                if not force:
                    cursor = conn.execute(
                        "SELECT COUNT(*) FROM semantic_chunks WHERE embedding_binary IS NOT NULL"
                    )
                    existing_count = cursor.fetchone()[0]
                    if existing_count > 0:
                        if verbose and not json_mode:
                            console.print(f"  [dim]Skipping {index_name}: embeddings exist (use --force to regenerate)[/dim]")
                        progress.advance(db_task)
                        store.close()
                        continue

                # If force, clear existing cascade embeddings
                if force:
                    conn.execute(
                        "UPDATE semantic_chunks SET embedding_binary = NULL, embedding_dense = NULL"
                    )
                    conn.commit()

                # Get all chunks
                cursor = conn.execute("SELECT id, content FROM semantic_chunks")
                chunks = cursor.fetchall()

                if not chunks:
                    progress.advance(db_task)
                    store.close()
                    continue

                if verbose and not json_mode:
                    console.print(f"  Processing {index_name}: {len(chunks)} chunks")

                # Process in batches
                chunk_task = progress.add_task(
                    f"  {index_name}", total=len(chunks)
                )

                # Prepare for BinaryANNIndex
                binary_index_path = get_binary_index_path(db_path)
                binary_ann_index = BinaryANNIndex(db_path, dim=256)

                for i in range(0, len(chunks), batch_size):
                    batch_chunks = chunks[i:i + batch_size]
                    batch_ids = [c[0] for c in batch_chunks]
                    batch_contents = [c[1] for c in batch_chunks]

                    # Generate cascade embeddings
                    binary_embeddings, dense_embeddings = cascade_backend.encode_cascade(
                        batch_contents, batch_size=batch_size
                    )

                    # Pack binary embeddings and convert dense to bytes
                    packed_binaries = []
                    dense_bytes_list = []

                    for j in range(len(batch_ids)):
                        # Pack binary embedding (256 bits -> 32 bytes)
                        packed_binary = pack_binary_embedding(binary_embeddings[j])
                        packed_binaries.append(packed_binary)

                        # Convert dense embedding to bytes
                        import numpy as np
                        dense_blob = dense_embeddings[j].astype(np.float32).tobytes()
                        dense_bytes_list.append(dense_blob)

                    # Update database
                    for j, chunk_id in enumerate(batch_ids):
                        conn.execute(
                            """
                            UPDATE semantic_chunks
                            SET embedding_binary = ?, embedding_dense = ?
                            WHERE id = ?
                            """,
                            (packed_binaries[j], dense_bytes_list[j], chunk_id)
                        )

                    # Add to binary ANN index
                    binary_ann_index.add_vectors(batch_ids, packed_binaries)

                    conn.commit()
                    total_chunks_processed += len(batch_ids)
                    progress.advance(chunk_task, len(batch_ids))

                # Save binary ANN index
                binary_ann_index.save()
                total_binary_indexes_created += 1

                progress.remove_task(chunk_task)
                store.close()
                total_indexes_successful += 1

            except Exception as e:
                error_msg = f"{index_name}: {e}"
                errors_list.append(error_msg)
                if verbose and not json_mode:
                    console.print(f"  [red]Error processing {index_name}:[/red] {e}")

            progress.advance(db_task)

    # Build result
    result = {
        "path": str(target_path),
        "indexes_processed": total_indexes_processed,
        "indexes_successful": total_indexes_successful,
        "chunks_processed": total_chunks_processed,
        "binary_indexes_created": total_binary_indexes_created,
        "errors": len(errors_list),
        "error_details": errors_list[:5] if errors_list else [],
    }

    if json_mode:
        print_json(success=True, result=result)
    else:
        console.print(f"\n[green]Cascade indexing complete[/green]")
        console.print(f"  Indexes processed: {total_indexes_processed}")
        console.print(f"  Indexes successful: {total_indexes_successful}")
        console.print(f"  Chunks processed: {total_chunks_processed:,}")
        console.print(f"  Binary indexes created: {total_binary_indexes_created}")
        if errors_list:
            console.print(f"  [yellow]Errors: {len(errors_list)}[/yellow]")
            for err in errors_list[:3]:
                console.print(f"    [dim]{err}[/dim]")
            if len(errors_list) > 3:
                console.print(f"    [dim]... and {len(errors_list) - 3} more[/dim]")
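The cascade-index command above stores 32-byte packed binary vectors plus float32 dense vectors per chunk. The two-stage retrieval its docstring describes (binary coarse filtering, then dense rerank) can be sketched roughly as follows; this is an illustrative numpy sketch under assumed data shapes, not the project's ChainSearchEngine implementation:

import numpy as np

def cascade_rank(query_bin, query_dense, packed_bins, dense_vecs, coarse_k=100, k=10):
    # Stage 1: Hamming distance over packed bits (uint8, 32 bytes per 256-bit vector).
    # XOR, unpack to bits, and count set bits per row.
    dists = np.unpackbits(packed_bins ^ query_bin, axis=1).sum(axis=1)
    coarse = np.argsort(dists)[:coarse_k]
    # Stage 2: cosine similarity on the dense float32 vectors of the survivors.
    cand = dense_vecs[coarse]
    sims = cand @ query_dense / (
        np.linalg.norm(cand, axis=1) * np.linalg.norm(query_dense) + 1e-9
    )
    # Return indices of the top-k chunks in the original arrays.
    return coarse[np.argsort(-sims)[:k]]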
@@ -265,8 +265,8 @@ class DenseEmbeddingBackend(BaseEmbedder):
     Model: BAAI/bge-large-en-v1.5 (1024 dim) with optional expansion
     """

-    DEFAULT_MODEL = "BAAI/bge-large-en-v1.5"  # 1024 dim, high quality
-    TARGET_DIM = 2048
+    DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"  # 384 dim, use small for testing
+    TARGET_DIM = 768  # Reduced target for faster testing

     def __init__(
         self,
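The hunk above swaps the dense backend to a smaller test configuration; the "optional expansion" note refers to growing the model's native output up to TARGET_DIM. How the project performs that expansion is not shown in this diff; purely as an illustration, a zero-padding variant might look like this (hypothetical helper, not the repository's code):

import numpy as np

def expand_to_target(vec: np.ndarray, target_dim: int = 768) -> np.ndarray:
    # Hypothetical illustration only: pad a shorter embedding with zeros up to target_dim.
    out = np.zeros(target_dim, dtype=np.float32)
    n = min(vec.shape[0], target_dim)
    out[:n] = vec.astype(np.float32)[:n]
    return out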