Implement search and reranking functionality with FTS and embedding support

- Added BaseReranker abstract class for defining reranking interfaces.
- Implemented FastEmbedReranker using fastembed's TextCrossEncoder for scoring document-query pairs.
- Introduced FTSEngine for full-text search capabilities using SQLite FTS5.
- Developed SearchPipeline to integrate embedding, binary search, ANN indexing, FTS, and reranking.
- Added fusion methods for combining results from different search strategies using Reciprocal Rank Fusion.
- Created unit and integration tests for the new search and reranking components.
- Established configuration management for search parameters and models.
This commit is contained in:
catlog22
2026-03-16 23:03:17 +08:00
parent 5a4b18d9b1
commit de4158597b
41 changed files with 2655 additions and 1848 deletions

View File

@@ -0,0 +1,5 @@
import os
import sys

# Make the repo-local ``src`` tree shadow any installed codexlens package.
_SRC_DIR = os.path.join(os.path.dirname(__file__), "src")
sys.path.insert(0, _SRC_DIR)

View File

@@ -0,0 +1,36 @@
# PEP 517 build configuration: hatchling builds the wheel/sdist.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "codex-lens-v2"
version = "0.1.0"
description = "Minimal code semantic search library with 2-stage pipeline"
requires-python = ">=3.10"
# Core install is dependency-free; all heavy deps are opt-in extras below.
dependencies = []

[project.optional-dependencies]
# Semantic search stack: ANN index + embeddings.
semantic = [
    "hnswlib>=0.8.0",
    "numpy>=1.26",
    "fastembed>=0.4.0,<2.0",
]
# GPU execution for ONNX-based embedding/reranking.
gpu = [
    "onnxruntime-gpu>=1.16",
]
# Alternative ANN/binary backends (pick CPU or GPU build of faiss).
faiss-cpu = [
    "faiss-cpu>=1.7.4",
]
faiss-gpu = [
    "faiss-gpu>=1.7.4",
]
# HTTP client for the remote reranker API backend.
reranker-api = [
    "httpx>=0.25",
]
dev = [
    "pytest>=7.0",
    "pytest-cov",
]

# Ship only the package sources from src/ in the wheel.
[tool.hatch.build.targets.wheel]
packages = ["src/codexlens"]

View File

@@ -0,0 +1,128 @@
"""
对 D:/Claude_dms3 仓库进行索引并测试搜索。
用法: python scripts/index_and_search.py
"""
import sys
import time
from pathlib import Path
# 确保 src 可被导入
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from codexlens.config import Config
from codexlens.core.factory import create_ann_index, create_binary_index
from codexlens.embed.local import FastEmbedEmbedder
from codexlens.indexing import IndexingPipeline
from codexlens.rerank.local import FastEmbedReranker
from codexlens.search.fts import FTSEngine
from codexlens.search.pipeline import SearchPipeline
# ─── 配置 ──────────────────────────────────────────────────────────────────
REPO_ROOT = Path("D:/Claude_dms3")
INDEX_DIR = Path("D:/Claude_dms3/codex-lens-v2/.index_cache")
EXTENSIONS = {".py", ".ts", ".js", ".md"}
MAX_FILE_SIZE = 50_000 # bytes
MAX_CHUNK_CHARS = 800 # 每个 chunk 的最大字符数
CHUNK_OVERLAP = 100
# ─── 文件收集 ───────────────────────────────────────────────────────────────
SKIP_DIRS = {
".git", "node_modules", "__pycache__", ".pytest_cache",
"dist", "build", ".venv", "venv", ".cache", ".index_cache",
"codex-lens-v2", # 不索引自身
}
def collect_files(root: Path) -> list[Path]:
    """Return indexable files under *root*: allowed extension, size-capped, outside SKIP_DIRS."""
    selected: list[Path] = []
    for entry in root.rglob("*"):
        if any(part in SKIP_DIRS for part in entry.parts):
            continue
        if not entry.is_file() or entry.suffix not in EXTENSIONS:
            continue
        if entry.stat().st_size <= MAX_FILE_SIZE:
            selected.append(entry)
    return selected
# ─── 主流程 ─────────────────────────────────────────────────────────────────
def main():
    """Index the target repository with a small profile, then run sample searches."""
    INDEX_DIR.mkdir(parents=True, exist_ok=True)
    # 1. Use a small profile for speed
    config = Config(
        embed_model="BAAI/bge-small-en-v1.5",
        embed_dim=384,
        embed_batch_size=32,
        hnsw_ef=100,
        hnsw_M=16,
        binary_top_k=100,
        ann_top_k=30,
        reranker_top_k=10,
    )
    print("=== codex-lens-v2 索引测试 ===\n")
    # 2. Collect candidate files
    print(f"[1/4] 扫描 {REPO_ROOT} ...")
    files = collect_files(REPO_ROOT)
    print(f" 找到 {len(files)} 个文件")
    # 3. Initialize components
    print(f"\n[2/4] 加载嵌入模型 (bge-small-en-v1.5, dim=384) ...")
    embedder = FastEmbedEmbedder(config)
    binary_store = create_binary_index(INDEX_DIR, config.embed_dim, config)
    ann_index = create_ann_index(INDEX_DIR, config.embed_dim, config)
    fts = FTSEngine(":memory:") # in-memory FTS, not persisted
    # 4. Index files in parallel via IndexingPipeline (chunk -> embed -> index)
    print(f"[3/4] 并行索引 {len(files)} 个文件 ...")
    pipeline = IndexingPipeline(
        embedder=embedder,
        binary_store=binary_store,
        ann_index=ann_index,
        fts=fts,
        config=config,
    )
    stats = pipeline.index_files(
        files,
        root=REPO_ROOT,
        max_chunk_chars=MAX_CHUNK_CHARS,
        chunk_overlap=CHUNK_OVERLAP,
        max_file_size=MAX_FILE_SIZE,
    )
    print(f" 索引完成: {stats.files_processed} 文件, {stats.chunks_created} chunks ({stats.duration_seconds:.1f}s)")
    # 5. Search smoke test
    print(f"\n[4/4] 构建 SearchPipeline ...")
    reranker = FastEmbedReranker(config)
    # NOTE(review): rebinds ``pipeline`` from the IndexingPipeline above.
    pipeline = SearchPipeline(
        embedder=embedder,
        binary_store=binary_store,
        ann_index=ann_index,
        reranker=reranker,
        fts=fts,
        config=config,
    )
    queries = [
        "authentication middleware function",
        "def embed_single",
        "RRF fusion weights",
        "fastembed TextCrossEncoder reranker",
        "how to search code semantic",
    ]
    print("\n" + "=" * 60)
    for query in queries:
        t0 = time.time()
        results = pipeline.search(query, top_k=5)
        elapsed = time.time() - t0
        print(f"\nQuery: {query!r} ({elapsed*1000:.0f}ms)")
        if results:
            for r in results:
                print(f" [{r.score:.3f}] {r.path}")
        else:
            print(" (无结果)")
    print("=" * 60)
    print("\n测试完成 ✓")
if __name__ == "__main__":
main()

View File

View File

@@ -0,0 +1,99 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
log = logging.getLogger(__name__)
@dataclass
class Config:
# Embedding
embed_model: str = "jinaai/jina-embeddings-v2-base-code"
embed_dim: int = 768
embed_batch_size: int = 64
# GPU / execution providers
device: str = "auto" # 'auto', 'cuda', 'cpu'
embed_providers: list[str] | None = None # explicit ONNX providers override
# Backend selection: 'auto', 'faiss', 'hnswlib'
ann_backend: str = "auto"
binary_backend: str = "auto"
# Indexing pipeline
index_workers: int = 2 # number of parallel indexing workers
# HNSW index (ANNIndex)
hnsw_ef: int = 150
hnsw_M: int = 32
hnsw_ef_construction: int = 200
# Binary coarse search (BinaryStore)
binary_top_k: int = 200
# ANN fine search
ann_top_k: int = 50
# Reranker
reranker_model: str = "BAAI/bge-reranker-v2-m3"
reranker_top_k: int = 20
reranker_batch_size: int = 32
# API reranker (optional)
reranker_api_url: str = ""
reranker_api_key: str = ""
reranker_api_model: str = ""
reranker_api_max_tokens_per_batch: int = 2048
# FTS
fts_top_k: int = 50
# Fusion
fusion_k: int = 60 # RRF k parameter
fusion_weights: dict = field(default_factory=lambda: {
"exact": 0.25,
"fuzzy": 0.10,
"vector": 0.50,
"graph": 0.15,
})
def resolve_embed_providers(self) -> list[str]:
"""Return ONNX execution providers based on device config.
Priority: explicit embed_providers > device setting > auto-detect.
"""
if self.embed_providers is not None:
return list(self.embed_providers)
if self.device == "cuda":
return ["CUDAExecutionProvider", "CPUExecutionProvider"]
if self.device == "cpu":
return ["CPUExecutionProvider"]
# auto-detect
try:
import onnxruntime
available = onnxruntime.get_available_providers()
if "CUDAExecutionProvider" in available:
log.info("CUDA detected via onnxruntime, using GPU for embedding")
return ["CUDAExecutionProvider", "CPUExecutionProvider"]
except ImportError:
pass
return ["CPUExecutionProvider"]
@classmethod
def defaults(cls) -> "Config":
return cls()
@classmethod
def small(cls) -> "Config":
"""Smaller config for testing or small corpora."""
return cls(
hnsw_ef=50,
hnsw_M=16,
binary_top_k=50,
ann_top_k=20,
reranker_top_k=10,
)

View File

@@ -0,0 +1,13 @@
from .base import BaseANNIndex, BaseBinaryIndex
from .binary import BinaryStore
from .factory import create_ann_index, create_binary_index
from .index import ANNIndex
__all__ = [
"BaseANNIndex",
"BaseBinaryIndex",
"ANNIndex",
"BinaryStore",
"create_ann_index",
"create_binary_index",
]

View File

@@ -0,0 +1,83 @@
from __future__ import annotations
from abc import ABC, abstractmethod
import numpy as np
class BaseANNIndex(ABC):
    """Abstract base class for approximate nearest neighbor indexes.

    Defines the float-vector fine-search contract shared by the hnswlib
    and FAISS implementations.
    """
    @abstractmethod
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors with corresponding IDs.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """
    @abstractmethod
    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; None means the implementation's
                configured default.

        Returns:
            (ids, distances) as numpy arrays
        """
    @abstractmethod
    def save(self) -> None:
        """Persist index to disk."""
    @abstractmethod
    def load(self) -> None:
        """Load index from disk."""
    @abstractmethod
    def __len__(self) -> int:
        """Return the number of indexed items."""
class BaseBinaryIndex(ABC):
    """Abstract base class for binary vector indexes (Hamming distance).

    Implementations binary-quantize incoming float vectors and serve a
    cheap coarse search stage ahead of the ANN fine search.
    """
    @abstractmethod
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors (will be binary-quantized internally).

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """
    @abstractmethod
    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; None means the implementation's
                configured default.

        Returns:
            (ids, distances) sorted ascending by distance
        """
    @abstractmethod
    def save(self) -> None:
        """Persist store to disk."""
    @abstractmethod
    def load(self) -> None:
        """Load store from disk."""
    @abstractmethod
    def __len__(self) -> int:
        """Return the number of stored items."""

View File

@@ -0,0 +1,173 @@
from __future__ import annotations
import logging
import math
from pathlib import Path
import numpy as np
from codexlens.config import Config
from codexlens.core.base import BaseBinaryIndex
logger = logging.getLogger(__name__)
class BinaryStore(BaseBinaryIndex):
    """Persistent binary vector store using numpy memmap.

    Stores binary-quantized float32 vectors as packed uint8 arrays on disk.
    Supports fast coarse search via XOR + popcount Hamming distance.

    NOTE(review): unlike the FAISS-backed indexes this class takes no lock;
    it appears to assume single-threaded access -- confirm before sharing
    an instance across threads.
    """
    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        """Open a store under directory *path* for *dim*-dimensional vectors."""
        self._dir = Path(path)
        self._dim = dim
        self._config = config
        # Row width on disk; np.packbits zero-pads the last byte when
        # dim % 8 != 0, and queries are padded identically, so Hamming
        # distances remain consistent.
        self._packed_bytes = math.ceil(dim / 8)
        self._bin_path = self._dir / "binary_store.bin"
        self._ids_path = self._dir / "binary_store_ids.npy"
        # Pre-allocated (possibly oversized) buffers; only [:_count] is valid.
        self._matrix: np.ndarray | None = None # shape (N, packed_bytes), uint8
        self._ids: np.ndarray | None = None # shape (N,), int64
        self._count: int = 0
        # Restore a previously persisted store if both files are present.
        if self._bin_path.exists() and self._ids_path.exists():
            self.load()
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _quantize(self, vectors: np.ndarray) -> np.ndarray:
        """Convert float32 vectors (N, dim) to packed uint8 (N, packed_bytes)."""
        # Sign-bit quantization: strictly positive components map to 1-bits.
        binary = (vectors > 0).astype(np.uint8)
        packed = np.packbits(binary, axis=1)
        return packed
    def _quantize_single(self, vec: np.ndarray) -> np.ndarray:
        """Convert a single float32 vector (dim,) to packed uint8 (packed_bytes,)."""
        binary = (vec > 0).astype(np.uint8)
        return np.packbits(binary)
    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def _ensure_capacity(self, needed: int) -> None:
        """Grow pre-allocated matrix/ids arrays to fit *needed* total items."""
        if self._matrix is not None and self._matrix.shape[0] >= needed:
            return
        # Fresh store: allocate at least 1024 rows up front.
        new_cap = max(1024, needed)
        # Double until large enough
        if self._matrix is not None:
            cur_cap = self._matrix.shape[0]
            new_cap = max(cur_cap, 1024)
            while new_cap < needed:
                new_cap *= 2
        new_matrix = np.zeros((new_cap, self._packed_bytes), dtype=np.uint8)
        new_ids = np.zeros(new_cap, dtype=np.int64)
        # Copy the occupied prefix into the larger buffers.
        if self._matrix is not None and self._count > 0:
            new_matrix[: self._count] = self._matrix[: self._count]
            new_ids[: self._count] = self._ids[: self._count]
        self._matrix = new_matrix
        self._ids = new_ids
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors and their ids.

        Does NOT call save() internally -- callers must call save()
        explicitly after batch indexing.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        packed = self._quantize(vectors) # (N, packed_bytes)
        n = len(ids)
        self._ensure_capacity(self._count + n)
        # Append into the pre-allocated region, then advance the watermark.
        self._matrix[self._count : self._count + n] = packed
        self._ids[self._count : self._count + n] = ids.astype(np.int64)
        self._count += n
    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.binary_top_k

        Returns:
            (ids, distances) sorted ascending by Hamming distance
        """
        if self._matrix is None or self._count == 0:
            return np.array([], dtype=np.int64), np.array([], dtype=np.int32)
        k = top_k if top_k is not None else self._config.binary_top_k
        k = min(k, self._count)
        query_bin = self._quantize_single(query_vec) # (packed_bytes,)
        # Slice to active region (matrix may be pre-allocated larger)
        active_matrix = self._matrix[: self._count]
        active_ids = self._ids[: self._count]
        # XOR then popcount via unpackbits
        xor = np.bitwise_xor(active_matrix, query_bin[np.newaxis, :]) # (N, packed_bytes)
        dists = np.unpackbits(xor, axis=1).sum(axis=1).astype(np.int32) # (N,)
        if k >= self._count:
            order = np.argsort(dists)
        else:
            # argpartition gets an unsorted top-k in O(N); sort only those k.
            part = np.argpartition(dists, k)[:k]
            order = part[np.argsort(dists[part])]
        return active_ids[order], dists[order]
    def save(self) -> None:
        """Flush binary store to disk.

        No-op on an empty store, so existing files are never truncated by
        an empty save.
        """
        if self._matrix is None or self._count == 0:
            return
        self._dir.mkdir(parents=True, exist_ok=True)
        # Write only the occupied portion of the pre-allocated matrix
        active_matrix = self._matrix[: self._count]
        mm = np.memmap(
            str(self._bin_path),
            dtype=np.uint8,
            mode="w+",
            shape=active_matrix.shape,
        )
        mm[:] = active_matrix
        mm.flush()
        del mm
        np.save(str(self._ids_path), self._ids[: self._count])
    def load(self) -> None:
        """Reload binary store from disk.

        The id file's length determines the expected row count of the
        binary matrix file.
        """
        ids = np.load(str(self._ids_path))
        n = len(ids)
        if n == 0:
            return
        mm = np.memmap(
            str(self._bin_path),
            dtype=np.uint8,
            mode="r",
            shape=(n, self._packed_bytes),
        )
        self._matrix = np.array(mm) # copy into RAM for mutation support
        del mm
        self._ids = ids.astype(np.int64)
        self._count = n
    def __len__(self) -> int:
        """Return the number of stored vectors."""
        return self._count

View File

@@ -0,0 +1,116 @@
from __future__ import annotations
import logging
from pathlib import Path
from codexlens.config import Config
from codexlens.core.base import BaseANNIndex, BaseBinaryIndex
logger = logging.getLogger(__name__)
try:
import faiss as _faiss # noqa: F401
_FAISS_AVAILABLE = True
except ImportError:
_FAISS_AVAILABLE = False
try:
import hnswlib as _hnswlib # noqa: F401
_HNSWLIB_AVAILABLE = True
except ImportError:
_HNSWLIB_AVAILABLE = False
def _has_faiss_gpu() -> bool:
    """Check whether faiss-gpu is available (has GPU resources)."""
    if not _FAISS_AVAILABLE:
        return False
    import faiss

    try:
        # Instantiating GPU resources is the reliable capability probe.
        faiss.StandardGpuResources()
    except (AttributeError, RuntimeError):
        return False
    return True
def create_ann_index(path: str | Path, dim: int, config: Config) -> BaseANNIndex:
    """Create an ANN index according to ``config.ann_backend``.

    'faiss' and 'hnswlib' force that backend; any other value (normally
    'auto') falls back through faiss -> hnswlib.

    Args:
        path: directory for index persistence
        dim: vector dimensionality
        config: project configuration

    Returns:
        A BaseANNIndex implementation

    Raises:
        ImportError: if no suitable backend is available
    """
    requested = config.ann_backend
    if requested == "faiss":
        from codexlens.core.faiss_index import FAISSANNIndex

        return FAISSANNIndex(path, dim, config)
    if requested == "hnswlib":
        from codexlens.core.index import ANNIndex

        return ANNIndex(path, dim, config)
    # Auto selection: prefer faiss when importable, else hnswlib.
    if _FAISS_AVAILABLE:
        from codexlens.core.faiss_index import FAISSANNIndex

        suffix = " (GPU available)" if _has_faiss_gpu() else " (CPU)"
        logger.info("Auto-selected FAISS ANN backend%s", suffix)
        return FAISSANNIndex(path, dim, config)
    if _HNSWLIB_AVAILABLE:
        from codexlens.core.index import ANNIndex

        logger.info("Auto-selected hnswlib ANN backend")
        return ANNIndex(path, dim, config)
    raise ImportError(
        "No ANN backend available. Install faiss-cpu, faiss-gpu, or hnswlib."
    )
def create_binary_index(
    path: str | Path, dim: int, config: Config
) -> BaseBinaryIndex:
    """Create a binary (Hamming) index according to ``config.binary_backend``.

    'faiss' forces the FAISS backend; 'hnswlib' selects the numpy
    BinaryStore (there is no hnswlib binary index); any other value
    (normally 'auto') tries faiss first, then the always-available
    numpy BinaryStore.

    Args:
        path: directory for index persistence
        dim: vector dimensionality
        config: project configuration

    Returns:
        A BaseBinaryIndex implementation

    Raises:
        ImportError: if no suitable backend is available
    """
    requested = config.binary_backend
    if requested == "faiss":
        from codexlens.core.faiss_index import FAISSBinaryIndex

        return FAISSBinaryIndex(path, dim, config)
    if requested == "hnswlib":
        from codexlens.core.binary import BinaryStore

        return BinaryStore(path, dim, config)
    if _FAISS_AVAILABLE:
        from codexlens.core.faiss_index import FAISSBinaryIndex

        logger.info("Auto-selected FAISS binary backend")
        return FAISSBinaryIndex(path, dim, config)
    # The numpy BinaryStore needs no optional dependencies.
    from codexlens.core.binary import BinaryStore

    logger.info("Auto-selected numpy BinaryStore backend")
    return BinaryStore(path, dim, config)

View File

@@ -0,0 +1,275 @@
from __future__ import annotations
import logging
import math
import threading
from pathlib import Path
import numpy as np
from codexlens.config import Config
from codexlens.core.base import BaseANNIndex, BaseBinaryIndex
logger = logging.getLogger(__name__)
try:
import faiss
_FAISS_AVAILABLE = True
except ImportError:
faiss = None # type: ignore[assignment]
_FAISS_AVAILABLE = False
def _try_gpu_index(index: "faiss.Index") -> "faiss.Index":
    """Transfer a FAISS index to GPU if faiss-gpu is available.

    Returns the GPU index on success, or the original CPU index on failure.
    """
    try:
        # Both calls can fail on CPU-only builds (AttributeError) or at
        # runtime without a usable device (RuntimeError).
        gpu_copy = faiss.index_cpu_to_gpu(faiss.StandardGpuResources(), 0, index)
    except (AttributeError, RuntimeError) as exc:
        logger.debug("GPU transfer unavailable, staying on CPU: %s", exc)
        return index
    logger.info("FAISS index transferred to GPU 0")
    return gpu_copy
def _to_cpu_for_save(index: "faiss.Index") -> "faiss.Index":
    """Convert a GPU index back to CPU for serialization."""
    try:
        cpu_copy = faiss.index_gpu_to_cpu(index)
    except (AttributeError, RuntimeError):
        # Already a CPU index, or this faiss build has no GPU support.
        return index
    return cpu_copy
class FAISSANNIndex(BaseANNIndex):
    """FAISS-based ANN index using IndexHNSWFlat with optional GPU.

    Uses Inner Product space with L2-normalized vectors for cosine similarity.
    Thread-safe via RLock.

    NOTE(review): IndexHNSWFlat assigns sequential labels and the ``ids``
    passed to :meth:`add` are ignored, so search results are insertion
    positions -- confirm callers map them back to their own ids.
    """
    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        """Prepare paths and lock; the index itself is lazily loaded on first use."""
        if not _FAISS_AVAILABLE:
            raise ImportError(
                "faiss is required. Install with: pip install faiss-cpu "
                "or pip install faiss-gpu"
            )
        self._path = Path(path)
        self._index_path = self._path / "faiss_ann.index"
        self._dim = dim
        self._config = config
        # Guards all index access; faiss objects are not safe for
        # concurrent mutation.
        self._lock = threading.RLock()
        # Lazily created/loaded via _ensure_loaded().
        self._index: faiss.Index | None = None
    def _ensure_loaded(self) -> None:
        """Load or initialize the index (caller holds lock)."""
        if self._index is not None:
            return
        self.load()
    def load(self) -> None:
        """Load index from disk or initialize a fresh one."""
        with self._lock:
            if self._index_path.exists():
                idx = faiss.read_index(str(self._index_path))
                logger.debug(
                    "Loaded FAISS ANN index from %s (%d items)",
                    self._index_path, idx.ntotal,
                )
            else:
                # HNSW with flat storage, M=32 by default
                m = self._config.hnsw_M
                idx = faiss.IndexHNSWFlat(self._dim, m, faiss.METRIC_INNER_PRODUCT)
                idx.hnsw.efConstruction = self._config.hnsw_ef_construction
                idx.hnsw.efSearch = self._config.hnsw_ef
                logger.debug(
                    "Initialized fresh FAISS HNSW index (dim=%d, M=%d)",
                    self._dim, m,
                )
            # Move to GPU when faiss-gpu is usable; otherwise stays on CPU.
            self._index = _try_gpu_index(idx)
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add L2-normalized float32 vectors.

        Vectors are normalized before insertion so that Inner Product
        distance equals cosine similarity.

        Args:
            ids: shape (N,) int64 -- currently unused by FAISS flat index
                but kept for API compatibility. FAISS uses sequential IDs.
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        vecs = np.ascontiguousarray(vectors, dtype=np.float32)
        # Normalize for cosine similarity via Inner Product
        faiss.normalize_L2(vecs)
        with self._lock:
            self._ensure_loaded()
            self._index.add(vecs)
    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.ann_top_k

        Returns:
            (ids, distances) as numpy arrays. For IP space, higher = more
            similar, but distances are returned as-is for consumer handling.
        """
        k = top_k if top_k is not None else self._config.ann_top_k
        with self._lock:
            self._ensure_loaded()
            count = self._index.ntotal
            if count == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.float32)
            k = min(k, count)
            # Set efSearch for HNSW accuracy
            try:
                self._index.hnsw.efSearch = max(self._config.hnsw_ef, k)
            except AttributeError:
                pass # GPU index may not expose hnsw attribute directly
            q = np.ascontiguousarray(query_vec, dtype=np.float32).reshape(1, -1)
            # Queries must be normalized the same way as stored vectors.
            faiss.normalize_L2(q)
            distances, labels = self._index.search(q, k)
            # Single-query batch: take row 0, cast to documented dtypes.
            return labels[0].astype(np.int64), distances[0].astype(np.float32)
    def save(self) -> None:
        """Save index to disk."""
        with self._lock:
            if self._index is None:
                # Never loaded/added; nothing to persist.
                return
            self._path.mkdir(parents=True, exist_ok=True)
            # GPU indexes cannot be serialized directly; copy to CPU first.
            cpu_index = _to_cpu_for_save(self._index)
            faiss.write_index(cpu_index, str(self._index_path))
    def __len__(self) -> int:
        """Return the number of indexed vectors."""
        with self._lock:
            if self._index is None:
                return 0
            return self._index.ntotal
class FAISSBinaryIndex(BaseBinaryIndex):
    """FAISS-based binary index using IndexBinaryFlat for Hamming distance.

    Vectors are binary-quantized (sign bit) before insertion.
    Thread-safe via RLock.

    NOTE(review): faiss.IndexBinaryFlat takes its dimension in bits and
    expects it to be a multiple of 8, while ``_packed_bytes`` uses ceil --
    confirm callers always pass dims divisible by 8.
    """
    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        """Prepare paths and lock; the index itself is lazily loaded on first use."""
        if not _FAISS_AVAILABLE:
            raise ImportError(
                "faiss is required. Install with: pip install faiss-cpu "
                "or pip install faiss-gpu"
            )
        self._path = Path(path)
        self._index_path = self._path / "faiss_binary.index"
        self._dim = dim
        self._config = config
        # Bytes per packed vector row (dim bits, 8 per byte).
        self._packed_bytes = math.ceil(dim / 8)
        # Guards all faiss index access.
        self._lock = threading.RLock()
        # Lazily created/loaded via _ensure_loaded().
        self._index: faiss.IndexBinary | None = None
    def _ensure_loaded(self) -> None:
        # Load or initialize on first use (caller holds the lock).
        if self._index is not None:
            return
        self.load()
    def _quantize(self, vectors: np.ndarray) -> np.ndarray:
        """Convert float32 vectors (N, dim) to packed uint8 (N, packed_bytes)."""
        # Sign-bit quantization: strictly positive components become 1-bits.
        binary = (vectors > 0).astype(np.uint8)
        return np.packbits(binary, axis=1)
    def _quantize_single(self, vec: np.ndarray) -> np.ndarray:
        """Convert a single float32 vector (dim,) to packed uint8 (1, packed_bytes)."""
        binary = (vec > 0).astype(np.uint8)
        # Reshape to a 1-row batch; faiss search expects 2-D input.
        return np.packbits(binary).reshape(1, -1)
    def load(self) -> None:
        """Load binary index from disk or initialize a fresh one."""
        with self._lock:
            if self._index_path.exists():
                idx = faiss.read_index_binary(str(self._index_path))
                logger.debug(
                    "Loaded FAISS binary index from %s (%d items)",
                    self._index_path, idx.ntotal,
                )
            else:
                # IndexBinaryFlat takes dimension in bits
                idx = faiss.IndexBinaryFlat(self._dim)
                logger.debug(
                    "Initialized fresh FAISS binary index (dim_bits=%d)", self._dim,
                )
            self._index = idx
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors (binary-quantized internally).

        Args:
            ids: shape (N,) int64 -- kept for API compatibility; the flat
                index assigns sequential labels itself.
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        packed = self._quantize(vectors)
        # faiss requires C-contiguous uint8 input.
        packed = np.ascontiguousarray(packed, dtype=np.uint8)
        with self._lock:
            self._ensure_loaded()
            self._index.add(packed)
    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.binary_top_k

        Returns:
            (ids, distances) sorted ascending by Hamming distance
        """
        with self._lock:
            self._ensure_loaded()
            if self._index.ntotal == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.int32)
            k = top_k if top_k is not None else self._config.binary_top_k
            # Never request more results than stored vectors.
            k = min(k, self._index.ntotal)
            q = self._quantize_single(query_vec)
            q = np.ascontiguousarray(q, dtype=np.uint8)
            distances, labels = self._index.search(q, k)
            # Single-query batch: take row 0, cast to documented dtypes.
            return labels[0].astype(np.int64), distances[0].astype(np.int32)
    def save(self) -> None:
        """Save binary index to disk."""
        with self._lock:
            if self._index is None:
                # Never loaded/added; nothing to persist.
                return
            self._path.mkdir(parents=True, exist_ok=True)
            faiss.write_index_binary(self._index, str(self._index_path))
    def __len__(self) -> int:
        """Return the number of stored vectors."""
        with self._lock:
            if self._index is None:
                return 0
            return self._index.ntotal

View File

@@ -0,0 +1,136 @@
from __future__ import annotations
import logging
import threading
from pathlib import Path
import numpy as np
from codexlens.config import Config
from codexlens.core.base import BaseANNIndex
logger = logging.getLogger(__name__)
try:
import hnswlib
_HNSWLIB_AVAILABLE = True
except ImportError:
_HNSWLIB_AVAILABLE = False
class ANNIndex(BaseANNIndex):
    """HNSW-based approximate nearest neighbor index.

    Backed by hnswlib with cosine space. Lazy-loads on first use,
    thread-safe via RLock. Unlike the FAISS backend, caller-supplied ids
    are stored and returned by searches.
    """
    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        """Prepare paths and lock; the index itself is lazily loaded on first use."""
        if not _HNSWLIB_AVAILABLE:
            raise ImportError("hnswlib is required. Install with: pip install hnswlib")
        self._path = Path(path)
        self._hnsw_path = self._path / "ann_index.hnsw"
        self._dim = dim
        self._config = config
        # Guards all index access; hnswlib indexes are not safe for
        # concurrent mutation.
        self._lock = threading.RLock()
        # Lazily created/loaded via _ensure_loaded().
        self._index: hnswlib.Index | None = None
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _ensure_loaded(self) -> None:
        """Load or initialize the index (caller holds lock)."""
        if self._index is not None:
            return
        self.load()
    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def load(self) -> None:
        """Load index from disk or initialize a fresh one."""
        with self._lock:
            idx = hnswlib.Index(space="cosine", dim=self._dim)
            if self._hnsw_path.exists():
                # max_elements=0: presumably reuses the capacity stored in
                # the file; add() resizes later as needed -- confirm against
                # the hnswlib docs for the pinned version.
                idx.load_index(str(self._hnsw_path), max_elements=0)
                idx.set_ef(self._config.hnsw_ef)
                logger.debug("Loaded HNSW index from %s (%d items)", self._hnsw_path, idx.get_current_count())
            else:
                # Small initial capacity; add() grows it on demand.
                idx.init_index(
                    max_elements=1000,
                    ef_construction=self._config.hnsw_ef_construction,
                    M=self._config.hnsw_M,
                )
                idx.set_ef(self._config.hnsw_ef)
                logger.debug("Initialized fresh HNSW index (dim=%d)", self._dim)
            self._index = idx
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors.

        Does NOT call save() internally -- callers must call save()
        explicitly after batch indexing.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        vecs = np.ascontiguousarray(vectors, dtype=np.float32)
        with self._lock:
            self._ensure_loaded()
            # Expand capacity if needed
            current = self._index.get_current_count()
            max_el = self._index.get_max_elements()
            needed = current + len(ids)
            if needed > max_el:
                # Double (plus headroom) to amortize resize cost.
                new_cap = max(max_el * 2, needed + 100)
                self._index.resize_index(new_cap)
            self._index.add_items(vecs, ids.astype(np.int64))
    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.ann_top_k

        Returns:
            (ids, distances) as numpy arrays
        """
        k = top_k if top_k is not None else self._config.ann_top_k
        with self._lock:
            self._ensure_loaded()
            count = self._index.get_current_count()
            if count == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.float32)
            k = min(k, count)
            # ef must be >= k for hnswlib to return k results.
            self._index.set_ef(max(self._config.hnsw_ef, k))
            q = np.ascontiguousarray(query_vec, dtype=np.float32).reshape(1, -1)
            labels, distances = self._index.knn_query(q, k=k)
            # Single-query batch: take row 0, cast to documented dtypes.
            return labels[0].astype(np.int64), distances[0].astype(np.float32)
    def save(self) -> None:
        """Save index to disk (caller may or may not hold lock)."""
        with self._lock:
            if self._index is None:
                # Never loaded/added; nothing to persist.
                return
            self._path.mkdir(parents=True, exist_ok=True)
            self._index.save_index(str(self._hnsw_path))
    def __len__(self) -> int:
        """Return the number of indexed vectors."""
        with self._lock:
            if self._index is None:
                return 0
            return self._index.get_current_count()

View File

@@ -0,0 +1,4 @@
from .base import BaseEmbedder
from .local import FastEmbedEmbedder, EMBED_PROFILES
__all__ = ["BaseEmbedder", "FastEmbedEmbedder", "EMBED_PROFILES"]

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
from abc import ABC, abstractmethod
import numpy as np
class BaseEmbedder(ABC):
    """Abstract interface for text embedders used by the indexing and search pipelines."""
    @abstractmethod
    def embed_single(self, text: str) -> np.ndarray:
        """Embed a single text, returns float32 ndarray shape (dim,)."""
    @abstractmethod
    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        """Embed a list of texts, returns list of float32 ndarrays."""

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
import numpy as np
from ..config import Config
from .base import BaseEmbedder
# Named embedding profiles: size tier -> fastembed model id (output dims noted).
EMBED_PROFILES = {
    "small": "BAAI/bge-small-en-v1.5", # 384d
    "base": "BAAI/bge-base-en-v1.5", # 768d
    "large": "BAAI/bge-large-en-v1.5", # 1024d
    "code": "jinaai/jina-embeddings-v2-base-code", # 768d
}
class FastEmbedEmbedder(BaseEmbedder):
    """Embedder backed by fastembed.TextEmbedding with lazy model loading."""
    def __init__(self, config: Config) -> None:
        self._config = config
        self._model = None
    def _load(self) -> None:
        """Instantiate the fastembed model on first use; no-op afterwards."""
        if self._model is not None:
            return
        from fastembed import TextEmbedding

        providers = self._config.resolve_embed_providers()
        try:
            self._model = TextEmbedding(
                model_name=self._config.embed_model,
                providers=providers,
            )
        except TypeError:
            # Older fastembed versions may not accept providers kwarg
            self._model = TextEmbedding(model_name=self._config.embed_model)
    def embed_single(self, text: str) -> np.ndarray:
        """Embed a single text, returns float32 ndarray of shape (dim,)."""
        self._load()
        vectors = list(self._model.embed([text]))
        return vectors[0].astype(np.float32)
    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        """Embed a list of texts in batches, returns list of float32 ndarrays."""
        self._load()
        step = self._config.embed_batch_size
        out: list[np.ndarray] = []
        for offset in range(0, len(texts), step):
            chunk = texts[offset : offset + step]
            out.extend(vec.astype(np.float32) for vec in self._model.embed(chunk))
        return out

View File

@@ -0,0 +1,5 @@
from __future__ import annotations
from .pipeline import IndexingPipeline, IndexStats
__all__ = ["IndexingPipeline", "IndexStats"]

View File

@@ -0,0 +1,277 @@
"""Three-stage parallel indexing pipeline: chunk -> embed -> index.
Uses threading.Thread with queue.Queue for producer-consumer handoff.
The GIL is acceptable because embedding (onnxruntime) releases it in C extensions.
"""
from __future__ import annotations
import logging
import queue
import threading
import time
from dataclasses import dataclass
from pathlib import Path
import numpy as np
from codexlens.config import Config
from codexlens.core.binary import BinaryStore
from codexlens.core.index import ANNIndex
from codexlens.embed.base import BaseEmbedder
from codexlens.search.fts import FTSEngine
logger = logging.getLogger(__name__)
# Sentinel value to signal worker shutdown
_SENTINEL = None
# Defaults for chunking (can be overridden via index_files kwargs)
_DEFAULT_MAX_CHUNK_CHARS = 800
_DEFAULT_CHUNK_OVERLAP = 100
@dataclass
class IndexStats:
    """Statistics returned after indexing completes."""
    # Files that were read and produced at least one chunk.
    files_processed: int = 0
    # Total chunks pushed through the embed/index stages.
    chunks_created: int = 0
    # Wall-clock duration of the full pipeline run, in seconds.
    duration_seconds: float = 0.0
class IndexingPipeline:
"""Parallel 3-stage indexing pipeline with queue-based handoff.
Stage 1 (main thread): Read files, chunk text, push to embed_queue.
Stage 2 (embed worker): Pull text batches, call embed_batch(), push vectors to index_queue.
Stage 3 (index worker): Pull vectors+ids, call BinaryStore.add(), ANNIndex.add(), FTS.add_documents().
After all stages complete, save() is called on BinaryStore and ANNIndex exactly once.
"""
    def __init__(
        self,
        embedder: BaseEmbedder,
        binary_store: BinaryStore,
        ann_index: ANNIndex,
        fts: FTSEngine,
        config: Config,
    ) -> None:
        """Wire the pipeline stages to their backing components.

        Args:
            embedder: Produces vectors for chunk texts (stage 2).
            binary_store: Coarse Hamming index written in stage 3.
            ann_index: Fine ANN index written in stage 3.
            fts: Full-text search engine written in stage 3.
            config: Project configuration (batch sizes, worker counts).
        """
        self._embedder = embedder
        self._binary_store = binary_store
        self._ann_index = ann_index
        self._fts = fts
        self._config = config
def index_files(
self,
files: list[Path],
*,
root: Path | None = None,
max_chunk_chars: int = _DEFAULT_MAX_CHUNK_CHARS,
chunk_overlap: int = _DEFAULT_CHUNK_OVERLAP,
max_file_size: int = 50_000,
) -> IndexStats:
"""Run the 3-stage pipeline on the given files.
Args:
files: List of file paths to index.
root: Optional root for computing relative paths. If None, uses
each file's absolute path as its identifier.
max_chunk_chars: Maximum characters per chunk.
chunk_overlap: Character overlap between consecutive chunks.
max_file_size: Skip files larger than this (bytes).
Returns:
IndexStats with counts and timing.
"""
if not files:
return IndexStats()
t0 = time.monotonic()
embed_queue: queue.Queue = queue.Queue(maxsize=4)
index_queue: queue.Queue = queue.Queue(maxsize=4)
# Track errors from workers
worker_errors: list[Exception] = []
error_lock = threading.Lock()
def _record_error(exc: Exception) -> None:
with error_lock:
worker_errors.append(exc)
# --- Start workers ---
embed_thread = threading.Thread(
target=self._embed_worker,
args=(embed_queue, index_queue, _record_error),
daemon=True,
name="indexing-embed",
)
index_thread = threading.Thread(
target=self._index_worker,
args=(index_queue, _record_error),
daemon=True,
name="indexing-index",
)
embed_thread.start()
index_thread.start()
# --- Stage 1: chunk files (main thread) ---
chunk_id = 0
files_processed = 0
chunks_created = 0
for fpath in files:
try:
if fpath.stat().st_size > max_file_size:
continue
text = fpath.read_text(encoding="utf-8", errors="replace")
except Exception as exc:
logger.debug("Skipping %s: %s", fpath, exc)
continue
rel_path = str(fpath.relative_to(root)) if root else str(fpath)
file_chunks = self._chunk_text(text, rel_path, max_chunk_chars, chunk_overlap)
if not file_chunks:
continue
files_processed += 1
# Assign sequential IDs and push batch to embed queue
batch_ids = []
batch_texts = []
batch_paths = []
for chunk_text, path in file_chunks:
batch_ids.append(chunk_id)
batch_texts.append(chunk_text)
batch_paths.append(path)
chunk_id += 1
chunks_created += len(batch_ids)
embed_queue.put((batch_ids, batch_texts, batch_paths))
# Signal embed worker: no more data
embed_queue.put(_SENTINEL)
# Wait for workers to finish
embed_thread.join()
index_thread.join()
# --- Final flush ---
self._binary_store.save()
self._ann_index.save()
duration = time.monotonic() - t0
stats = IndexStats(
files_processed=files_processed,
chunks_created=chunks_created,
duration_seconds=round(duration, 2),
)
logger.info(
"Indexing complete: %d files, %d chunks in %.1fs",
stats.files_processed,
stats.chunks_created,
stats.duration_seconds,
)
# Raise first worker error if any occurred
if worker_errors:
raise worker_errors[0]
return stats
# ------------------------------------------------------------------
# Workers
# ------------------------------------------------------------------
def _embed_worker(
self,
in_q: queue.Queue,
out_q: queue.Queue,
on_error: callable,
) -> None:
"""Stage 2: Pull chunk batches, embed, push (ids, vecs, docs) to index queue."""
try:
while True:
item = in_q.get()
if item is _SENTINEL:
break
batch_ids, batch_texts, batch_paths = item
try:
vecs = self._embedder.embed_batch(batch_texts)
vec_array = np.array(vecs, dtype=np.float32)
id_array = np.array(batch_ids, dtype=np.int64)
out_q.put((id_array, vec_array, batch_texts, batch_paths))
except Exception as exc:
logger.error("Embed worker error: %s", exc)
on_error(exc)
finally:
# Signal index worker: no more data
out_q.put(_SENTINEL)
def _index_worker(
self,
in_q: queue.Queue,
on_error: callable,
) -> None:
"""Stage 3: Pull (ids, vecs, texts, paths), write to stores."""
while True:
item = in_q.get()
if item is _SENTINEL:
break
id_array, vec_array, texts, paths = item
try:
self._binary_store.add(id_array, vec_array)
self._ann_index.add(id_array, vec_array)
fts_docs = [
(int(id_array[i]), paths[i], texts[i])
for i in range(len(id_array))
]
self._fts.add_documents(fts_docs)
except Exception as exc:
logger.error("Index worker error: %s", exc)
on_error(exc)
# ------------------------------------------------------------------
# Chunking
# ------------------------------------------------------------------
@staticmethod
def _chunk_text(
text: str,
path: str,
max_chars: int,
overlap: int,
) -> list[tuple[str, str]]:
"""Split file text into overlapping chunks.
Returns list of (chunk_text, path) tuples.
"""
if not text.strip():
return []
chunks: list[tuple[str, str]] = []
lines = text.splitlines(keepends=True)
current: list[str] = []
current_len = 0
for line in lines:
if current_len + len(line) > max_chars and current:
chunk = "".join(current)
chunks.append((chunk, path))
# overlap: keep last N characters
tail = "".join(current)[-overlap:]
current = [tail] if tail else []
current_len = len(tail)
current.append(line)
current_len += len(line)
if current:
chunks.append(("".join(current), path))
return chunks

View File

@@ -0,0 +1,5 @@
from .base import BaseReranker
from .local import FastEmbedReranker
from .api import APIReranker
__all__ = ["BaseReranker", "FastEmbedReranker", "APIReranker"]

View File

@@ -0,0 +1,103 @@
from __future__ import annotations
import logging
import time
import httpx
from codexlens.config import Config
from .base import BaseReranker
logger = logging.getLogger(__name__)
class APIReranker(BaseReranker):
    """Reranker backed by a remote HTTP API (SiliconFlow/Cohere/Jina format).

    Documents are split into batches under a rough token budget, each batch
    is scored via the remote ``/rerank`` endpoint (with retry on transient
    failures), and scores are reassembled in the original document order.
    """

    def __init__(self, config: Config) -> None:
        self._config = config
        self._client = httpx.Client(
            headers={
                "Authorization": f"Bearer {config.reranker_api_key}",
                "Content-Type": "application/json",
            },
        )

    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self._client.close()

    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        """Score every document against *query*; scores come back in input order."""
        if not documents:
            return []
        max_tokens = self._config.reranker_api_max_tokens_per_batch
        batches = self._split_batches(documents, max_tokens)
        scores = [0.0] * len(documents)
        for batch in batches:
            batch_scores = self._call_api_with_retry(query, batch)
            for orig_idx, score in batch_scores.items():
                scores[orig_idx] = score
        return scores

    def _split_batches(
        self, documents: list[str], max_tokens: int
    ) -> list[list[tuple[int, str]]]:
        """Group (original_index, text) pairs into batches under *max_tokens*.

        Token counts use the rough chars/4 heuristic; a single oversized
        document still forms its own batch.
        """
        batches: list[list[tuple[int, str]]] = []
        current_batch: list[tuple[int, str]] = []
        current_tokens = 0
        for idx, text in enumerate(documents):
            doc_tokens = len(text) // 4
            if current_tokens + doc_tokens > max_tokens and current_batch:
                batches.append(current_batch)
                current_batch = []
                current_tokens = 0
            current_batch.append((idx, text))
            current_tokens += doc_tokens
        if current_batch:
            batches.append(current_batch)
        return batches

    def _call_api_with_retry(
        self,
        query: str,
        docs: list[tuple[int, str]],
        max_retries: int = 3,
    ) -> dict[int, float]:
        """POST one batch to the rerank endpoint, retrying transient failures.

        Retries connection errors and HTTP 429/503 with exponential backoff;
        any other HTTP error is raised immediately via raise_for_status().

        Returns:
            Mapping of original document index -> relevance score.

        Raises:
            RuntimeError: when all retry attempts are exhausted.
        """
        url = self._config.reranker_api_url.rstrip("/") + "/rerank"
        payload = {
            "model": self._config.reranker_api_model,
            "query": query,
            "documents": [t for _, t in docs],
        }
        last_exc: Exception | None = None
        for attempt in range(max_retries):
            try:
                response = self._client.post(url, json=payload)
            except Exception as exc:
                last_exc = exc
                time.sleep((2 ** attempt) * 0.5)
                continue
            if response.status_code in (429, 503):
                # FIX: record the failure so the final RuntimeError message is
                # informative even when only HTTP errors occurred (previously
                # it reported "Last error: None").
                last_exc = RuntimeError(f"HTTP {response.status_code} from {url}")
                logger.warning(
                    "API reranker returned HTTP %s (attempt %d/%d), retrying...",
                    response.status_code,
                    attempt + 1,
                    max_retries,
                )
                time.sleep((2 ** attempt) * 0.5)
                continue
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])
            scores: dict[int, float] = {}
            for item in results:
                # The API returns indexes local to this batch; map them back
                # to the caller's original document indexes.
                local_idx = int(item["index"])
                orig_idx = docs[local_idx][0]
                scores[orig_idx] = float(item["relevance_score"])
            return scores
        raise RuntimeError(
            f"API reranker failed after {max_retries} attempts. Last error: {last_exc}"
        )

View File

@@ -0,0 +1,8 @@
from __future__ import annotations
from abc import ABC, abstractmethod
class BaseReranker(ABC):
    """Interface for components that score query/document relevance."""

    @abstractmethod
    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        """Return one relevance score per entry of ``documents`` for ``query``."""
        ...

View File

@@ -0,0 +1,25 @@
from __future__ import annotations
from codexlens.config import Config
from .base import BaseReranker
class FastEmbedReranker(BaseReranker):
    """Local reranker backed by fastembed's TextCrossEncoder (lazily loaded)."""

    def __init__(self, config: Config) -> None:
        self._config = config
        self._model = None

    def _load(self) -> None:
        # Import lazily so fastembed is only required when reranking is used.
        if self._model is not None:
            return
        from fastembed.rerank.cross_encoder import TextCrossEncoder
        self._model = TextCrossEncoder(model_name=self._config.reranker_model)

    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        self._load()
        # rerank() may yield results out of order; place each score back at
        # its original document position.  (Assumes each result exposes
        # .index and .score -- TODO confirm against the installed fastembed
        # version; the unit tests mock exactly this interface.)
        scores = [0.0] * len(documents)
        for result in self._model.rerank(query, documents):
            scores[result.index] = float(result.score)
        return scores

View File

@@ -0,0 +1,8 @@
from .fts import FTSEngine
from .fusion import reciprocal_rank_fusion, detect_query_intent, QueryIntent, DEFAULT_WEIGHTS
from .pipeline import SearchPipeline, SearchResult
__all__ = [
"FTSEngine", "reciprocal_rank_fusion", "detect_query_intent",
"QueryIntent", "DEFAULT_WEIGHTS", "SearchPipeline", "SearchResult",
]

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
import sqlite3
from pathlib import Path
class FTSEngine:
    """Full-text search over document chunks using SQLite FTS5.

    Chunk content lives in an FTS5 virtual table keyed by rowid == chunk id;
    the companion ``docs_meta`` table maps each id to its source path.
    """

    def __init__(self, db_path: str | Path) -> None:
        # check_same_thread=False: callers query from worker threads.
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
        self._conn.execute(
            "CREATE VIRTUAL TABLE IF NOT EXISTS docs "
            "USING fts5(content, tokenize='porter unicode61')"
        )
        self._conn.execute(
            "CREATE TABLE IF NOT EXISTS docs_meta "
            "(id INTEGER PRIMARY KEY, path TEXT)"
        )
        self._conn.commit()

    def close(self) -> None:
        """Close the underlying SQLite connection (fixes: no way to release it)."""
        self._conn.close()

    def add_documents(self, docs: list[tuple[int, str, str]]) -> None:
        """Add documents in batch. docs: list of (id, path, content)."""
        if not docs:
            return
        self._conn.executemany(
            "INSERT OR REPLACE INTO docs_meta (id, path) VALUES (?, ?)",
            [(doc_id, path) for doc_id, path, content in docs],
        )
        self._conn.executemany(
            "INSERT OR REPLACE INTO docs (rowid, content) VALUES (?, ?)",
            [(doc_id, content) for doc_id, path, content in docs],
        )
        self._conn.commit()

    def exact_search(self, query: str, top_k: int = 50) -> list[tuple[int, float]]:
        """FTS5 MATCH query; returns (id, score) sorted best-first.

        SQLite's bm25() yields negative values (lower = better); scores are
        negated so higher means a better match.
        """
        return self._match(query, top_k)

    def fuzzy_search(self, query: str, top_k: int = 50) -> list[tuple[int, float]]:
        """Prefix search: append '*' to each token; returns (id, score) best-first."""
        tokens = query.strip().split()
        if not tokens:
            return []
        return self._match(" ".join(t + "*" for t in tokens), top_k)

    def _match(self, match_expr: str, top_k: int) -> list[tuple[int, float]]:
        """Shared MATCH helper; malformed FTS5 syntax yields an empty list."""
        try:
            rows = self._conn.execute(
                "SELECT rowid, bm25(docs) AS score FROM docs "
                "WHERE docs MATCH ? ORDER BY score LIMIT ?",
                (match_expr, top_k),
            ).fetchall()
        except sqlite3.OperationalError:
            return []
        # Negate bm25 so that higher is better for callers.
        return [(int(row[0]), -float(row[1])) for row in rows]

    def get_content(self, doc_id: int) -> str:
        """Retrieve stored content for a doc_id ('' if unknown)."""
        row = self._conn.execute(
            "SELECT content FROM docs WHERE rowid = ?", (doc_id,)
        ).fetchone()
        return row[0] if row else ""

View File

@@ -0,0 +1,106 @@
from __future__ import annotations
import re
from enum import Enum
# Relative weight of each retrieval source used during RRF fusion.
DEFAULT_WEIGHTS: dict[str, float] = {
    "exact": 0.25,
    "fuzzy": 0.10,
    "vector": 0.50,
    "graph": 0.15,
}
# Heuristic patterns consulted by detect_query_intent:
_CODE_CAMEL_RE = re.compile(r"[a-z][A-Z]")  # camelCase boundary
_CODE_SNAKE_RE = re.compile(r"\b[a-z_]+_[a-z_]+\b")  # snake_case identifier
_CODE_SYMBOLS_RE = re.compile(r"[.\[\](){}]|->|::")  # code-like punctuation
_CODE_KEYWORDS_RE = re.compile(r"\b(import|def|class|return|from|async|await|lambda|yield)\b")  # language keywords
_QUESTION_WORDS_RE = re.compile(r"\b(how|what|why|when|where|which|who|does|do|is|are|can|should)\b", re.IGNORECASE)  # NL question words
class QueryIntent(Enum):
    """Coarse classification of a search query for adaptive fusion weights."""

    CODE_SYMBOL = "code_symbol"
    NATURAL_LANGUAGE = "natural"
    MIXED = "mixed"
def detect_query_intent(query: str) -> QueryIntent:
    """Detect whether query is a code symbol, natural language, or mixed.

    Accumulates heuristic "code" and "natural language" signal counts, then
    classifies: strong one-sided evidence wins outright, otherwise the larger
    count wins, and a tie yields MIXED.
    """
    words = query.strip().split()
    word_count = len(words)
    code_signals = 0
    natural_signals = 0
    if _CODE_CAMEL_RE.search(query):
        code_signals += 2
    if _CODE_SNAKE_RE.search(query):
        code_signals += 2
    if _CODE_SYMBOLS_RE.search(query):
        code_signals += 2
    if _CODE_KEYWORDS_RE.search(query):
        code_signals += 2
    if "`" in query:
        code_signals += 1
    # Very short queries lean toward symbols; question words / length lean NL.
    if word_count < 4:
        code_signals += 1
    if _QUESTION_WORDS_RE.search(query):
        natural_signals += 2
    if word_count > 5:
        natural_signals += 2
    if code_signals == 0 and word_count >= 3:
        natural_signals += 1
    # Strong one-sided evidence wins outright.
    if code_signals >= 2 and natural_signals == 0:
        return QueryIntent.CODE_SYMBOL
    if natural_signals >= 2 and code_signals == 0:
        return QueryIntent.NATURAL_LANGUAGE
    # FIX: removed an exact duplicate of the first check that sat here -- it
    # was unreachable dead code.
    if natural_signals > code_signals:
        return QueryIntent.NATURAL_LANGUAGE
    if code_signals > natural_signals:
        return QueryIntent.CODE_SYMBOL
    return QueryIntent.MIXED
def get_adaptive_weights(intent: QueryIntent, base: dict | None = None) -> dict[str, float]:
    """Return a weight map tuned for the detected query intent.

    Starts from a copy of *base* (or DEFAULT_WEIGHTS when *base* is falsy)
    and shifts weight between the exact and vector sources; MIXED intent
    leaves the weights untouched.
    """
    adapted = dict(base or DEFAULT_WEIGHTS)
    if intent is QueryIntent.CODE_SYMBOL:
        # Symbol-like queries: favour exact keyword matching.
        adapted.update(exact=0.45, vector=0.35)
    elif intent is QueryIntent.NATURAL_LANGUAGE:
        # Question-like queries: favour semantic similarity.
        adapted.update(vector=0.65, exact=0.15)
    return adapted
def reciprocal_rank_fusion(
    results: dict[str, list[tuple[int, float]]],
    weights: dict[str, float] | None = None,
    k: int = 60,
) -> list[tuple[int, float]]:
    """Fuse ranked result lists using Reciprocal Rank Fusion.

    Args:
        results: Mapping of source name to (doc_id, score) pairs, each list
            sorted descending by score.
        weights: Weight per source; defaults to equal weight across sources.
        k: RRF smoothing constant (default 60).

    Returns:
        List of (doc_id, fused_score) pairs sorted descending by fused score.
    """
    if not results:
        return []
    if weights is None:
        share = 1.0 / len(results)
        weights = {name: share for name in results}
    fused: dict[int, float] = {}
    for name, ranked in results.items():
        source_weight = weights.get(name, 0.0)
        for position, (doc_id, _score) in enumerate(ranked, start=1):
            fused[doc_id] = fused.get(doc_id, 0.0) + source_weight * (1.0 / (k + position))
    return sorted(fused.items(), key=lambda item: item[1], reverse=True)

View File

@@ -0,0 +1,163 @@
from __future__ import annotations
import logging
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
import numpy as np
from ..config import Config
from ..core import ANNIndex, BinaryStore
from ..embed import BaseEmbedder
from ..rerank import BaseReranker
from .fts import FTSEngine
from .fusion import (
DEFAULT_WEIGHTS,
detect_query_intent,
get_adaptive_weights,
reciprocal_rank_fusion,
)
_log = logging.getLogger(__name__)
@dataclass
class SearchResult:
    """One ranked hit returned by SearchPipeline.search."""
    # Chunk id shared across the binary store, ANN index and FTS tables.
    id: int
    # Source path recorded at indexing time ("" when metadata is missing).
    path: str
    # Final ranking score for this hit.
    score: float
    # Short content preview (may be empty).
    snippet: str = ""
class SearchPipeline:
    """Hybrid search: vector funnel + full-text search + RRF fusion + rerank.

    Retrieval stages degrade gracefully: a failing stage logs a warning and
    contributes nothing instead of aborting the whole query.
    """

    # FIX: named constant instead of a magic 50 inline.  Number of fused
    # candidates forwarded to the (comparatively slow) reranker.
    _RERANK_CANDIDATES = 50

    def __init__(
        self,
        embedder: BaseEmbedder,
        binary_store: BinaryStore,
        ann_index: ANNIndex,
        reranker: BaseReranker,
        fts: FTSEngine,
        config: Config,
    ) -> None:
        self._embedder = embedder
        self._binary_store = binary_store
        self._ann_index = ann_index
        self._reranker = reranker
        self._fts = fts
        self._config = config

    # -- Helper: vector search (binary coarse + ANN fine) -----------------
    def _vector_search(
        self, query_vec: np.ndarray
    ) -> list[tuple[int, float]]:
        """Run binary coarse search then ANN fine search and intersect."""
        cfg = self._config
        # Stage A: cheap coarse candidates from the binary store.
        candidate_ids_list, _ = self._binary_store.coarse_search(
            query_vec, top_k=cfg.binary_top_k
        )
        candidate_ids = set(candidate_ids_list)
        # Stage B: precise ANN search over the full index.
        ann_ids, ann_scores = self._ann_index.fine_search(
            query_vec, top_k=cfg.ann_top_k
        )
        # Keep only results confirmed by both stages (2-stage funnel).
        vector_results: list[tuple[int, float]] = [
            (int(doc_id), float(score))
            for doc_id, score in zip(ann_ids, ann_scores)
            if int(doc_id) in candidate_ids
        ]
        if vector_results:
            return vector_results
        # Empty intersection: fall back to the plain ANN ranking.
        return [
            (int(doc_id), float(score))
            for doc_id, score in zip(ann_ids, ann_scores)
        ]

    # -- Helper: FTS search (exact + fuzzy) ------------------------------
    def _fts_search(
        self, query: str
    ) -> tuple[list[tuple[int, float]], list[tuple[int, float]]]:
        """Run exact and fuzzy full-text search."""
        cfg = self._config
        return (
            self._fts.exact_search(query, top_k=cfg.fts_top_k),
            self._fts.fuzzy_search(query, top_k=cfg.fts_top_k),
        )

    # -- Main search entry point -----------------------------------------
    def search(self, query: str, top_k: int | None = None) -> list[SearchResult]:
        """Search the index for *query* and return reranked results.

        Args:
            query: Free-form query string (code symbol or natural language).
            top_k: Maximum results to return; defaults to cfg.reranker_top_k.
        """
        cfg = self._config
        final_top_k = top_k if top_k is not None else cfg.reranker_top_k
        # 1. Detect intent -> adaptive fusion weights.
        intent = detect_query_intent(query)
        weights = get_adaptive_weights(intent, cfg.fusion_weights)
        # 2. Embed the query once; shared by both vector stages.
        query_vec = self._embedder.embed([query])[0]
        # 3. Vector and FTS searches run concurrently; each may fail
        #    independently without killing the query.
        vector_results: list[tuple[int, float]] = []
        exact_results: list[tuple[int, float]] = []
        fuzzy_results: list[tuple[int, float]] = []
        with ThreadPoolExecutor(max_workers=2) as pool:
            vec_future = pool.submit(self._vector_search, query_vec)
            fts_future = pool.submit(self._fts_search, query)
            try:
                vector_results = vec_future.result()
            except Exception:
                _log.warning("Vector search failed, using empty results", exc_info=True)
            try:
                exact_results, fuzzy_results = fts_future.result()
            except Exception:
                _log.warning("FTS search failed, using empty results", exc_info=True)
        # 4. RRF fusion over whichever sources produced results.
        fusion_input: dict[str, list[tuple[int, float]]] = {}
        if vector_results:
            fusion_input["vector"] = vector_results
        if exact_results:
            fusion_input["exact"] = exact_results
        if fuzzy_results:
            fusion_input["fuzzy"] = fuzzy_results
        if not fusion_input:
            return []
        fused = reciprocal_rank_fusion(fusion_input, weights=weights, k=cfg.fusion_k)
        # 5. Rerank the top fused candidates.  FIX: a reranker failure now
        #    falls back to the fusion scores instead of aborting -- consistent
        #    with the graceful degradation of the stages above.
        rerank_ids = [doc_id for doc_id, _ in fused[: self._RERANK_CANDIDATES]]
        contents = [self._fts.get_content(doc_id) for doc_id in rerank_ids]
        try:
            rerank_scores = self._reranker.score_pairs(query, contents)
        except Exception:
            _log.warning("Reranker failed, falling back to fusion scores", exc_info=True)
            fused_by_id = dict(fused)
            rerank_scores = [fused_by_id[doc_id] for doc_id in rerank_ids]
        # 6. Sort by rerank score and build the result objects.
        ranked = sorted(
            zip(rerank_ids, rerank_scores), key=lambda x: x[1], reverse=True
        )
        results: list[SearchResult] = []
        for doc_id, score in ranked[:final_top_k]:
            # NOTE(review): reaches into FTSEngine's private connection for
            # the path lookup; consider a public FTSEngine accessor instead.
            path = self._fts._conn.execute(
                "SELECT path FROM docs_meta WHERE id = ?", (doc_id,)
            ).fetchone()
            results.append(
                SearchResult(
                    id=doc_id,
                    path=path[0] if path else "",
                    score=float(score),
                    snippet=self._fts.get_content(doc_id)[:200],
                )
            )
        return results

View File

View File

@@ -0,0 +1,108 @@
import pytest
import numpy as np
import tempfile
from pathlib import Path
from codexlens.config import Config
from codexlens.core import ANNIndex, BinaryStore
from codexlens.embed.base import BaseEmbedder
from codexlens.rerank.base import BaseReranker
from codexlens.search.fts import FTSEngine
from codexlens.search.pipeline import SearchPipeline
# Test documents: 20 code snippets with id, path, content
TEST_DOCS = [
(0, "auth.py", "def authenticate(user, password): return check_hash(password, user.hash)"),
(1, "auth.py", "def authorize(user, permission): return permission in user.roles"),
(2, "models.py", "class User: def __init__(self, name, email): self.name = name; self.email = email"),
(3, "models.py", "class Session: token = None; expires_at = None"),
(4, "middleware.py", "def auth_middleware(request): token = request.headers.get('Authorization')"),
(5, "utils.py", "def hash_password(password): import bcrypt; return bcrypt.hashpw(password)"),
(6, "config.py", "DATABASE_URL = os.environ.get('DATABASE_URL', 'sqlite:///db.sqlite3')"),
(7, "search.py", "def search_users(query): return User.objects.filter(name__icontains=query)"),
(8, "api.py", "def get_user(request, user_id): user = User.objects.get(id=user_id)"),
(9, "api.py", "def create_user(request): data = request.json(); user = User(**data)"),
(10, "tests.py", "def test_authenticate(): assert authenticate('admin', 'pass') is not None"),
(11, "tests.py", "def test_search(): results = search_users('alice'); assert len(results) > 0"),
(12, "router.py", "app.route('/users', methods=['GET'])(list_users)"),
(13, "router.py", "app.route('/login', methods=['POST'])(login_handler)"),
(14, "db.py", "def get_connection(): return sqlite3.connect(DATABASE_URL)"),
(15, "cache.py", "def cache_get(key): return redis_client.get(key)"),
(16, "cache.py", "def cache_set(key, value, ttl=3600): redis_client.setex(key, ttl, value)"),
(17, "errors.py", "class AuthError(Exception): status_code = 401"),
(18, "errors.py", "class NotFoundError(Exception): status_code = 404"),
(19, "validators.py", "def validate_email(email): return '@' in email and '.' in email.split('@')[1]"),
]
DIM = 32  # Use small dim for fast tests

def make_stable_vec(doc_id: int, dim: int = DIM) -> np.ndarray:
    """Generate a deterministic unit-norm float32 vector for a given doc_id."""
    generator = np.random.default_rng(seed=doc_id)
    raw = generator.standard_normal(dim).astype(np.float32)
    raw /= np.linalg.norm(raw)
    return raw
class MockEmbedder(BaseEmbedder):
    """Deterministic embedder: unit vectors derived from a hash of the text."""

    def embed_single(self, text: str) -> np.ndarray:
        generator = np.random.default_rng(seed=hash(text) % (2**31))
        vec = generator.standard_normal(DIM).astype(np.float32)
        vec /= np.linalg.norm(vec)
        return vec

    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        return [self.embed_single(item) for item in texts]

    def embed(self, texts: list[str]) -> list[np.ndarray]:
        """Called by SearchPipeline as self._embedder.embed([query])[0]."""
        return self.embed_batch(texts)
class MockReranker(BaseReranker):
    """Scores each document by the fraction of query words it contains."""

    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        query_words = set(query.lower().split())
        denom = max(len(query_words), 1)
        return [
            len(query_words & set(doc.lower().split())) / denom
            for doc in documents
        ]
@pytest.fixture
def config():
    """Small Config profile to keep test indexes fast to build."""
    return Config.small()  # hnsw_ef=50, hnsw_M=16, binary_top_k=50, ann_top_k=20, rerank_top_k=10
@pytest.fixture
def search_pipeline(tmp_path, config):
    """Build a full SearchPipeline with 20 test docs indexed."""
    embedder = MockEmbedder()
    binary_store = BinaryStore(tmp_path / "binary", dim=DIM, config=config)
    ann_index = ANNIndex(tmp_path / "ann.hnsw", dim=DIM, config=config)
    fts = FTSEngine(tmp_path / "fts.db")
    reranker = MockReranker()
    # Index all test docs
    # Vectors are derived deterministically from each doc's content, so the
    # fixture is stable across test runs.
    ids = np.array([d[0] for d in TEST_DOCS], dtype=np.int64)
    vectors = np.array([embedder.embed_single(d[2]) for d in TEST_DOCS], dtype=np.float32)
    binary_store.add(ids, vectors)
    ann_index.add(ids, vectors)
    fts.add_documents(TEST_DOCS)
    return SearchPipeline(
        embedder=embedder,
        binary_store=binary_store,
        ann_index=ann_index,
        reranker=reranker,
        fts=fts,
        config=config,
    )

View File

@@ -0,0 +1,44 @@
"""Integration tests for SearchPipeline using real components and mock embedder/reranker."""
from __future__ import annotations
def test_vector_search_returns_results(search_pipeline):
    """A natural-language query yields at least one scored result."""
    results = search_pipeline.search("authentication middleware")
    assert len(results) > 0
    assert all(isinstance(r.score, float) for r in results)
def test_exact_keyword_search(search_pipeline):
    """An exact identifier query surfaces the documents containing it."""
    results = search_pipeline.search("authenticate")
    assert len(results) > 0
    result_ids = {r.id for r in results}
    # Doc 0 and 10 both contain "authenticate"
    assert result_ids & {0, 10}, f"Expected doc 0 or 10 in results, got {result_ids}"
def test_pipeline_top_k_limit(search_pipeline):
    """An explicit top_k caps the number of returned results."""
    results = search_pipeline.search("user", top_k=5)
    assert len(results) <= 5
def test_search_result_fields_populated(search_pipeline):
    """Every result carries a valid id, score and path."""
    results = search_pipeline.search("password")
    assert len(results) > 0
    for r in results:
        assert r.id >= 0
        assert r.score >= 0
        assert isinstance(r.path, str)
def test_empty_query_handled(search_pipeline):
    """An empty query must not raise; an empty list is acceptable."""
    results = search_pipeline.search("")
    assert isinstance(results, list)  # no exception
def test_different_queries_give_different_results(search_pipeline):
    """Distinct queries should not produce identical rankings."""
    r1 = search_pipeline.search("authenticate user")
    r2 = search_pipeline.search("cache redis")
    # Results should differ (different top IDs or scores), unless both are empty
    ids1 = [r.id for r in r1]
    ids2 = [r.id for r in r2]
    assert ids1 != ids2 or len(r1) == 0

View File

View File

@@ -0,0 +1,31 @@
from codexlens.config import Config
def test_config_instantiates_no_args():
    """Config must be constructible with zero arguments."""
    cfg = Config()
    assert cfg is not None
def test_defaults_hnsw_ef():
    """The defaults() profile pins hnsw_ef to 150."""
    cfg = Config.defaults()
    assert cfg.hnsw_ef == 150
def test_defaults_hnsw_M():
    """The defaults() profile pins hnsw_M to 32."""
    cfg = Config.defaults()
    assert cfg.hnsw_M == 32
def test_small_hnsw_ef():
    """The small() profile lowers hnsw_ef to 50 for fast tests."""
    cfg = Config.small()
    assert cfg.hnsw_ef == 50
def test_custom_instantiation():
    """Keyword overrides take effect on construction."""
    cfg = Config(hnsw_ef=100)
    assert cfg.hnsw_ef == 100
def test_fusion_weights_keys():
    """fusion_weights must cover exactly the four retrieval sources."""
    cfg = Config()
    assert set(cfg.fusion_weights.keys()) == {"exact", "fuzzy", "vector", "graph"}

View File

@@ -0,0 +1,136 @@
"""Unit tests for BinaryStore and ANNIndex (no fastembed required)."""
from __future__ import annotations
import concurrent.futures
import tempfile
from pathlib import Path
import numpy as np
import pytest
from codexlens.config import Config
from codexlens.core import ANNIndex, BinaryStore
DIM = 32
RNG = np.random.default_rng(42)
def make_vectors(n: int, dim: int = DIM) -> np.ndarray:
    """Return *n* random float32 row vectors drawn from the shared module RNG."""
    return RNG.standard_normal((n, dim)).astype(np.float32)
def make_ids(n: int, start: int = 0) -> np.ndarray:
    """Return *n* sequential int64 ids beginning at *start*."""
    return np.arange(start, start + n, dtype=np.int64)
# ---------------------------------------------------------------------------
# BinaryStore tests
# ---------------------------------------------------------------------------
class TestBinaryStore:
    """Behavioral tests for the Hamming-distance coarse-search store."""

    def test_binary_store_add_and_search(self, tmp_path: Path) -> None:
        """add() then coarse_search() returns top_k ids with distances."""
        cfg = Config.small()
        store = BinaryStore(tmp_path, DIM, cfg)
        vecs = make_vectors(10)
        ids = make_ids(10)
        store.add(ids, vecs)
        assert len(store) == 10
        top_k = 5
        ret_ids, ret_dists = store.coarse_search(vecs[0], top_k=top_k)
        assert ret_ids.shape == (top_k,)
        assert ret_dists.shape == (top_k,)
        # distances are non-negative integers
        assert (ret_dists >= 0).all()
    def test_binary_hamming_correctness(self, tmp_path: Path) -> None:
        """A stored vector must be its own nearest neighbour at distance 0."""
        cfg = Config.small()
        store = BinaryStore(tmp_path, DIM, cfg)
        vecs = make_vectors(20)
        ids = make_ids(20)
        store.add(ids, vecs)
        # Query with the exact stored vector; it must be the top-1 result
        query = vecs[7]
        ret_ids, ret_dists = store.coarse_search(query, top_k=1)
        assert ret_ids[0] == 7
        assert ret_dists[0] == 0  # Hamming distance to itself is 0
    def test_binary_store_persist(self, tmp_path: Path) -> None:
        """save() followed by a fresh load preserves contents and search."""
        cfg = Config.small()
        store = BinaryStore(tmp_path, DIM, cfg)
        vecs = make_vectors(15)
        ids = make_ids(15)
        store.add(ids, vecs)
        store.save()
        # Load into a fresh instance
        store2 = BinaryStore(tmp_path, DIM, cfg)
        assert len(store2) == 15
        query = vecs[3]
        ret_ids, ret_dists = store2.coarse_search(query, top_k=1)
        assert ret_ids[0] == 3
        assert ret_dists[0] == 0
# ---------------------------------------------------------------------------
# ANNIndex tests
# ---------------------------------------------------------------------------
class TestANNIndex:
    """Behavioral tests for the ANN fine-search index."""

    def test_ann_index_add_and_search(self, tmp_path: Path) -> None:
        """add() then fine_search() returns the requested number of hits."""
        cfg = Config.small()
        idx = ANNIndex(tmp_path, DIM, cfg)
        vecs = make_vectors(50)
        ids = make_ids(50)
        idx.add(ids, vecs)
        assert len(idx) == 50
        ret_ids, ret_dists = idx.fine_search(vecs[0], top_k=5)
        assert len(ret_ids) == 5
        assert len(ret_dists) == 5
    def test_ann_index_thread_safety(self, tmp_path: Path) -> None:
        """Concurrent fine_search() calls must not raise."""
        cfg = Config.small()
        idx = ANNIndex(tmp_path, DIM, cfg)
        vecs = make_vectors(50)
        ids = make_ids(50)
        idx.add(ids, vecs)
        query = vecs[0]
        errors: list[Exception] = []
        def search() -> None:
            try:
                idx.fine_search(query, top_k=3)
            except Exception as exc:
                errors.append(exc)
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as pool:
            futures = [pool.submit(search) for _ in range(5)]
            concurrent.futures.wait(futures)
        assert errors == [], f"Thread safety errors: {errors}"
    def test_ann_index_save_load(self, tmp_path: Path) -> None:
        """save() then load() into a new instance preserves the index."""
        cfg = Config.small()
        idx = ANNIndex(tmp_path, DIM, cfg)
        vecs = make_vectors(30)
        ids = make_ids(30)
        idx.add(ids, vecs)
        idx.save()
        # Load into a fresh instance
        idx2 = ANNIndex(tmp_path, DIM, cfg)
        idx2.load()
        assert len(idx2) == 30
        ret_ids, ret_dists = idx2.fine_search(vecs[10], top_k=1)
        assert len(ret_ids) == 1
        assert ret_ids[0] == 10

View File

@@ -0,0 +1,80 @@
from __future__ import annotations
import sys
import types
import unittest
from unittest.mock import MagicMock, patch
import numpy as np
def _make_fastembed_mock():
"""Build a minimal fastembed stub so imports succeed without the real package."""
fastembed_mod = types.ModuleType("fastembed")
fastembed_mod.TextEmbedding = MagicMock()
sys.modules.setdefault("fastembed", fastembed_mod)
return fastembed_mod
_make_fastembed_mock()
from codexlens.config import Config # noqa: E402
from codexlens.embed.base import BaseEmbedder # noqa: E402
from codexlens.embed.local import EMBED_PROFILES, FastEmbedEmbedder # noqa: E402
class TestEmbedSingle(unittest.TestCase):
    """embed_single must normalize model output to a float32 ndarray."""
    def test_embed_single_returns_float32_ndarray(self):
        config = Config()
        embedder = FastEmbedEmbedder(config)
        mock_model = MagicMock()
        mock_model.embed.return_value = iter([np.ones(384, dtype=np.float64)])
        # Inject mock model directly to bypass lazy load (no real fastembed needed)
        embedder._model = mock_model
        result = embedder.embed_single("hello world")
        self.assertIsInstance(result, np.ndarray)
        self.assertEqual(result.dtype, np.float32)
        self.assertEqual(result.shape, (384,))
class TestEmbedBatch(unittest.TestCase):
    """embed_batch must return one float32 array per input text."""
    def test_embed_batch_returns_list(self):
        config = Config()
        embedder = FastEmbedEmbedder(config)
        vecs = [np.ones(384, dtype=np.float64) * i for i in range(3)]
        mock_model = MagicMock()
        mock_model.embed.return_value = iter(vecs)
        embedder._model = mock_model
        result = embedder.embed_batch(["a", "b", "c"])
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 3)
        for arr in result:
            self.assertIsInstance(arr, np.ndarray)
            self.assertEqual(arr.dtype, np.float32)
class TestEmbedProfiles(unittest.TestCase):
    """The EMBED_PROFILES registry must stay complete and well-formed."""
    def test_embed_profiles_all_have_valid_keys(self):
        expected_keys = {"small", "base", "large", "code"}
        self.assertEqual(set(EMBED_PROFILES.keys()), expected_keys)
    def test_embed_profiles_model_ids_non_empty(self):
        for key, model_id in EMBED_PROFILES.items():
            self.assertIsInstance(model_id, str, msg=f"{key} model id should be str")
            self.assertTrue(len(model_id) > 0, msg=f"{key} model id should be non-empty")
class TestBaseEmbedderAbstract(unittest.TestCase):
    """BaseEmbedder is abstract and must not be instantiable."""
    def test_base_embedder_is_abstract(self):
        with self.assertRaises(TypeError):
            BaseEmbedder()  # type: ignore[abstract]
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,179 @@
from __future__ import annotations
import types
from unittest.mock import MagicMock, patch
import pytest
from codexlens.config import Config
from codexlens.rerank.base import BaseReranker
from codexlens.rerank.local import FastEmbedReranker
from codexlens.rerank.api import APIReranker
# ---------------------------------------------------------------------------
# BaseReranker
# ---------------------------------------------------------------------------
def test_base_reranker_is_abstract():
with pytest.raises(TypeError):
BaseReranker() # type: ignore[abstract]
# ---------------------------------------------------------------------------
# FastEmbedReranker
# ---------------------------------------------------------------------------
def _make_rerank_result(index: int, score: float) -> object:
obj = types.SimpleNamespace(index=index, score=score)
return obj
def test_local_reranker_score_pairs_length():
config = Config()
reranker = FastEmbedReranker(config)
mock_results = [
_make_rerank_result(0, 0.9),
_make_rerank_result(1, 0.5),
_make_rerank_result(2, 0.1),
]
mock_model = MagicMock()
mock_model.rerank.return_value = iter(mock_results)
reranker._model = mock_model
docs = ["doc0", "doc1", "doc2"]
scores = reranker.score_pairs("query", docs)
assert len(scores) == 3
def test_local_reranker_preserves_order():
    """Scores must align with input order even when rerank yields out of order."""
    reranker = FastEmbedReranker(Config())
    # The mocked model emits results for index 2 first, then 1, then 0.
    fake_model = MagicMock()
    fake_model.rerank.return_value = iter(
        [
            _make_rerank_result(2, 0.1),
            _make_rerank_result(1, 0.5),
            _make_rerank_result(0, 0.9),
        ]
    )
    reranker._model = fake_model
    scores = reranker.score_pairs("query", ["doc0", "doc1", "doc2"])
    for got, want in zip(scores, [0.9, 0.5, 0.1]):
        assert got == pytest.approx(want)
# ---------------------------------------------------------------------------
# APIReranker
# ---------------------------------------------------------------------------
def _make_config(max_tokens_per_batch: int = 512) -> Config:
    """Return a Config pointing at a dummy rerank API endpoint."""
    settings = {
        "reranker_api_url": "https://api.example.com",
        "reranker_api_key": "test-key",
        "reranker_api_model": "test-model",
        "reranker_api_max_tokens_per_batch": max_tokens_per_batch,
    }
    return Config(**settings)
def test_api_reranker_batch_splitting():
    """_split_batches must keep each batch within the token budget (singletons excepted)."""
    config = _make_config(max_tokens_per_batch=512)
    with patch("httpx.Client"):
        reranker = APIReranker(config)
    # Ten documents of ~200 tokens (800 chars) each cannot all fit in one
    # 512-token batch: 200+200=400 fits, 400+200=600 does not.
    documents = ["x" * 800 for _ in range(10)]
    batches = reranker._split_batches(documents, max_tokens=512)
    assert len(batches) > 1
    for batch in batches:
        batch_tokens = sum(len(text) // 4 for _, text in batch)
        assert batch_tokens <= 512 or len(batch) == 1
def test_api_reranker_retry_on_429():
    """A 429 response must be retried; a later success returns scores for all docs."""
    config = _make_config()

    throttled = MagicMock()
    throttled.status_code = 429

    ok = MagicMock()
    ok.status_code = 200
    ok.raise_for_status = MagicMock()
    ok.json.return_value = {
        "results": [
            {"index": 0, "relevance_score": 0.8},
            {"index": 1, "relevance_score": 0.3},
        ]
    }

    with patch("httpx.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        # Two throttled responses, then a success on the third attempt.
        client.post.side_effect = [throttled, throttled, ok]
        reranker = APIReranker(config)
        with patch("time.sleep"):
            scores_by_index = reranker._call_api_with_retry(
                "query",
                [(0, "doc0"), (1, "doc1")],
                max_retries=3,
            )
        assert client.post.call_count == 3
        assert 0 in scores_by_index
        assert 1 in scores_by_index
def test_api_reranker_merge_batches():
    """Scores from multiple API batches must be merged back to original positions."""
    config = _make_config(max_tokens_per_batch=100)
    # Four docs of 50 tokens (200 chars) each: two fit per 100-token batch
    # (50+50=100 <= 100, +50 overflows), so score_pairs should issue two calls
    # and stitch the per-batch results back into one score list.
    documents = ["x" * 200] * 4

    def fake_response(result_rows):
        response = MagicMock()
        response.status_code = 200
        response.raise_for_status = MagicMock()
        response.json.return_value = {"results": result_rows}
        return response

    first = fake_response(
        [
            {"index": 0, "relevance_score": 0.9},
            {"index": 1, "relevance_score": 0.8},
        ]
    )
    second = fake_response(
        [
            {"index": 0, "relevance_score": 0.7},
            {"index": 1, "relevance_score": 0.6},
        ]
    )

    with patch("httpx.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.post.side_effect = [first, second]
        reranker = APIReranker(config)
        with patch("time.sleep"):
            scores = reranker.score_pairs("query", documents)
        assert len(scores) == 4
        # Every original document position must have received a score.
        assert all(score > 0 for score in scores)

View File

@@ -0,0 +1,156 @@
"""Unit tests for search layer: FTSEngine, fusion, and SearchPipeline."""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
from codexlens.search.fts import FTSEngine
from codexlens.search.fusion import (
DEFAULT_WEIGHTS,
QueryIntent,
detect_query_intent,
get_adaptive_weights,
reciprocal_rank_fusion,
)
from codexlens.search.pipeline import SearchPipeline, SearchResult
from codexlens.config import Config
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def make_fts(docs: list[tuple[int, str, str]] | None = None) -> FTSEngine:
    """Create an in-memory FTSEngine; pre-populate it when *docs* is non-empty."""
    engine = FTSEngine(":memory:")
    if not docs:
        return engine
    engine.add_documents(docs)
    return engine
# ---------------------------------------------------------------------------
# FTSEngine tests
# ---------------------------------------------------------------------------
def test_fts_add_and_exact_search():
    """An exact term must hit the document containing it, ranked first."""
    engine = make_fts(
        [
            (1, "a.py", "def authenticate user password login"),
            (2, "b.py", "connect to database with credentials"),
            (3, "c.py", "render template html response"),
        ]
    )
    hits = engine.exact_search("authenticate", top_k=10)
    hit_ids = [hit[0] for hit in hits]
    assert 1 in hit_ids, "doc 1 should match 'authenticate'"
    # If doc 2 sneaks into the results, doc 1 must still be ranked first.
    assert 2 not in hit_ids or hits[0][0] == 1
def test_fts_fuzzy_search_prefix():
    """A prefix query should match documents containing longer words."""
    engine = make_fts(
        [
            (10, "auth.py", "authentication token refresh"),
            (11, "db.py", "database connection pool"),
            (12, "ui.py", "render button click handler"),
        ]
    )
    hits = engine.fuzzy_search("auth", top_k=10)
    hit_ids = [hit[0] for hit in hits]
    assert 10 in hit_ids, "prefix 'auth' should match doc 10 with 'authentication'"
# ---------------------------------------------------------------------------
# RRF fusion tests
# ---------------------------------------------------------------------------
def test_rrf_fusion_ordering():
    """A doc ranked top-1 by both sources must come out first after fusion."""
    rankings = {
        "a": [(1, 0.9), (2, 0.5), (3, 0.2)],
        "b": [(1, 0.8), (3, 0.6), (2, 0.1)],
    }
    fused = reciprocal_rank_fusion(rankings)
    assert fused[0][0] == 1, "doc 1 agreed top by both sources must rank first"
def test_rrf_equal_weight_default():
    """Passing weights=None must fall back to defaults without crashing."""
    fused = reciprocal_rank_fusion(
        {
            "exact": [(5, 1.0), (6, 0.8)],
            "vector": [(6, 0.9), (5, 0.7)],
        },
        weights=None,
    )
    assert len(fused) == 2
    fused_ids = [entry[0] for entry in fused]
    assert 5 in fused_ids
    assert 6 in fused_ids
# ---------------------------------------------------------------------------
# detect_query_intent tests
# ---------------------------------------------------------------------------
def test_detect_intent_code_symbol():
    """Code-like syntax ('def ... ()') should be detected as a symbol query."""
    intent = detect_query_intent("def authenticate()")
    assert intent == QueryIntent.CODE_SYMBOL
def test_detect_intent_natural():
    """A plain-English question should be detected as natural language."""
    intent = detect_query_intent("how do I authenticate users")
    assert intent == QueryIntent.NATURAL_LANGUAGE
# ---------------------------------------------------------------------------
# SearchPipeline tests
# ---------------------------------------------------------------------------
def _make_pipeline(fts: FTSEngine, top_k: int = 5) -> SearchPipeline:
    """Build a SearchPipeline whose heavy stages (embed/ANN/rerank) are mocked."""
    cfg = Config.small()
    cfg.reranker_top_k = top_k

    embedder = MagicMock()
    embedder.embed.return_value = [[0.1] * cfg.embed_dim]

    binary_store = MagicMock()
    binary_store.coarse_search.return_value = ([1, 2, 3], None)

    ann_index = MagicMock()
    ann_index.fine_search.return_value = ([1, 2, 3], [0.9, 0.8, 0.7])

    reranker = MagicMock()
    # Deterministic descending scores, one per candidate content string.
    reranker.score_pairs.side_effect = (
        lambda _query, contents: [0.9 - pos * 0.1 for pos in range(len(contents))]
    )

    return SearchPipeline(
        embedder=embedder,
        binary_store=binary_store,
        ann_index=ann_index,
        reranker=reranker,
        fts=fts,
        config=cfg,
    )
def test_pipeline_search_returns_results():
    """A basic query through the mocked pipeline yields SearchResult objects."""
    fts = make_fts(
        [
            (1, "a.py", "test content alpha"),
            (2, "b.py", "test content beta"),
            (3, "c.py", "test content gamma"),
        ]
    )
    results = _make_pipeline(fts).search("test")
    assert len(results) > 0
    for item in results:
        assert isinstance(item, SearchResult)
def test_pipeline_top_k_limit():
    """An explicit top_k must bound the number of returned results."""
    corpus = [
        (1, "a.py", "hello world one"),
        (2, "b.py", "hello world two"),
        (3, "c.py", "hello world three"),
        (4, "d.py", "hello world four"),
        (5, "e.py", "hello world five"),
    ]
    pipeline = _make_pipeline(make_fts(corpus), top_k=2)
    results = pipeline.search("hello", top_k=2)
    assert len(results) <= 2, "pipeline must respect top_k limit"