refactor: rename package codexlens -> codexlens_search for independent distribution

Rename the v2 search engine package to `codexlens-search` (import as
`codexlens_search`) so it can be installed independently and consumed
by the original codex-lens as a dependency. This avoids package path
conflicts since both previously used `src/codexlens/`.

Changes:
- Rename src/codexlens/ -> src/codexlens_search/
- Update pyproject.toml: name=codexlens-search, version=0.2.0
- Update all imports across source, tests, and scripts
- Add public API exports in __init__.py (Config, SearchPipeline,
  IndexingPipeline, SearchResult, IndexStats)

37/37 tests pass. No functional changes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
catlog22
2026-03-16 23:23:22 +08:00
parent a0a50d338a
commit 6712965b7f
32 changed files with 83 additions and 63 deletions

View File

@@ -0,0 +1,20 @@
"""codexlens-search: Lightweight semantic code search engine.
Public API for consumers (e.g. codex-lens):
from codexlens_search import SearchPipeline, IndexingPipeline, Config
from codexlens_search.core import create_ann_index, create_binary_index
from codexlens_search.embed.local import FastEmbedEmbedder
from codexlens_search.rerank.api import APIReranker
"""
from codexlens_search.config import Config
from codexlens_search.indexing import IndexingPipeline, IndexStats
from codexlens_search.search.pipeline import SearchPipeline, SearchResult
__all__ = [
"Config",
"IndexingPipeline",
"IndexStats",
"SearchPipeline",
"SearchResult",
]

View File

@@ -0,0 +1,99 @@
from __future__ import annotations
import logging
from dataclasses import dataclass, field
log = logging.getLogger(__name__)
@dataclass
class Config:
# Embedding
embed_model: str = "jinaai/jina-embeddings-v2-base-code"
embed_dim: int = 768
embed_batch_size: int = 64
# GPU / execution providers
device: str = "auto" # 'auto', 'cuda', 'cpu'
embed_providers: list[str] | None = None # explicit ONNX providers override
# Backend selection: 'auto', 'faiss', 'hnswlib'
ann_backend: str = "auto"
binary_backend: str = "auto"
# Indexing pipeline
index_workers: int = 2 # number of parallel indexing workers
# HNSW index (ANNIndex)
hnsw_ef: int = 150
hnsw_M: int = 32
hnsw_ef_construction: int = 200
# Binary coarse search (BinaryStore)
binary_top_k: int = 200
# ANN fine search
ann_top_k: int = 50
# Reranker
reranker_model: str = "BAAI/bge-reranker-v2-m3"
reranker_top_k: int = 20
reranker_batch_size: int = 32
# API reranker (optional)
reranker_api_url: str = ""
reranker_api_key: str = ""
reranker_api_model: str = ""
reranker_api_max_tokens_per_batch: int = 2048
# FTS
fts_top_k: int = 50
# Fusion
fusion_k: int = 60 # RRF k parameter
fusion_weights: dict = field(default_factory=lambda: {
"exact": 0.25,
"fuzzy": 0.10,
"vector": 0.50,
"graph": 0.15,
})
def resolve_embed_providers(self) -> list[str]:
"""Return ONNX execution providers based on device config.
Priority: explicit embed_providers > device setting > auto-detect.
"""
if self.embed_providers is not None:
return list(self.embed_providers)
if self.device == "cuda":
return ["CUDAExecutionProvider", "CPUExecutionProvider"]
if self.device == "cpu":
return ["CPUExecutionProvider"]
# auto-detect
try:
import onnxruntime
available = onnxruntime.get_available_providers()
if "CUDAExecutionProvider" in available:
log.info("CUDA detected via onnxruntime, using GPU for embedding")
return ["CUDAExecutionProvider", "CPUExecutionProvider"]
except ImportError:
pass
return ["CPUExecutionProvider"]
@classmethod
def defaults(cls) -> "Config":
return cls()
@classmethod
def small(cls) -> "Config":
"""Smaller config for testing or small corpora."""
return cls(
hnsw_ef=50,
hnsw_M=16,
binary_top_k=50,
ann_top_k=20,
reranker_top_k=10,
)

View File

@@ -0,0 +1,13 @@
from .base import BaseANNIndex, BaseBinaryIndex
from .binary import BinaryStore
from .factory import create_ann_index, create_binary_index
from .index import ANNIndex
__all__ = [
"BaseANNIndex",
"BaseBinaryIndex",
"ANNIndex",
"BinaryStore",
"create_ann_index",
"create_binary_index",
]

View File

@@ -0,0 +1,83 @@
from __future__ import annotations
from abc import ABC, abstractmethod
import numpy as np
class BaseANNIndex(ABC):
    """Abstract base class for approximate nearest neighbor indexes.

    Implementations must support incremental addition of id/vector pairs,
    top-k search, and explicit save()/load() persistence.
    """

    @abstractmethod
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors with corresponding IDs.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """

    @abstractmethod
    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; None lets the implementation apply
                its configured default

        Returns:
            (ids, distances) as numpy arrays
        """

    @abstractmethod
    def save(self) -> None:
        """Persist index to disk."""

    @abstractmethod
    def load(self) -> None:
        """Load index from disk."""

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of indexed items."""
class BaseBinaryIndex(ABC):
    """Abstract base class for binary vector indexes (Hamming distance).

    Implementations quantize float32 vectors into packed bits internally
    and rank candidates by Hamming distance for fast coarse retrieval.
    """

    @abstractmethod
    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors (will be binary-quantized internally).

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """

    @abstractmethod
    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; None lets the implementation apply
                its configured default

        Returns:
            (ids, distances) sorted ascending by distance
        """

    @abstractmethod
    def save(self) -> None:
        """Persist store to disk."""

    @abstractmethod
    def load(self) -> None:
        """Load store from disk."""

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of stored items."""

View File

@@ -0,0 +1,173 @@
from __future__ import annotations
import logging
import math
from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.base import BaseBinaryIndex
logger = logging.getLogger(__name__)
class BinaryStore(BaseBinaryIndex):
    """Persistent binary vector store using numpy memmap.

    Float32 vectors are sign-quantized into packed uint8 rows on disk.
    Coarse search ranks by Hamming distance computed via XOR + bit count.
    """

    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        self._dir = Path(path)
        self._dim = dim
        self._config = config
        self._packed_bytes = math.ceil(dim / 8)
        self._bin_path = self._dir / "binary_store.bin"
        self._ids_path = self._dir / "binary_store_ids.npy"
        # Pre-allocated backing arrays; only the first _count rows are live.
        self._matrix: np.ndarray | None = None  # (capacity, packed_bytes) uint8
        self._ids: np.ndarray | None = None  # (capacity,) int64
        self._count: int = 0
        if self._bin_path.exists() and self._ids_path.exists():
            self.load()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _quantize(self, vectors: np.ndarray) -> np.ndarray:
        """Sign-quantize float32 rows (N, dim) into packed uint8 (N, packed_bytes)."""
        bits = (vectors > 0).astype(np.uint8)
        return np.packbits(bits, axis=1)

    def _quantize_single(self, vec: np.ndarray) -> np.ndarray:
        """Sign-quantize one float32 vector (dim,) into packed uint8 (packed_bytes,)."""
        return np.packbits((vec > 0).astype(np.uint8))

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def _ensure_capacity(self, needed: int) -> None:
        """Grow the backing arrays (doubling) until *needed* rows fit."""
        if self._matrix is not None and self._matrix.shape[0] >= needed:
            return
        if self._matrix is None:
            capacity = max(1024, needed)
        else:
            capacity = max(self._matrix.shape[0], 1024)
            while capacity < needed:
                capacity *= 2
        grown_matrix = np.zeros((capacity, self._packed_bytes), dtype=np.uint8)
        grown_ids = np.zeros(capacity, dtype=np.int64)
        if self._matrix is not None and self._count > 0:
            grown_matrix[: self._count] = self._matrix[: self._count]
            grown_ids[: self._count] = self._ids[: self._count]
        self._matrix = grown_matrix
        self._ids = grown_ids

    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Append float32 vectors and their ids.

        Persistence is explicit: add() never writes to disk; callers must
        invoke save() after batch indexing.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """
        n = len(ids)
        if n == 0:
            return
        packed = self._quantize(vectors)  # (N, packed_bytes)
        self._ensure_capacity(self._count + n)
        start, end = self._count, self._count + n
        self._matrix[start:end] = packed
        self._ids[start:end] = ids.astype(np.int64)
        self._count = end

    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.binary_top_k

        Returns:
            (ids, distances) sorted ascending by Hamming distance
        """
        if self._matrix is None or self._count == 0:
            return np.array([], dtype=np.int64), np.array([], dtype=np.int32)
        k = self._config.binary_top_k if top_k is None else top_k
        k = min(k, self._count)
        query_bits = self._quantize_single(query_vec)  # (packed_bytes,)
        # Only the occupied region participates in the scan.
        live_rows = self._matrix[: self._count]
        live_ids = self._ids[: self._count]
        # Hamming distance = popcount(a XOR b), via unpackbits + row sum.
        diff = np.bitwise_xor(live_rows, query_bits[np.newaxis, :])
        dists = np.unpackbits(diff, axis=1).sum(axis=1).astype(np.int32)
        if k >= self._count:
            order = np.argsort(dists)
        else:
            # Partial selection first, then sort only the k candidates.
            candidates = np.argpartition(dists, k)[:k]
            order = candidates[np.argsort(dists[candidates])]
        return live_ids[order], dists[order]

    def save(self) -> None:
        """Flush the occupied portion of the store to disk."""
        if self._matrix is None or self._count == 0:
            return
        self._dir.mkdir(parents=True, exist_ok=True)
        live_rows = self._matrix[: self._count]
        mm = np.memmap(
            str(self._bin_path),
            dtype=np.uint8,
            mode="w+",
            shape=live_rows.shape,
        )
        mm[:] = live_rows
        mm.flush()
        del mm
        np.save(str(self._ids_path), self._ids[: self._count])

    def load(self) -> None:
        """Reload binary store from disk into RAM."""
        ids = np.load(str(self._ids_path))
        n = len(ids)
        if n == 0:
            return
        mm = np.memmap(
            str(self._bin_path),
            dtype=np.uint8,
            mode="r",
            shape=(n, self._packed_bytes),
        )
        # Copy out of the memmap so the store stays mutable after loading.
        self._matrix = np.array(mm)
        del mm
        self._ids = ids.astype(np.int64)
        self._count = n

    def __len__(self) -> int:
        return self._count

View File

@@ -0,0 +1,116 @@
from __future__ import annotations
import logging
from pathlib import Path
from codexlens_search.config import Config
from codexlens_search.core.base import BaseANNIndex, BaseBinaryIndex
logger = logging.getLogger(__name__)
try:
import faiss as _faiss # noqa: F401
_FAISS_AVAILABLE = True
except ImportError:
_FAISS_AVAILABLE = False
try:
import hnswlib as _hnswlib # noqa: F401
_HNSWLIB_AVAILABLE = True
except ImportError:
_HNSWLIB_AVAILABLE = False
def _has_faiss_gpu() -> bool:
"""Check whether faiss-gpu is available (has GPU resources)."""
if not _FAISS_AVAILABLE:
return False
try:
import faiss
res = faiss.StandardGpuResources() # noqa: F841
return True
except (AttributeError, RuntimeError):
return False
def create_ann_index(path: str | Path, dim: int, config: Config) -> BaseANNIndex:
    """Create an ANN index according to config.ann_backend.

    For 'auto', the fallback chain is faiss-gpu -> faiss-cpu -> hnswlib.

    Args:
        path: directory for index persistence
        dim: vector dimensionality
        config: project configuration

    Returns:
        A BaseANNIndex implementation

    Raises:
        ImportError: if no suitable backend is available
    """

    def _faiss_backend() -> BaseANNIndex:
        from codexlens_search.core.faiss_index import FAISSANNIndex

        return FAISSANNIndex(path, dim, config)

    def _hnswlib_backend() -> BaseANNIndex:
        from codexlens_search.core.index import ANNIndex

        return ANNIndex(path, dim, config)

    backend = config.ann_backend
    if backend == "faiss":
        return _faiss_backend()
    if backend == "hnswlib":
        return _hnswlib_backend()
    # 'auto': prefer FAISS, fall back to hnswlib.
    if _FAISS_AVAILABLE:
        gpu_tag = " (GPU available)" if _has_faiss_gpu() else " (CPU)"
        logger.info("Auto-selected FAISS ANN backend%s", gpu_tag)
        return _faiss_backend()
    if _HNSWLIB_AVAILABLE:
        logger.info("Auto-selected hnswlib ANN backend")
        return _hnswlib_backend()
    raise ImportError(
        "No ANN backend available. Install faiss-cpu, faiss-gpu, or hnswlib."
    )
def create_binary_index(
    path: str | Path, dim: int, config: Config
) -> BaseBinaryIndex:
    """Create a binary index according to config.binary_backend.

    For 'auto', the fallback chain is faiss -> numpy BinaryStore.

    Args:
        path: directory for index persistence
        dim: vector dimensionality
        config: project configuration

    Returns:
        A BaseBinaryIndex implementation

    Raises:
        ImportError: if no suitable backend is available
    """

    def _faiss_backend() -> BaseBinaryIndex:
        from codexlens_search.core.faiss_index import FAISSBinaryIndex

        return FAISSBinaryIndex(path, dim, config)

    def _numpy_backend() -> BaseBinaryIndex:
        from codexlens_search.core.binary import BinaryStore

        return BinaryStore(path, dim, config)

    backend = config.binary_backend
    if backend == "faiss":
        return _faiss_backend()
    if backend == "hnswlib":
        return _numpy_backend()
    # 'auto': prefer FAISS when present.
    if _FAISS_AVAILABLE:
        logger.info("Auto-selected FAISS binary backend")
        return _faiss_backend()
    # numpy BinaryStore is always available (no extra deps).
    logger.info("Auto-selected numpy BinaryStore backend")
    return _numpy_backend()

View File

@@ -0,0 +1,275 @@
from __future__ import annotations
import logging
import math
import threading
from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.base import BaseANNIndex, BaseBinaryIndex
logger = logging.getLogger(__name__)
try:
import faiss
_FAISS_AVAILABLE = True
except ImportError:
faiss = None # type: ignore[assignment]
_FAISS_AVAILABLE = False
def _try_gpu_index(index: "faiss.Index") -> "faiss.Index":
    """Move *index* to GPU 0 when faiss-gpu is usable.

    Returns the GPU index on success, or the original CPU index untouched
    when GPU support is absent or the transfer fails.
    """
    try:
        resources = faiss.StandardGpuResources()
        on_gpu = faiss.index_cpu_to_gpu(resources, 0, index)
        logger.info("FAISS index transferred to GPU 0")
        return on_gpu
    except (AttributeError, RuntimeError) as exc:
        logger.debug("GPU transfer unavailable, staying on CPU: %s", exc)
        return index
def _to_cpu_for_save(index: "faiss.Index") -> "faiss.Index":
    """Convert a GPU index back to CPU for serialization.

    Falls back to returning the index unchanged when conversion is
    unavailable (CPU-only faiss build) or fails at runtime.
    """
    try:
        return faiss.index_gpu_to_cpu(index)
    except (AttributeError, RuntimeError):
        return index
class FAISSANNIndex(BaseANNIndex):
    """FAISS-based ANN index using IndexHNSWFlat with optional GPU.

    Uses Inner Product space with L2-normalized vectors for cosine similarity.
    Thread-safe via RLock. The index is created/loaded lazily on first use
    and transferred to GPU 0 when faiss-gpu resources are available.
    """

    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        if not _FAISS_AVAILABLE:
            raise ImportError(
                "faiss is required. Install with: pip install faiss-cpu "
                "or pip install faiss-gpu"
            )
        self._path = Path(path)
        self._index_path = self._path / "faiss_ann.index"
        self._dim = dim
        self._config = config
        self._lock = threading.RLock()
        # Populated lazily by load(); may be a GPU index after _try_gpu_index.
        self._index: faiss.Index | None = None

    def _ensure_loaded(self) -> None:
        """Load or initialize the index (caller holds lock)."""
        if self._index is not None:
            return
        self.load()

    def load(self) -> None:
        """Load index from disk or initialize a fresh one."""
        with self._lock:
            if self._index_path.exists():
                idx = faiss.read_index(str(self._index_path))
                logger.debug(
                    "Loaded FAISS ANN index from %s (%d items)",
                    self._index_path, idx.ntotal,
                )
            else:
                # HNSW with flat storage, M=32 by default
                m = self._config.hnsw_M
                idx = faiss.IndexHNSWFlat(self._dim, m, faiss.METRIC_INNER_PRODUCT)
                idx.hnsw.efConstruction = self._config.hnsw_ef_construction
                idx.hnsw.efSearch = self._config.hnsw_ef
                logger.debug(
                    "Initialized fresh FAISS HNSW index (dim=%d, M=%d)",
                    self._dim, m,
                )
            # Move to GPU when available; silently stays on CPU otherwise.
            self._index = _try_gpu_index(idx)

    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add L2-normalized float32 vectors.

        Vectors are normalized before insertion so that Inner Product
        distance equals cosine similarity.

        Args:
            ids: shape (N,) int64 -- currently unused by FAISS flat index
                but kept for API compatibility. FAISS uses sequential IDs.
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        vecs = np.ascontiguousarray(vectors, dtype=np.float32)
        # Normalize for cosine similarity via Inner Product
        faiss.normalize_L2(vecs)
        with self._lock:
            self._ensure_loaded()
            self._index.add(vecs)

    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.ann_top_k

        Returns:
            (ids, distances) as numpy arrays. For IP space, higher = more
            similar, but distances are returned as-is for consumer handling.
        """
        k = top_k if top_k is not None else self._config.ann_top_k
        with self._lock:
            self._ensure_loaded()
            count = self._index.ntotal
            if count == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.float32)
            k = min(k, count)
            # Set efSearch for HNSW accuracy
            try:
                self._index.hnsw.efSearch = max(self._config.hnsw_ef, k)
            except AttributeError:
                pass  # GPU index may not expose hnsw attribute directly
            q = np.ascontiguousarray(query_vec, dtype=np.float32).reshape(1, -1)
            faiss.normalize_L2(q)
            distances, labels = self._index.search(q, k)
            return labels[0].astype(np.int64), distances[0].astype(np.float32)

    def save(self) -> None:
        """Save index to disk."""
        with self._lock:
            if self._index is None:
                return
            self._path.mkdir(parents=True, exist_ok=True)
            # Serialize a CPU copy; a GPU index is converted back first.
            cpu_index = _to_cpu_for_save(self._index)
            faiss.write_index(cpu_index, str(self._index_path))

    def __len__(self) -> int:
        with self._lock:
            if self._index is None:
                return 0
            return self._index.ntotal
class FAISSBinaryIndex(BaseBinaryIndex):
    """FAISS-based binary index using IndexBinaryFlat for Hamming distance.

    Float32 vectors are sign-quantized (bit = value > 0) before insertion.
    All public methods are guarded by an RLock for thread safety.
    """

    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        if not _FAISS_AVAILABLE:
            raise ImportError(
                "faiss is required. Install with: pip install faiss-cpu "
                "or pip install faiss-gpu"
            )
        self._path = Path(path)
        self._index_path = self._path / "faiss_binary.index"
        self._dim = dim
        self._config = config
        self._packed_bytes = math.ceil(dim / 8)
        self._lock = threading.RLock()
        self._index: faiss.IndexBinary | None = None

    def _ensure_loaded(self) -> None:
        """Load or initialize the index on first use (caller holds lock)."""
        if self._index is None:
            self.load()

    def _quantize(self, vectors: np.ndarray) -> np.ndarray:
        """Sign-quantize float32 rows (N, dim) into packed uint8 (N, packed_bytes)."""
        return np.packbits((vectors > 0).astype(np.uint8), axis=1)

    def _quantize_single(self, vec: np.ndarray) -> np.ndarray:
        """Sign-quantize one float32 vector (dim,) into packed uint8 (1, packed_bytes)."""
        return np.packbits((vec > 0).astype(np.uint8)).reshape(1, -1)

    def load(self) -> None:
        """Load binary index from disk or initialize a fresh one."""
        with self._lock:
            if not self._index_path.exists():
                # IndexBinaryFlat takes dimension in bits
                fresh = faiss.IndexBinaryFlat(self._dim)
                logger.debug(
                    "Initialized fresh FAISS binary index (dim_bits=%d)", self._dim,
                )
                self._index = fresh
                return
            loaded = faiss.read_index_binary(str(self._index_path))
            logger.debug(
                "Loaded FAISS binary index from %s (%d items)",
                self._index_path, loaded.ntotal,
            )
            self._index = loaded

    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors (binary-quantized internally).

        Args:
            ids: shape (N,) int64 -- kept for API compatibility
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        rows = np.ascontiguousarray(self._quantize(vectors), dtype=np.uint8)
        with self._lock:
            self._ensure_loaded()
            self._index.add(rows)

    def coarse_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search by Hamming distance.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.binary_top_k

        Returns:
            (ids, distances) sorted ascending by Hamming distance
        """
        with self._lock:
            self._ensure_loaded()
            total = self._index.ntotal
            if total == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.int32)
            k = min(self._config.binary_top_k if top_k is None else top_k, total)
            query = np.ascontiguousarray(self._quantize_single(query_vec), dtype=np.uint8)
            distances, labels = self._index.search(query, k)
        return labels[0].astype(np.int64), distances[0].astype(np.int32)

    def save(self) -> None:
        """Save binary index to disk."""
        with self._lock:
            if self._index is not None:
                self._path.mkdir(parents=True, exist_ok=True)
                faiss.write_index_binary(self._index, str(self._index_path))

    def __len__(self) -> int:
        with self._lock:
            return 0 if self._index is None else self._index.ntotal

View File

@@ -0,0 +1,136 @@
from __future__ import annotations
import logging
import threading
from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.base import BaseANNIndex
logger = logging.getLogger(__name__)
try:
import hnswlib
_HNSWLIB_AVAILABLE = True
except ImportError:
_HNSWLIB_AVAILABLE = False
class ANNIndex(BaseANNIndex):
    """HNSW-based approximate nearest neighbor index.

    The underlying hnswlib index is created or loaded lazily on first use;
    all public methods are guarded by an RLock.
    """

    def __init__(self, path: str | Path, dim: int, config: Config) -> None:
        if not _HNSWLIB_AVAILABLE:
            raise ImportError("hnswlib is required. Install with: pip install hnswlib")
        self._path = Path(path)
        self._hnsw_path = self._path / "ann_index.hnsw"
        self._dim = dim
        self._config = config
        self._lock = threading.RLock()
        self._index: hnswlib.Index | None = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _ensure_loaded(self) -> None:
        """Load or initialize the index on first use (caller holds lock)."""
        if self._index is None:
            self.load()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def load(self) -> None:
        """Load index from disk or initialize a fresh one."""
        with self._lock:
            index = hnswlib.Index(space="cosine", dim=self._dim)
            if self._hnsw_path.exists():
                index.load_index(str(self._hnsw_path), max_elements=0)
                index.set_ef(self._config.hnsw_ef)
                logger.debug("Loaded HNSW index from %s (%d items)", self._hnsw_path, index.get_current_count())
            else:
                index.init_index(
                    max_elements=1000,
                    ef_construction=self._config.hnsw_ef_construction,
                    M=self._config.hnsw_M,
                )
                index.set_ef(self._config.hnsw_ef)
                logger.debug("Initialized fresh HNSW index (dim=%d)", self._dim)
            self._index = index

    def add(self, ids: np.ndarray, vectors: np.ndarray) -> None:
        """Add float32 vectors.

        Persistence is explicit: add() never writes to disk; callers must
        invoke save() after batch indexing.

        Args:
            ids: shape (N,) int64
            vectors: shape (N, dim) float32
        """
        if len(ids) == 0:
            return
        data = np.ascontiguousarray(vectors, dtype=np.float32)
        with self._lock:
            self._ensure_loaded()
            # Grow capacity (roughly doubling) before insertion if required.
            occupied = self._index.get_current_count()
            capacity = self._index.get_max_elements()
            required = occupied + len(ids)
            if required > capacity:
                self._index.resize_index(max(capacity * 2, required + 100))
            self._index.add_items(data, ids.astype(np.int64))

    def fine_search(
        self, query_vec: np.ndarray, top_k: int | None = None
    ) -> tuple[np.ndarray, np.ndarray]:
        """Search for nearest neighbors.

        Args:
            query_vec: float32 vector of shape (dim,)
            top_k: number of results; defaults to config.ann_top_k

        Returns:
            (ids, distances) as numpy arrays
        """
        k = self._config.ann_top_k if top_k is None else top_k
        with self._lock:
            self._ensure_loaded()
            occupied = self._index.get_current_count()
            if occupied == 0:
                return np.array([], dtype=np.int64), np.array([], dtype=np.float32)
            k = min(k, occupied)
            self._index.set_ef(max(self._config.hnsw_ef, k))
            query = np.ascontiguousarray(query_vec, dtype=np.float32).reshape(1, -1)
            labels, distances = self._index.knn_query(query, k=k)
        return labels[0].astype(np.int64), distances[0].astype(np.float32)

    def save(self) -> None:
        """Save index to disk (caller may or may not hold lock)."""
        with self._lock:
            if self._index is None:
                return
            self._path.mkdir(parents=True, exist_ok=True)
            self._index.save_index(str(self._hnsw_path))

    def __len__(self) -> int:
        with self._lock:
            return 0 if self._index is None else self._index.get_current_count()

View File

@@ -0,0 +1,4 @@
from .base import BaseEmbedder
from .local import FastEmbedEmbedder, EMBED_PROFILES
__all__ = ["BaseEmbedder", "FastEmbedEmbedder", "EMBED_PROFILES"]

View File

@@ -0,0 +1,13 @@
from __future__ import annotations
from abc import ABC, abstractmethod
import numpy as np
class BaseEmbedder(ABC):
    """Abstract interface for text embedding backends."""

    @abstractmethod
    def embed_single(self, text: str) -> np.ndarray:
        """Embed a single text, returns float32 ndarray shape (dim,)."""

    @abstractmethod
    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        """Embed a list of texts, returns list of float32 ndarrays."""

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
import numpy as np
from ..config import Config
from .base import BaseEmbedder
# Named embedding model profiles: short alias -> fastembed model id.
# The trailing comment on each entry records the output dimensionality.
EMBED_PROFILES = {
    "small": "BAAI/bge-small-en-v1.5",  # 384d
    "base": "BAAI/bge-base-en-v1.5",  # 768d
    "large": "BAAI/bge-large-en-v1.5",  # 1024d
    "code": "jinaai/jina-embeddings-v2-base-code",  # 768d
}
class FastEmbedEmbedder(BaseEmbedder):
    """Embedder backed by fastembed.TextEmbedding with lazy model loading."""

    def __init__(self, config: Config) -> None:
        self._config = config
        self._model = None

    def _load(self) -> None:
        """Instantiate the fastembed model on first use (lazy)."""
        if self._model is not None:
            return
        from fastembed import TextEmbedding

        providers = self._config.resolve_embed_providers()
        try:
            self._model = TextEmbedding(
                model_name=self._config.embed_model,
                providers=providers,
            )
        except TypeError:
            # Older fastembed versions may not accept providers kwarg
            self._model = TextEmbedding(model_name=self._config.embed_model)

    def embed_single(self, text: str) -> np.ndarray:
        """Embed one text; returns a float32 ndarray of shape (dim,)."""
        self._load()
        vectors = list(self._model.embed([text]))
        return vectors[0].astype(np.float32)

    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        """Embed many texts in config-sized batches, preserving input order."""
        self._load()
        size = self._config.embed_batch_size
        out: list[np.ndarray] = []
        for offset in range(0, len(texts), size):
            window = texts[offset : offset + size]
            out.extend(vec.astype(np.float32) for vec in self._model.embed(window))
        return out

View File

@@ -0,0 +1,5 @@
from __future__ import annotations
from .pipeline import IndexingPipeline, IndexStats
__all__ = ["IndexingPipeline", "IndexStats"]

View File

@@ -0,0 +1,277 @@
"""Three-stage parallel indexing pipeline: chunk -> embed -> index.
Uses threading.Thread with queue.Queue for producer-consumer handoff.
The GIL is acceptable because embedding (onnxruntime) releases it in C extensions.
"""
from __future__ import annotations
import logging
import queue
import threading
import time
from dataclasses import dataclass
from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.binary import BinaryStore
from codexlens_search.core.index import ANNIndex
from codexlens_search.embed.base import BaseEmbedder
from codexlens_search.search.fts import FTSEngine
logger = logging.getLogger(__name__)
# Sentinel value to signal worker shutdown
# (pushed onto each queue exactly once when a stage finishes producing).
_SENTINEL = None

# Defaults for chunking (can be overridden via index_files kwargs)
_DEFAULT_MAX_CHUNK_CHARS = 800  # max characters per chunk
_DEFAULT_CHUNK_OVERLAP = 100  # characters carried between adjacent chunks
@dataclass
class IndexStats:
    """Statistics returned after indexing completes."""

    files_processed: int = 0  # files that produced at least one chunk
    chunks_created: int = 0  # total chunks pushed through the pipeline
    duration_seconds: float = 0.0  # wall-clock time, rounded to 2 decimals
class IndexingPipeline:
"""Parallel 3-stage indexing pipeline with queue-based handoff.
Stage 1 (main thread): Read files, chunk text, push to embed_queue.
Stage 2 (embed worker): Pull text batches, call embed_batch(), push vectors to index_queue.
Stage 3 (index worker): Pull vectors+ids, call BinaryStore.add(), ANNIndex.add(), FTS.add_documents().
After all stages complete, save() is called on BinaryStore and ANNIndex exactly once.
"""
def __init__(
self,
embedder: BaseEmbedder,
binary_store: BinaryStore,
ann_index: ANNIndex,
fts: FTSEngine,
config: Config,
) -> None:
self._embedder = embedder
self._binary_store = binary_store
self._ann_index = ann_index
self._fts = fts
self._config = config
def index_files(
self,
files: list[Path],
*,
root: Path | None = None,
max_chunk_chars: int = _DEFAULT_MAX_CHUNK_CHARS,
chunk_overlap: int = _DEFAULT_CHUNK_OVERLAP,
max_file_size: int = 50_000,
) -> IndexStats:
"""Run the 3-stage pipeline on the given files.
Args:
files: List of file paths to index.
root: Optional root for computing relative paths. If None, uses
each file's absolute path as its identifier.
max_chunk_chars: Maximum characters per chunk.
chunk_overlap: Character overlap between consecutive chunks.
max_file_size: Skip files larger than this (bytes).
Returns:
IndexStats with counts and timing.
"""
if not files:
return IndexStats()
t0 = time.monotonic()
embed_queue: queue.Queue = queue.Queue(maxsize=4)
index_queue: queue.Queue = queue.Queue(maxsize=4)
# Track errors from workers
worker_errors: list[Exception] = []
error_lock = threading.Lock()
def _record_error(exc: Exception) -> None:
with error_lock:
worker_errors.append(exc)
# --- Start workers ---
embed_thread = threading.Thread(
target=self._embed_worker,
args=(embed_queue, index_queue, _record_error),
daemon=True,
name="indexing-embed",
)
index_thread = threading.Thread(
target=self._index_worker,
args=(index_queue, _record_error),
daemon=True,
name="indexing-index",
)
embed_thread.start()
index_thread.start()
# --- Stage 1: chunk files (main thread) ---
chunk_id = 0
files_processed = 0
chunks_created = 0
for fpath in files:
try:
if fpath.stat().st_size > max_file_size:
continue
text = fpath.read_text(encoding="utf-8", errors="replace")
except Exception as exc:
logger.debug("Skipping %s: %s", fpath, exc)
continue
rel_path = str(fpath.relative_to(root)) if root else str(fpath)
file_chunks = self._chunk_text(text, rel_path, max_chunk_chars, chunk_overlap)
if not file_chunks:
continue
files_processed += 1
# Assign sequential IDs and push batch to embed queue
batch_ids = []
batch_texts = []
batch_paths = []
for chunk_text, path in file_chunks:
batch_ids.append(chunk_id)
batch_texts.append(chunk_text)
batch_paths.append(path)
chunk_id += 1
chunks_created += len(batch_ids)
embed_queue.put((batch_ids, batch_texts, batch_paths))
# Signal embed worker: no more data
embed_queue.put(_SENTINEL)
# Wait for workers to finish
embed_thread.join()
index_thread.join()
# --- Final flush ---
self._binary_store.save()
self._ann_index.save()
duration = time.monotonic() - t0
stats = IndexStats(
files_processed=files_processed,
chunks_created=chunks_created,
duration_seconds=round(duration, 2),
)
logger.info(
"Indexing complete: %d files, %d chunks in %.1fs",
stats.files_processed,
stats.chunks_created,
stats.duration_seconds,
)
# Raise first worker error if any occurred
if worker_errors:
raise worker_errors[0]
return stats
# ------------------------------------------------------------------
# Workers
# ------------------------------------------------------------------
def _embed_worker(
self,
in_q: queue.Queue,
out_q: queue.Queue,
on_error: callable,
) -> None:
"""Stage 2: Pull chunk batches, embed, push (ids, vecs, docs) to index queue."""
try:
while True:
item = in_q.get()
if item is _SENTINEL:
break
batch_ids, batch_texts, batch_paths = item
try:
vecs = self._embedder.embed_batch(batch_texts)
vec_array = np.array(vecs, dtype=np.float32)
id_array = np.array(batch_ids, dtype=np.int64)
out_q.put((id_array, vec_array, batch_texts, batch_paths))
except Exception as exc:
logger.error("Embed worker error: %s", exc)
on_error(exc)
finally:
# Signal index worker: no more data
out_q.put(_SENTINEL)
def _index_worker(
    self,
    in_q: queue.Queue,
    on_error: callable,
) -> None:
    """Stage 3: drain embedded batches and persist them to all stores.

    Consumes (id_array, vec_array, texts, paths) tuples until the sentinel
    arrives, writing each batch to the binary store, the ANN index, and the
    FTS engine. Per-batch failures are reported via ``on_error`` and do not
    stop the loop.
    """
    while (item := in_q.get()) is not _SENTINEL:
        id_array, vec_array, texts, paths = item
        try:
            self._binary_store.add(id_array, vec_array)
            self._ann_index.add(id_array, vec_array)
            # FTS expects (doc_id, path, content) rows.
            rows = [
                (int(doc_id), doc_path, doc_text)
                for doc_id, doc_path, doc_text in zip(id_array, paths, texts)
            ]
            self._fts.add_documents(rows)
        except Exception as exc:
            logger.error("Index worker error: %s", exc)
            on_error(exc)
# ------------------------------------------------------------------
# Chunking
# ------------------------------------------------------------------
@staticmethod
def _chunk_text(
    text: str,
    path: str,
    max_chars: int,
    overlap: int,
) -> list[tuple[str, str]]:
    """Split file text into overlapping, line-aligned chunks.

    Args:
        text: Full file content.
        path: Relative path recorded alongside every chunk.
        max_chars: Soft chunk-size limit; a single line longer than this
            still ends up in one oversized chunk (lines are never split).
        overlap: Number of trailing characters carried into the next chunk
            for context. Values <= 0 disable overlap.

    Returns:
        List of (chunk_text, path) tuples; empty for whitespace-only input.
    """
    if not text.strip():
        return []
    chunks: list[tuple[str, str]] = []
    current: list[str] = []
    current_len = 0
    for line in text.splitlines(keepends=True):
        if current_len + len(line) > max_chars and current:
            chunk = "".join(current)
            chunks.append((chunk, path))
            # Seed the next chunk with the tail of this one for context.
            # Guard overlap <= 0: s[-0:] is the WHOLE string, which would
            # carry the entire previous chunk forward and make every
            # subsequent chunk grow without bound.
            tail = chunk[-overlap:] if overlap > 0 else ""
            current = [tail] if tail else []
            current_len = len(tail)
        current.append(line)
        current_len += len(line)
    if current:
        chunks.append(("".join(current), path))
    return chunks

View File

@@ -0,0 +1,5 @@
from .base import BaseReranker
from .local import FastEmbedReranker
from .api import APIReranker
__all__ = ["BaseReranker", "FastEmbedReranker", "APIReranker"]

View File

@@ -0,0 +1,103 @@
from __future__ import annotations
import logging
import time
import httpx
from codexlens_search.config import Config
from .base import BaseReranker
logger = logging.getLogger(__name__)
class APIReranker(BaseReranker):
    """Reranker backed by a remote HTTP API (SiliconFlow/Cohere/Jina format).

    Documents are packed into token-budgeted batches, each batch is scored
    via the remote ``/rerank`` endpoint with retry on transient failures,
    and scores are reassembled in the original document order.
    """

    def __init__(self, config: Config) -> None:
        self._config = config
        self._client = httpx.Client(
            headers={
                "Authorization": f"Bearer {config.reranker_api_key}",
                "Content-Type": "application/json",
            },
        )

    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self._client.close()

    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        """Score (query, doc) pairs. Returns one float per document, in order.

        Documents the API does not score keep a default score of 0.0.

        Raises:
            RuntimeError: if the API keeps failing after all retries.
            httpx.HTTPStatusError: for non-retryable HTTP error responses.
        """
        if not documents:
            return []
        max_tokens = self._config.reranker_api_max_tokens_per_batch
        batches = self._split_batches(documents, max_tokens)
        scores = [0.0] * len(documents)
        for batch in batches:
            batch_scores = self._call_api_with_retry(query, batch)
            for orig_idx, score in batch_scores.items():
                scores[orig_idx] = score
        return scores

    def _split_batches(
        self, documents: list[str], max_tokens: int
    ) -> list[list[tuple[int, str]]]:
        """Greedily pack (index, text) pairs into batches under *max_tokens*.

        Token counts are approximated as len(text) // 4. A single oversized
        document still gets its own batch rather than being dropped.
        """
        batches: list[list[tuple[int, str]]] = []
        current_batch: list[tuple[int, str]] = []
        current_tokens = 0
        for idx, text in enumerate(documents):
            doc_tokens = len(text) // 4
            if current_tokens + doc_tokens > max_tokens and current_batch:
                batches.append(current_batch)
                current_batch = []
                current_tokens = 0
            current_batch.append((idx, text))
            current_tokens += doc_tokens
        if current_batch:
            batches.append(current_batch)
        return batches

    def _call_api_with_retry(
        self,
        query: str,
        docs: list[tuple[int, str]],
        max_retries: int = 3,
    ) -> dict[int, float]:
        """POST one batch to the rerank endpoint with exponential backoff.

        Retries on connection errors and on HTTP 429/503; other HTTP errors
        raise immediately via raise_for_status().

        Returns:
            Mapping of original document index -> relevance score.
        """
        url = self._config.reranker_api_url.rstrip("/") + "/rerank"
        payload = {
            "model": self._config.reranker_api_model,
            "query": query,
            "documents": [t for _, t in docs],
        }
        # Remember why the previous attempt failed so the final error is
        # informative even when the failure was a retryable HTTP status
        # (the original reported "Last error: None" in that case).
        last_error: str | None = None
        for attempt in range(max_retries):
            if attempt:
                # Backoff between attempts (0.5s, 1s, ...); no sleep after
                # the final attempt since there is nothing left to retry.
                time.sleep((2 ** (attempt - 1)) * 0.5)
            try:
                response = self._client.post(url, json=payload)
            except Exception as exc:
                last_error = repr(exc)
                continue
            if response.status_code in (429, 503):
                last_error = f"HTTP {response.status_code}"
                logger.warning(
                    "API reranker returned HTTP %s (attempt %d/%d), retrying...",
                    response.status_code,
                    attempt + 1,
                    max_retries,
                )
                continue
            response.raise_for_status()
            data = response.json()
            results = data.get("results", [])
            scores: dict[int, float] = {}
            for item in results:
                # 'index' is local to this batch; map it back to the
                # position in the caller's full document list.
                local_idx = int(item["index"])
                orig_idx = docs[local_idx][0]
                scores[orig_idx] = float(item["relevance_score"])
            return scores
        raise RuntimeError(
            f"API reranker failed after {max_retries} attempts. Last error: {last_error}"
        )

View File

@@ -0,0 +1,8 @@
from __future__ import annotations
from abc import ABC, abstractmethod
class BaseReranker(ABC):
    """Abstract interface for rerankers that score query/document relevance."""

    @abstractmethod
    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        """Score (query, doc) pairs. Returns list of floats same length as documents."""
        ...

View File

@@ -0,0 +1,25 @@
from __future__ import annotations
from codexlens_search.config import Config
from .base import BaseReranker
class FastEmbedReranker(BaseReranker):
    """Local reranker backed by fastembed TextCrossEncoder.

    The cross-encoder model is loaded lazily on first use so importing this
    module stays cheap and model initialization cost is only paid when
    reranking actually happens.
    """

    def __init__(self, config: Config) -> None:
        self._config = config
        # Lazily-initialized TextCrossEncoder instance (see _load).
        self._model = None

    def _load(self) -> None:
        # Import inside the method: fastembed is a heavy dependency and only
        # needed once reranking is requested.
        if self._model is None:
            from fastembed.rerank.cross_encoder import TextCrossEncoder
            self._model = TextCrossEncoder(model_name=self._config.reranker_model)

    def score_pairs(self, query: str, documents: list[str]) -> list[float]:
        # Returns one score per document, in input order; unscored docs stay 0.0.
        # NOTE(review): assumes TextCrossEncoder.rerank yields result objects
        # exposing .index and .score; some fastembed versions return plain
        # float scores in input order — confirm against the pinned version.
        self._load()
        results = list(self._model.rerank(query, documents))
        scores = [0.0] * len(documents)
        for r in results:
            scores[r.index] = float(r.score)
        return scores

View File

@@ -0,0 +1,8 @@
from .fts import FTSEngine
from .fusion import reciprocal_rank_fusion, detect_query_intent, QueryIntent, DEFAULT_WEIGHTS
from .pipeline import SearchPipeline, SearchResult
__all__ = [
"FTSEngine", "reciprocal_rank_fusion", "detect_query_intent",
"QueryIntent", "DEFAULT_WEIGHTS", "SearchPipeline", "SearchResult",
]

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
import sqlite3
from pathlib import Path
class FTSEngine:
def __init__(self, db_path: str | Path) -> None:
self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
self._conn.execute(
"CREATE VIRTUAL TABLE IF NOT EXISTS docs "
"USING fts5(content, tokenize='porter unicode61')"
)
self._conn.execute(
"CREATE TABLE IF NOT EXISTS docs_meta "
"(id INTEGER PRIMARY KEY, path TEXT)"
)
self._conn.commit()
def add_documents(self, docs: list[tuple[int, str, str]]) -> None:
"""Add documents in batch. docs: list of (id, path, content)."""
if not docs:
return
self._conn.executemany(
"INSERT OR REPLACE INTO docs_meta (id, path) VALUES (?, ?)",
[(doc_id, path) for doc_id, path, content in docs],
)
self._conn.executemany(
"INSERT OR REPLACE INTO docs (rowid, content) VALUES (?, ?)",
[(doc_id, content) for doc_id, path, content in docs],
)
self._conn.commit()
def exact_search(self, query: str, top_k: int = 50) -> list[tuple[int, float]]:
"""FTS5 MATCH query, return (id, bm25_score) sorted by score descending."""
try:
rows = self._conn.execute(
"SELECT rowid, bm25(docs) AS score FROM docs "
"WHERE docs MATCH ? ORDER BY score LIMIT ?",
(query, top_k),
).fetchall()
except sqlite3.OperationalError:
return []
# bm25 in SQLite FTS5 returns negative values (lower = better match)
# Negate so higher is better
return [(int(row[0]), -float(row[1])) for row in rows]
def fuzzy_search(self, query: str, top_k: int = 50) -> list[tuple[int, float]]:
"""Prefix search: each token + '*', return (id, score) sorted descending."""
tokens = query.strip().split()
if not tokens:
return []
prefix_query = " ".join(t + "*" for t in tokens)
try:
rows = self._conn.execute(
"SELECT rowid, bm25(docs) AS score FROM docs "
"WHERE docs MATCH ? ORDER BY score LIMIT ?",
(prefix_query, top_k),
).fetchall()
except sqlite3.OperationalError:
return []
return [(int(row[0]), -float(row[1])) for row in rows]
def get_content(self, doc_id: int) -> str:
"""Retrieve content for a doc_id."""
row = self._conn.execute(
"SELECT content FROM docs WHERE rowid = ?", (doc_id,)
).fetchone()
return row[0] if row else ""

View File

@@ -0,0 +1,106 @@
from __future__ import annotations
import re
from enum import Enum
DEFAULT_WEIGHTS: dict[str, float] = {
"exact": 0.25,
"fuzzy": 0.10,
"vector": 0.50,
"graph": 0.15,
}
_CODE_CAMEL_RE = re.compile(r"[a-z][A-Z]")
_CODE_SNAKE_RE = re.compile(r"\b[a-z_]+_[a-z_]+\b")
_CODE_SYMBOLS_RE = re.compile(r"[.\[\](){}]|->|::")
_CODE_KEYWORDS_RE = re.compile(r"\b(import|def|class|return|from|async|await|lambda|yield)\b")
_QUESTION_WORDS_RE = re.compile(r"\b(how|what|why|when|where|which|who|does|do|is|are|can|should)\b", re.IGNORECASE)
class QueryIntent(Enum):
CODE_SYMBOL = "code_symbol"
NATURAL_LANGUAGE = "natural"
MIXED = "mixed"
def detect_query_intent(query: str) -> QueryIntent:
"""Detect whether query is a code symbol, natural language, or mixed."""
words = query.strip().split()
word_count = len(words)
code_signals = 0
natural_signals = 0
if _CODE_CAMEL_RE.search(query):
code_signals += 2
if _CODE_SNAKE_RE.search(query):
code_signals += 2
if _CODE_SYMBOLS_RE.search(query):
code_signals += 2
if _CODE_KEYWORDS_RE.search(query):
code_signals += 2
if "`" in query:
code_signals += 1
if word_count < 4:
code_signals += 1
if _QUESTION_WORDS_RE.search(query):
natural_signals += 2
if word_count > 5:
natural_signals += 2
if code_signals == 0 and word_count >= 3:
natural_signals += 1
if code_signals >= 2 and natural_signals == 0:
return QueryIntent.CODE_SYMBOL
if natural_signals >= 2 and code_signals == 0:
return QueryIntent.NATURAL_LANGUAGE
if code_signals >= 2 and natural_signals == 0:
return QueryIntent.CODE_SYMBOL
if natural_signals > code_signals:
return QueryIntent.NATURAL_LANGUAGE
if code_signals > natural_signals:
return QueryIntent.CODE_SYMBOL
return QueryIntent.MIXED
def get_adaptive_weights(intent: QueryIntent, base: dict | None = None) -> dict[str, float]:
"""Return weights adapted to query intent."""
weights = dict(base or DEFAULT_WEIGHTS)
if intent == QueryIntent.CODE_SYMBOL:
weights["exact"] = 0.45
weights["vector"] = 0.35
elif intent == QueryIntent.NATURAL_LANGUAGE:
weights["vector"] = 0.65
weights["exact"] = 0.15
# MIXED: use weights as-is
return weights
def reciprocal_rank_fusion(
results: dict[str, list[tuple[int, float]]],
weights: dict[str, float] | None = None,
k: int = 60,
) -> list[tuple[int, float]]:
"""Fuse ranked result lists using Reciprocal Rank Fusion.
results: {source_name: [(doc_id, score), ...]} each list sorted desc by score.
weights: weight per source (defaults to equal weight across all sources).
k: RRF constant (default 60).
Returns sorted list of (doc_id, fused_score) descending.
"""
if not results:
return []
sources = list(results.keys())
if weights is None:
equal_w = 1.0 / len(sources)
weights = {s: equal_w for s in sources}
scores: dict[int, float] = {}
for source, ranked_list in results.items():
w = weights.get(source, 0.0)
for rank, (doc_id, _) in enumerate(ranked_list, start=1):
scores[doc_id] = scores.get(doc_id, 0.0) + w * (1.0 / (k + rank))
return sorted(scores.items(), key=lambda x: x[1], reverse=True)

View File

@@ -0,0 +1,163 @@
from __future__ import annotations
import logging
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
import numpy as np
from ..config import Config
from ..core import ANNIndex, BinaryStore
from ..embed import BaseEmbedder
from ..rerank import BaseReranker
from .fts import FTSEngine
from .fusion import (
DEFAULT_WEIGHTS,
detect_query_intent,
get_adaptive_weights,
reciprocal_rank_fusion,
)
_log = logging.getLogger(__name__)
@dataclass
class SearchResult:
    """One ranked hit returned by SearchPipeline.search()."""

    # Chunk/document id as stored in the indexes.
    id: int
    # Source file path recorded at indexing time ("" if unknown).
    path: str
    # Final relevance score (reranker score when reranking succeeds).
    score: float
    # Leading excerpt of the chunk content (up to 200 chars).
    snippet: str = ""
class SearchPipeline:
    """Hybrid search: vector + FTS retrieval, RRF fusion, then reranking.

    Retrieval stages degrade gracefully: a failure in vector search, FTS, or
    the reranker logs a warning and falls back to whatever signals remain.
    """

    # Number of top fused candidates passed to the reranker.
    _RERANK_CANDIDATES = 50

    def __init__(
        self,
        embedder: BaseEmbedder,
        binary_store: BinaryStore,
        ann_index: ANNIndex,
        reranker: BaseReranker,
        fts: FTSEngine,
        config: Config,
    ) -> None:
        self._embedder = embedder
        self._binary_store = binary_store
        self._ann_index = ann_index
        self._reranker = reranker
        self._fts = fts
        self._config = config

    # -- Helper: vector search (binary coarse + ANN fine) -----------------
    def _vector_search(
        self, query_vec: np.ndarray
    ) -> list[tuple[int, float]]:
        """Run binary coarse search then ANN fine search and intersect.

        Falls back to the raw ANN results when the intersection is empty, so
        a weak binary pre-filter never empties the vector signal entirely.
        """
        cfg = self._config
        # Binary coarse search -> candidate_ids set
        candidate_ids_list, _ = self._binary_store.coarse_search(
            query_vec, top_k=cfg.binary_top_k
        )
        candidate_ids = set(candidate_ids_list)
        # ANN fine search on full index, then intersect with binary candidates
        ann_ids, ann_scores = self._ann_index.fine_search(
            query_vec, top_k=cfg.ann_top_k
        )
        # Keep only results that appear in binary candidates (2-stage funnel)
        vector_results: list[tuple[int, float]] = [
            (int(doc_id), float(score))
            for doc_id, score in zip(ann_ids, ann_scores)
            if int(doc_id) in candidate_ids
        ]
        # Fall back to full ANN results if intersection is empty
        if not vector_results:
            vector_results = [
                (int(doc_id), float(score))
                for doc_id, score in zip(ann_ids, ann_scores)
            ]
        return vector_results

    # -- Helper: FTS search (exact + fuzzy) ------------------------------
    def _fts_search(
        self, query: str
    ) -> tuple[list[tuple[int, float]], list[tuple[int, float]]]:
        """Run exact and fuzzy full-text search; returns (exact, fuzzy) lists."""
        cfg = self._config
        exact_results = self._fts.exact_search(query, top_k=cfg.fts_top_k)
        fuzzy_results = self._fts.fuzzy_search(query, top_k=cfg.fts_top_k)
        return exact_results, fuzzy_results

    # -- Main search entry point -----------------------------------------
    def search(self, query: str, top_k: int | None = None) -> list[SearchResult]:
        """Run the full hybrid search for *query*.

        Args:
            query: Free-text or code-like search query.
            top_k: Maximum results to return; defaults to config.reranker_top_k.

        Returns:
            SearchResult list sorted by final score, best first; empty when
            no retrieval source produced any results.
        """
        cfg = self._config
        final_top_k = top_k if top_k is not None else cfg.reranker_top_k
        # 1. Detect intent -> adaptive weights
        intent = detect_query_intent(query)
        weights = get_adaptive_weights(intent, cfg.fusion_weights)
        # 2. Embed query
        query_vec = self._embedder.embed_single(query)
        # 3. Parallel vector + FTS search
        vector_results: list[tuple[int, float]] = []
        exact_results: list[tuple[int, float]] = []
        fuzzy_results: list[tuple[int, float]] = []
        with ThreadPoolExecutor(max_workers=2) as pool:
            vec_future = pool.submit(self._vector_search, query_vec)
            fts_future = pool.submit(self._fts_search, query)
            try:
                vector_results = vec_future.result()
            except Exception:
                _log.warning("Vector search failed, using empty results", exc_info=True)
            try:
                exact_results, fuzzy_results = fts_future.result()
            except Exception:
                _log.warning("FTS search failed, using empty results", exc_info=True)
        # 4. RRF fusion over whichever sources produced results
        fusion_input: dict[str, list[tuple[int, float]]] = {}
        if vector_results:
            fusion_input["vector"] = vector_results
        if exact_results:
            fusion_input["exact"] = exact_results
        if fuzzy_results:
            fusion_input["fuzzy"] = fuzzy_results
        if not fusion_input:
            return []
        fused = reciprocal_rank_fusion(fusion_input, weights=weights, k=cfg.fusion_k)
        # 5. Rerank top candidates. A reranker failure (e.g. a remote API
        #    outage) previously aborted the whole search; fall back to the
        #    fused order instead, consistent with how the retrieval stages
        #    above degrade gracefully.
        rerank_ids = [doc_id for doc_id, _ in fused[: self._RERANK_CANDIDATES]]
        contents = [self._fts.get_content(doc_id) for doc_id in rerank_ids]
        try:
            rerank_scores = self._reranker.score_pairs(query, contents)
            ranked = sorted(
                zip(rerank_ids, rerank_scores), key=lambda x: x[1], reverse=True
            )
        except Exception:
            _log.warning("Reranker failed, falling back to fusion order", exc_info=True)
            ranked = fused[: self._RERANK_CANDIDATES]
        # 6. Build SearchResult list
        results: list[SearchResult] = []
        for doc_id, score in ranked[:final_top_k]:
            # NOTE(review): reaches into FTSEngine internals for the path;
            # consider exposing a get_path() accessor on FTSEngine.
            path = self._fts._conn.execute(
                "SELECT path FROM docs_meta WHERE id = ?", (doc_id,)
            ).fetchone()
            results.append(
                SearchResult(
                    id=doc_id,
                    path=path[0] if path else "",
                    score=float(score),
                    snippet=self._fts.get_content(doc_id)[:200],
                )
            )
        return results