feat: Enhance search functionality with quality tiers and scoped indexing

- Updated `search_code` function to include a `quality` parameter for search quality tiers: "fast", "balanced", "thorough", and "auto".
- Introduced `search_scope` function to limit search results to a specific directory scope.
- Added `index_scope` function for indexing a specific directory without re-indexing the entire project.
- Refactored `SearchPipeline` to support quality-based routing in the `search` method.
- Implemented `Shard` and `ShardManager` classes to manage multiple index shards with LRU eviction and efficient file routing.
- Added debounce functionality in `IncrementalIndexer` to batch file events and reduce redundant processing.
- Enhanced `FileWatcher` to integrate with `IncrementalIndexer` for improved event handling.
This commit is contained in:
catlog22
2026-03-19 17:47:53 +08:00
parent 54071473fc
commit 18aff260a0
46 changed files with 1537 additions and 658 deletions

View File

@@ -124,6 +124,19 @@ def create_config_from_env(db_path: str | Path, **overrides: object) -> "Config"
kwargs["hnsw_ef"] = int(os.environ["CODEXLENS_HNSW_EF"])
if os.environ.get("CODEXLENS_HNSW_M"):
kwargs["hnsw_M"] = int(os.environ["CODEXLENS_HNSW_M"])
# Tier config from env
if os.environ.get("CODEXLENS_TIER_HOT_HOURS"):
kwargs["tier_hot_hours"] = int(os.environ["CODEXLENS_TIER_HOT_HOURS"])
if os.environ.get("CODEXLENS_TIER_COLD_HOURS"):
kwargs["tier_cold_hours"] = int(os.environ["CODEXLENS_TIER_COLD_HOURS"])
# Search quality tier from env
if os.environ.get("CODEXLENS_SEARCH_QUALITY"):
kwargs["default_search_quality"] = os.environ["CODEXLENS_SEARCH_QUALITY"]
# Shard config from env
if os.environ.get("CODEXLENS_NUM_SHARDS"):
kwargs["num_shards"] = int(os.environ["CODEXLENS_NUM_SHARDS"])
if os.environ.get("CODEXLENS_MAX_LOADED_SHARDS"):
kwargs["max_loaded_shards"] = int(os.environ["CODEXLENS_MAX_LOADED_SHARDS"])
resolved = Path(db_path).resolve()
kwargs["metadata_db_path"] = str(resolved / "metadata.db")
return Config(**kwargs)
@@ -143,28 +156,8 @@ def _create_config(args: argparse.Namespace) -> "Config":
return create_config_from_env(args.db_path, **overrides)
def create_pipeline(
db_path: str | Path,
config: "Config | None" = None,
) -> tuple:
"""Construct pipeline components from db_path and config.
Returns (indexing_pipeline, search_pipeline, config).
Used by both CLI bridge and MCP server.
"""
from codexlens_search.config import Config
from codexlens_search.core.factory import create_ann_index, create_binary_index
from codexlens_search.indexing.metadata import MetadataStore
from codexlens_search.indexing.pipeline import IndexingPipeline
from codexlens_search.search.fts import FTSEngine
from codexlens_search.search.pipeline import SearchPipeline
if config is None:
config = create_config_from_env(db_path)
resolved = Path(db_path).resolve()
resolved.mkdir(parents=True, exist_ok=True)
# Select embedder: API if configured, otherwise local fastembed
def _create_embedder(config: "Config"):
"""Create embedder based on config, auto-detecting embed_dim from API."""
if config.embed_api_url:
from codexlens_search.embed.api import APIEmbedder
embedder = APIEmbedder(config)
@@ -179,13 +172,11 @@ def create_pipeline(
else:
from codexlens_search.embed.local import FastEmbedEmbedder
embedder = FastEmbedEmbedder(config)
return embedder
binary_store = create_binary_index(resolved, config.embed_dim, config)
ann_index = create_ann_index(resolved, config.embed_dim, config)
fts = FTSEngine(resolved / "fts.db")
metadata = MetadataStore(resolved / "metadata.db")
# Select reranker: API if configured, otherwise local fastembed
def _create_reranker(config: "Config"):
"""Create reranker based on config."""
if config.reranker_api_url:
from codexlens_search.rerank.api import APIReranker
reranker = APIReranker(config)
@@ -193,6 +184,60 @@ def create_pipeline(
else:
from codexlens_search.rerank.local import FastEmbedReranker
reranker = FastEmbedReranker(config)
return reranker
def create_pipeline(
db_path: str | Path,
config: "Config | None" = None,
) -> tuple:
"""Construct pipeline components from db_path and config.
Returns (indexing_pipeline, search_pipeline, config).
Used by both CLI bridge and MCP server.
When config.num_shards > 1, returns a ShardManager-backed pipeline
where indexing and search are delegated to the ShardManager.
The returned tuple is (shard_manager, shard_manager, config) so that
callers can use shard_manager.sync() and shard_manager.search().
"""
from codexlens_search.config import Config
if config is None:
config = create_config_from_env(db_path)
resolved = Path(db_path).resolve()
resolved.mkdir(parents=True, exist_ok=True)
embedder = _create_embedder(config)
reranker = _create_reranker(config)
# Sharded mode: delegate to ShardManager
if config.num_shards > 1:
from codexlens_search.core.shard_manager import ShardManager
manager = ShardManager(
num_shards=config.num_shards,
db_path=resolved,
config=config,
embedder=embedder,
reranker=reranker,
)
log.info(
"Using ShardManager with %d shards (max_loaded=%d)",
config.num_shards, config.max_loaded_shards,
)
return manager, manager, config
# Single-shard mode: original behavior, no ShardManager overhead
from codexlens_search.core.factory import create_ann_index, create_binary_index
from codexlens_search.indexing.metadata import MetadataStore
from codexlens_search.indexing.pipeline import IndexingPipeline
from codexlens_search.search.fts import FTSEngine
from codexlens_search.search.pipeline import SearchPipeline
binary_store = create_binary_index(resolved, config.embed_dim, config)
ann_index = create_ann_index(resolved, config.embed_dim, config)
fts = FTSEngine(resolved / "fts.db")
metadata = MetadataStore(resolved / "metadata.db")
indexing = IndexingPipeline(
embedder=embedder,

View File

@@ -47,7 +47,7 @@ class Config:
# Backend selection: 'auto', 'faiss', 'hnswlib'
ann_backend: str = "auto"
binary_backend: str = "auto"
binary_backend: str = "faiss"
# Indexing pipeline
index_workers: int = 2 # number of parallel indexing workers
@@ -77,6 +77,17 @@ class Config:
# Metadata store
metadata_db_path: str = "" # empty = no metadata tracking
# Data tiering (hot/warm/cold)
tier_hot_hours: int = 24 # files accessed within this window are 'hot'
tier_cold_hours: int = 168 # files not accessed for this long are 'cold'
# Search quality tier: 'fast', 'balanced', 'thorough', 'auto'
default_search_quality: str = "auto"
# Shard partitioning
num_shards: int = 1 # 1 = single partition (no sharding), >1 = hash-based sharding
max_loaded_shards: int = 4 # LRU limit for loaded shards in ShardManager
# FTS
fts_top_k: int = 50

View File

@@ -15,6 +15,13 @@ logger = logging.getLogger(__name__)
class BinaryStore(BaseBinaryIndex):
"""Persistent binary vector store using numpy memmap.
.. deprecated::
Prefer ``FAISSBinaryIndex`` for binary coarse search. This class is
retained as a numpy-only fallback for environments where FAISS is not
available. New code should use ``create_binary_index()`` from
``codexlens_search.core.factory`` which selects the best backend
automatically.
Stores binary-quantized float32 vectors as packed uint8 arrays on disk.
Supports fast coarse search via XOR + popcount Hamming distance.
"""

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import logging
import warnings
from pathlib import Path
from codexlens_search.config import Config
@@ -97,14 +98,29 @@ def create_binary_index(
backend = config.binary_backend
if backend == "faiss":
from codexlens_search.core.faiss_index import FAISSBinaryIndex
return FAISSBinaryIndex(path, dim, config)
if _FAISS_AVAILABLE:
from codexlens_search.core.faiss_index import FAISSBinaryIndex
return FAISSBinaryIndex(path, dim, config)
# FAISS explicitly requested but not installed: fall back with warning
from codexlens_search.core.binary import BinaryStore
warnings.warn(
"binary_backend='faiss' but FAISS is not installed. "
"Falling back to deprecated numpy BinaryStore. "
"Install faiss-cpu or faiss-gpu for the recommended binary backend.",
DeprecationWarning,
stacklevel=2,
)
logger.warning(
"binary_backend='faiss' but FAISS not available, "
"falling back to deprecated numpy BinaryStore."
)
return BinaryStore(path, dim, config)
if backend == "hnswlib":
from codexlens_search.core.binary import BinaryStore
return BinaryStore(path, dim, config)
# auto: try faiss first, then numpy-based BinaryStore
# auto: try faiss first, then numpy-based BinaryStore (deprecated fallback)
if _FAISS_AVAILABLE:
from codexlens_search.core.faiss_index import FAISSBinaryIndex
logger.info("Auto-selected FAISS binary backend")
@@ -112,5 +128,14 @@ def create_binary_index(
# numpy BinaryStore is always available (no extra deps)
from codexlens_search.core.binary import BinaryStore
logger.info("Auto-selected numpy BinaryStore backend")
warnings.warn(
"Falling back to numpy BinaryStore because FAISS is not installed. "
"BinaryStore is deprecated; install faiss-cpu or faiss-gpu for better performance.",
DeprecationWarning,
stacklevel=2,
)
logger.warning(
"FAISS not available, falling back to deprecated numpy BinaryStore. "
"Install faiss-cpu or faiss-gpu for the recommended binary backend."
)
return BinaryStore(path, dim, config)

View File

@@ -71,10 +71,23 @@ class FAISSANNIndex(BaseANNIndex):
self.load()
def load(self) -> None:
"""Load index from disk or initialize a fresh one."""
"""Load index from disk or initialize a fresh one.
Uses IO_FLAG_MMAP for zero-copy memory-mapped loading when available,
falling back to regular read_index() on older faiss versions.
"""
with self._lock:
if self._index_path.exists():
idx = faiss.read_index(str(self._index_path))
try:
idx = faiss.read_index(
str(self._index_path), faiss.IO_FLAG_MMAP
)
except (AttributeError, RuntimeError, Exception) as exc:
logger.debug(
"MMAP load failed, falling back to regular read: %s",
exc,
)
idx = faiss.read_index(str(self._index_path))
logger.debug(
"Loaded FAISS ANN index from %s (%d items)",
self._index_path, idx.ntotal,
@@ -201,10 +214,23 @@ class FAISSBinaryIndex(BaseBinaryIndex):
return np.packbits(binary).reshape(1, -1)
def load(self) -> None:
"""Load binary index from disk or initialize a fresh one."""
"""Load binary index from disk or initialize a fresh one.
Uses IO_FLAG_MMAP for zero-copy memory-mapped loading when available,
falling back to regular read_index_binary() on older faiss versions.
"""
with self._lock:
if self._index_path.exists():
idx = faiss.read_index_binary(str(self._index_path))
try:
idx = faiss.read_index_binary(
str(self._index_path), faiss.IO_FLAG_MMAP
)
except (AttributeError, RuntimeError, Exception) as exc:
logger.debug(
"MMAP load failed, falling back to regular read: %s",
exc,
)
idx = faiss.read_index_binary(str(self._index_path))
logger.debug(
"Loaded FAISS binary index from %s (%d items)",
self._index_path, idx.ntotal,

View File

@@ -0,0 +1,178 @@
"""Single index partition (shard) that owns FTS, binary, ANN, and metadata stores."""
from __future__ import annotations
import logging
from pathlib import Path
from codexlens_search.config import Config
from codexlens_search.core.base import BaseANNIndex, BaseBinaryIndex
from codexlens_search.embed.base import BaseEmbedder
from codexlens_search.indexing.metadata import MetadataStore
from codexlens_search.indexing.pipeline import IndexingPipeline, IndexStats
from codexlens_search.rerank import BaseReranker
from codexlens_search.search.fts import FTSEngine
from codexlens_search.search.pipeline import SearchPipeline, SearchResult
logger = logging.getLogger(__name__)
class Shard:
    """A complete index partition with its own FTS, binary, ANN, and metadata stores.

    Components are lazy-loaded on first access and can be explicitly unloaded
    to release memory. The embedder and reranker are shared across shards
    (passed in from ShardManager) since they are expensive to instantiate.
    """

    def __init__(
        self,
        shard_id: int,
        db_path: str | Path,
        config: Config,
    ) -> None:
        self._shard_id = shard_id
        # Each shard keeps its stores under an isolated "shard_<id>" directory.
        self._shard_dir = Path(db_path).resolve() / f"shard_{shard_id}"
        self._config = config
        # Lazy-loaded components (created on _ensure_loaded)
        self._fts: FTSEngine | None = None
        self._binary_store: BaseBinaryIndex | None = None
        self._ann_index: BaseANNIndex | None = None
        self._metadata: MetadataStore | None = None
        self._indexing: IndexingPipeline | None = None
        self._search: SearchPipeline | None = None
        self._loaded = False

    @property
    def shard_id(self) -> int:
        return self._shard_id

    @property
    def is_loaded(self) -> bool:
        return self._loaded

    def _ensure_loaded(
        self,
        embedder: BaseEmbedder,
        reranker: BaseReranker,
    ) -> None:
        """Lazy-create all per-shard components if not yet loaded."""
        if self._loaded:
            return
        # Local import avoids a circular dependency with the factory module.
        from codexlens_search.core.factory import create_ann_index, create_binary_index

        self._shard_dir.mkdir(parents=True, exist_ok=True)
        self._fts = FTSEngine(self._shard_dir / "fts.db")
        self._binary_store = create_binary_index(
            self._shard_dir, self._config.embed_dim, self._config
        )
        self._ann_index = create_ann_index(
            self._shard_dir, self._config.embed_dim, self._config
        )
        self._metadata = MetadataStore(self._shard_dir / "metadata.db")
        self._indexing = IndexingPipeline(
            embedder=embedder,
            binary_store=self._binary_store,
            ann_index=self._ann_index,
            fts=self._fts,
            config=self._config,
            metadata=self._metadata,
        )
        self._search = SearchPipeline(
            embedder=embedder,
            binary_store=self._binary_store,
            ann_index=self._ann_index,
            reranker=reranker,
            fts=self._fts,
            config=self._config,
            metadata_store=self._metadata,
        )
        self._loaded = True
        logger.debug("Shard %d loaded from %s", self._shard_id, self._shard_dir)

    def unload(self) -> None:
        """Persist state, then release memory by closing connections and dropping references.

        Fix: previously the binary/ANN references were dropped without a
        flush, so LRU eviction in ShardManager (which calls unload()
        directly) could silently discard index writes that had not yet
        been saved to disk. save() is called first; it is a no-op when
        nothing is loaded, so this is safe to call repeatedly.
        """
        if not self._loaded:
            return
        # Flush binary/ANN state to disk before dropping the references.
        self.save()
        if self._metadata is not None:
            self._metadata.close()
        self._fts = None
        self._binary_store = None
        self._ann_index = None
        self._metadata = None
        self._indexing = None
        self._search = None
        self._loaded = False
        logger.debug("Shard %d unloaded", self._shard_id)

    def load(
        self,
        embedder: BaseEmbedder,
        reranker: BaseReranker,
    ) -> None:
        """Explicitly load shard components."""
        self._ensure_loaded(embedder, reranker)

    def save(self) -> None:
        """Persist binary and ANN indexes to disk. No-op when not loaded."""
        if not self._loaded:
            return
        if self._binary_store is not None:
            self._binary_store.save()
        if self._ann_index is not None:
            self._ann_index.save()

    def search(
        self,
        query: str,
        embedder: BaseEmbedder,
        reranker: BaseReranker,
        quality: str | None = None,
        top_k: int | None = None,
    ) -> list[SearchResult]:
        """Search this shard's index.

        Args:
            query: Search query string.
            embedder: Shared embedder instance.
            reranker: Shared reranker instance.
            quality: Search quality tier.
            top_k: Maximum results to return.

        Returns:
            List of SearchResult from this shard.
        """
        self._ensure_loaded(embedder, reranker)
        assert self._search is not None
        return self._search.search(query, top_k=top_k, quality=quality)

    def sync(
        self,
        files: list[Path],
        root: Path | None,
        embedder: BaseEmbedder,
        reranker: BaseReranker,
        **kwargs: object,
    ) -> IndexStats:
        """Sync this shard's index with the given files.

        Args:
            files: Files that belong to this shard.
            root: Root directory for relative paths.
            embedder: Shared embedder instance.
            reranker: Shared reranker instance (needed only to build the
                search pipeline during lazy load; not used for indexing).
            **kwargs: Forwarded to IndexingPipeline.sync().

        Returns:
            IndexStats for this shard's sync operation.
        """
        self._ensure_loaded(embedder, reranker)
        assert self._indexing is not None
        return self._indexing.sync(files, root=root, **kwargs)

View File

@@ -0,0 +1,250 @@
"""ShardManager: manages multiple Shard instances with LRU eviction."""
from __future__ import annotations
import logging
import threading
from collections import OrderedDict
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from codexlens_search.config import Config
from codexlens_search.core.shard import Shard
from codexlens_search.embed.base import BaseEmbedder
from codexlens_search.indexing.pipeline import IndexStats
from codexlens_search.rerank import BaseReranker
from codexlens_search.search.fusion import reciprocal_rank_fusion
from codexlens_search.search.pipeline import SearchResult
logger = logging.getLogger(__name__)
class ShardManager:
    """Manages multiple Shard instances with hash-based file routing and LRU eviction.

    Files are deterministically routed to shards via a stable CRC32 of the
    path modulo num_shards. Search queries all shards in parallel and merges
    results via RRF fusion. At most max_loaded_shards are kept in memory;
    least-recently-used shards are unloaded when the limit is exceeded.
    """

    def __init__(
        self,
        num_shards: int,
        db_path: str | Path,
        config: Config,
        embedder: BaseEmbedder,
        reranker: BaseReranker,
    ) -> None:
        if num_shards < 1:
            raise ValueError("num_shards must be >= 1")
        self._num_shards = num_shards
        self._db_path = Path(db_path).resolve()
        self._config = config
        self._embedder = embedder
        self._reranker = reranker
        self._max_loaded = config.max_loaded_shards
        # Create all Shard objects (lazy-loaded, no I/O yet)
        self._shards: dict[int, Shard] = {
            i: Shard(i, self._db_path, config)
            for i in range(num_shards)
        }
        # LRU tracking: keys are shard_ids, most-recently-used at end
        self._loaded_order: OrderedDict[int, None] = OrderedDict()
        self._lru_lock = threading.Lock()

    @property
    def num_shards(self) -> int:
        return self._num_shards

    def route_file(self, path: str) -> int:
        """Deterministically route a file path to a shard ID.

        Fix: the previous implementation used the builtin ``hash()``,
        which is salted per process for str (PYTHONHASHSEED), so routing
        changed between runs and files silently migrated across shards
        after every restart. CRC32 is stable across processes and
        platforms while remaining cheap and uniformly distributed enough
        for sharding.
        """
        import zlib

        return zlib.crc32(path.encode("utf-8")) % self._num_shards

    def get_shard(self, shard_id: int) -> Shard:
        """Return the Shard instance for a given shard_id.

        Raises:
            ValueError: If shard_id is outside [0, num_shards).
        """
        if shard_id not in self._shards:
            raise ValueError(
                f"Invalid shard_id {shard_id}, valid range: 0-{self._num_shards - 1}"
            )
        return self._shards[shard_id]

    def _ensure_loaded(self, shard_id: int) -> Shard:
        """Load a shard if needed, applying LRU eviction policy.

        Thread-safe: protects OrderedDict mutations with a lock.
        Returns the loaded Shard.
        """
        shard = self._shards[shard_id]
        with self._lru_lock:
            # Mark as most-recently-used
            if shard_id in self._loaded_order:
                self._loaded_order.move_to_end(shard_id)
            else:
                self._loaded_order[shard_id] = None
            # Load if not already loaded
            if not shard.is_loaded:
                shard.load(self._embedder, self._reranker)
            # Evict LRU shards if over limit
            while len(self._loaded_order) > self._max_loaded:
                evict_id, _ = self._loaded_order.popitem(last=False)
                evict_shard = self._shards[evict_id]
                if evict_shard.is_loaded:
                    logger.info("LRU evicting shard %d", evict_id)
                    evict_shard.unload()
        return shard

    def sync(
        self,
        files: list[Path],
        root: Path | None = None,
        **kwargs: object,
    ) -> IndexStats:
        """Sync index with files, routing each file to its shard.

        Groups files by shard via route_file(), then syncs each shard
        with its subset of files.

        Args:
            files: Current list of files to index.
            root: Root directory for relative paths.
            **kwargs: Forwarded to Shard.sync().

        Returns:
            Aggregated IndexStats across all shards.
        """
        # Routing keys are root-relative when a root is given, so moving
        # the project directory does not re-route every file.
        shard_files: dict[int, list[Path]] = {i: [] for i in range(self._num_shards)}
        for fpath in files:
            rel = str(fpath.relative_to(root)) if root else str(fpath)
            shard_id = self.route_file(rel)
            shard_files[shard_id].append(fpath)
        total_files = 0
        total_chunks = 0
        total_duration = 0.0
        for shard_id, shard_file_list in shard_files.items():
            if not shard_file_list:
                continue
            self._ensure_loaded(shard_id)
            shard = self._shards[shard_id]
            stats = shard.sync(
                shard_file_list,
                root=root,
                embedder=self._embedder,
                reranker=self._reranker,
                **kwargs,
            )
            total_files += stats.files_processed
            total_chunks += stats.chunks_created
            total_duration += stats.duration_seconds
        return IndexStats(
            files_processed=total_files,
            chunks_created=total_chunks,
            duration_seconds=round(total_duration, 2),
        )

    def search(
        self,
        query: str,
        quality: str | None = None,
        top_k: int | None = None,
    ) -> list[SearchResult]:
        """Search all shards in parallel, merge results via RRF fusion.

        Each shard returns its own ranked results. Cross-shard merging
        uses reciprocal_rank_fusion with equal weights across shards.
        Per-shard top_k is increased to compensate for cross-shard dilution.

        Args:
            query: Search query string.
            quality: Search quality tier.
            top_k: Maximum final results to return.

        Returns:
            Merged list of SearchResult ordered by relevance.
        """
        cfg = self._config
        final_top_k = top_k if top_k is not None else cfg.reranker_top_k
        # Double the per-shard candidate count so each shard contributes
        # enough results for cross-shard RRF. (The original
        # max(final_top_k, final_top_k * 2) was tautologically 2*k.)
        per_shard_top_k = final_top_k * 2
        # Warm up all shards for this query.
        # NOTE(review): when num_shards > max_loaded_shards this loop
        # evicts earlier shards while loading later ones; Shard.search
        # then lazily reloads them, so results stay correct but the LRU
        # memory cap is not strictly honored during a query -- confirm
        # this trade-off is intended.
        for shard_id in range(self._num_shards):
            self._ensure_loaded(shard_id)
        # Parallel search across shards
        shard_results: dict[int, list[SearchResult]] = {}

        def _search_shard(sid: int) -> tuple[int, list[SearchResult]]:
            shard = self._shards[sid]
            results = shard.search(
                query,
                embedder=self._embedder,
                reranker=self._reranker,
                quality=quality,
                top_k=per_shard_top_k,
            )
            return sid, results

        with ThreadPoolExecutor(max_workers=min(self._num_shards, 4)) as pool:
            futures = [pool.submit(_search_shard, sid) for sid in range(self._num_shards)]
            for future in futures:
                try:
                    sid, results = future.result()
                    shard_results[sid] = results
                except Exception:
                    # A failing shard degrades results rather than failing
                    # the whole query.
                    logger.warning("Shard search failed", exc_info=True)
        # If only one shard returned results, no merging needed
        non_empty = {k: v for k, v in shard_results.items() if v}
        if not non_empty:
            return []
        if len(non_empty) == 1:
            results = list(non_empty.values())[0]
            return results[:final_top_k]
        # Cross-shard RRF merge. doc_ids are shard-local, so assign each
        # result a globally unique surrogate id for the fusion input and
        # keep a map back to the original SearchResult.
        rrf_input: dict[str, list[tuple[int, float]]] = {}
        global_results: dict[int, SearchResult] = {}
        global_id = 0
        for sid, results in non_empty.items():
            ranked: list[tuple[int, float]] = []
            for r in results:
                global_results[global_id] = r
                ranked.append((global_id, r.score))
                global_id += 1
            rrf_input[f"shard_{sid}"] = ranked
        fused = reciprocal_rank_fusion(rrf_input, k=cfg.fusion_k)
        merged: list[SearchResult] = []
        for gid, fused_score in fused[:final_top_k]:
            result = global_results[gid]
            merged.append(SearchResult(
                id=result.id,
                path=result.path,
                score=fused_score,
                snippet=result.snippet,
                line=result.line,
                end_line=result.end_line,
                content=result.content,
            ))
        return merged

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import sqlite3
import time
from pathlib import Path
@@ -9,7 +10,8 @@ class MetadataStore:
"""Tracks file-to-chunk mappings and deleted chunk IDs (tombstones).
Tables:
files - file_path (PK), content_hash, last_modified
files - file_path (PK), content_hash, last_modified, file_size,
tier ('hot'/'warm'/'cold'), last_accessed (epoch float)
chunks - chunk_id (PK), file_path (FK CASCADE), chunk_hash
deleted_chunks - chunk_id (PK) for tombstone tracking
"""
@@ -19,13 +21,18 @@ class MetadataStore:
self._conn.execute("PRAGMA foreign_keys = ON")
self._conn.execute("PRAGMA journal_mode = WAL")
self._create_tables()
self._migrate_size_column()
self._migrate_tier_columns()
def _create_tables(self) -> None:
self._conn.executescript("""
CREATE TABLE IF NOT EXISTS files (
file_path TEXT PRIMARY KEY,
content_hash TEXT NOT NULL,
last_modified REAL NOT NULL
last_modified REAL NOT NULL,
file_size INTEGER NOT NULL DEFAULT 0,
tier TEXT NOT NULL DEFAULT 'warm',
last_accessed REAL
);
CREATE TABLE IF NOT EXISTS chunks (
@@ -41,14 +48,48 @@ class MetadataStore:
""")
self._conn.commit()
def _migrate_size_column(self) -> None:
"""Add file_size column if missing (for pre-existing DBs)."""
cols = {
row[1]
for row in self._conn.execute("PRAGMA table_info(files)").fetchall()
}
if "file_size" not in cols:
self._conn.execute(
"ALTER TABLE files ADD COLUMN file_size INTEGER NOT NULL DEFAULT 0"
)
self._conn.commit()
def _migrate_tier_columns(self) -> None:
"""Add tier and last_accessed columns if missing (for pre-existing DBs)."""
cols = {
row[1]
for row in self._conn.execute("PRAGMA table_info(files)").fetchall()
}
if "tier" not in cols:
self._conn.execute(
"ALTER TABLE files ADD COLUMN tier TEXT NOT NULL DEFAULT 'warm'"
)
if "last_accessed" not in cols:
self._conn.execute(
"ALTER TABLE files ADD COLUMN last_accessed REAL"
)
if "tier" not in cols or "last_accessed" not in cols:
self._conn.commit()
def register_file(
self, file_path: str, content_hash: str, mtime: float
self,
file_path: str,
content_hash: str,
mtime: float,
file_size: int = 0,
) -> None:
"""Insert or update a file record."""
self._conn.execute(
"INSERT OR REPLACE INTO files (file_path, content_hash, last_modified) "
"VALUES (?, ?, ?)",
(file_path, content_hash, mtime),
"INSERT OR REPLACE INTO files "
"(file_path, content_hash, last_modified, file_size) "
"VALUES (?, ?, ?, ?)",
(file_path, content_hash, mtime, file_size),
)
self._conn.commit()
@@ -121,6 +162,24 @@ class MetadataStore:
return True # New file
return stored != content_hash
def file_needs_update_fast(
self, file_path: str, mtime: float, size: int
) -> bool:
"""Fast pre-check using mtime and file size (no content read needed).
Returns True if the file appears changed or is not yet tracked.
When mtime and size both match stored values, the file is assumed
unchanged (~1000x faster than content-hash comparison).
"""
row = self._conn.execute(
"SELECT last_modified, file_size FROM files WHERE file_path = ?",
(file_path,),
).fetchone()
if row is None:
return True # New file
stored_mtime, stored_size = row
return stored_mtime != mtime or stored_size != size
def compact_deleted(self) -> set[int]:
"""Return deleted IDs and clear the deleted_chunks table.
@@ -161,5 +220,81 @@ class MetadataStore:
).fetchone()
return row[0] if row[0] is not None else -1
# ------------------------------------------------------------------
# Tier management
# ------------------------------------------------------------------
def record_access(self, file_path: str) -> None:
"""Update last_accessed timestamp for a file."""
self._conn.execute(
"UPDATE files SET last_accessed = ? WHERE file_path = ?",
(time.time(), file_path),
)
self._conn.commit()
def record_access_batch(self, file_paths: list[str]) -> None:
"""Batch-update last_accessed timestamps for multiple files."""
if not file_paths:
return
now = time.time()
self._conn.executemany(
"UPDATE files SET last_accessed = ? WHERE file_path = ?",
[(now, fp) for fp in file_paths],
)
self._conn.commit()
def classify_tiers(
self, hot_threshold_hours: int = 24, cold_threshold_hours: int = 168
) -> None:
"""Reclassify all files into hot/warm/cold tiers based on last_accessed.
- hot: last_accessed within hot_threshold_hours
- cold: last_accessed older than cold_threshold_hours (or never accessed)
- warm: everything in between
"""
now = time.time()
hot_cutoff = now - hot_threshold_hours * 3600
cold_cutoff = now - cold_threshold_hours * 3600
# Hot: recently accessed
self._conn.execute(
"UPDATE files SET tier = 'hot' "
"WHERE last_accessed IS NOT NULL AND last_accessed >= ?",
(hot_cutoff,),
)
# Cold: not accessed for a long time, or never accessed
self._conn.execute(
"UPDATE files SET tier = 'cold' "
"WHERE last_accessed IS NULL "
"OR (last_accessed < ? AND last_accessed < ?)",
(cold_cutoff, hot_cutoff),
)
# Warm: between hot and cold cutoffs
self._conn.execute(
"UPDATE files SET tier = 'warm' "
"WHERE last_accessed IS NOT NULL "
"AND last_accessed >= ? AND last_accessed < ?",
(cold_cutoff, hot_cutoff),
)
self._conn.commit()
def get_files_by_tier(self, tier: str) -> list[str]:
"""Return file paths in the specified tier ('hot', 'warm', or 'cold')."""
rows = self._conn.execute(
"SELECT file_path FROM files WHERE tier = ?", (tier,)
).fetchall()
return [r[0] for r in rows]
    def get_cold_files(self) -> list[str]:
        """Return file paths in the 'cold' tier."""
        # Thin convenience wrapper over get_files_by_tier.
        return self.get_files_by_tier("cold")
def get_file_tier(self, file_path: str) -> str | None:
"""Return the tier for a specific file, or None if not tracked."""
row = self._conn.execute(
"SELECT tier FROM files WHERE file_path = ?", (file_path,)
).fetchone()
return row[0] if row else None
    def close(self) -> None:
        """Close the underlying SQLite connection; the store is unusable afterwards."""
        self._conn.close()

View File

@@ -17,8 +17,7 @@ from pathlib import Path
import numpy as np
from codexlens_search.config import Config
from codexlens_search.core.binary import BinaryStore
from codexlens_search.core.index import ANNIndex
from codexlens_search.core.base import BaseANNIndex, BaseBinaryIndex
from codexlens_search.embed.base import BaseEmbedder
from codexlens_search.indexing.metadata import MetadataStore
from codexlens_search.search.fts import FTSEngine
@@ -100,8 +99,8 @@ class IndexingPipeline:
def __init__(
self,
embedder: BaseEmbedder,
binary_store: BinaryStore,
ann_index: ANNIndex,
binary_store: BaseBinaryIndex,
ann_index: BaseANNIndex,
fts: FTSEngine,
config: Config,
metadata: MetadataStore | None = None,
@@ -463,6 +462,94 @@ class IndexingPipeline:
meta = self._require_metadata()
return meta.max_chunk_id() + 1
    def index_files_fts_only(
        self,
        files: list[Path],
        *,
        root: Path | None = None,
        max_chunk_chars: int = _DEFAULT_MAX_CHUNK_CHARS,
        chunk_overlap: int = _DEFAULT_CHUNK_OVERLAP,
    ) -> IndexStats:
        """Index files into FTS5 only, without embedding or vector indexing.

        Chunks files using the same logic as the full pipeline, then inserts
        directly into FTS. No embedding computation, no binary/ANN store writes.

        Args:
            files: List of file paths to index.
            root: Optional root for computing relative paths.
            max_chunk_chars: Maximum characters per chunk.
            chunk_overlap: Character overlap between consecutive chunks.

        Returns:
            IndexStats with counts and timing.
        """
        if not files:
            return IndexStats()
        # Metadata tracking is mandatory here: dedup and chunk-ID allocation
        # both depend on it, so raise early if it is not configured.
        meta = self._require_metadata()
        t0 = time.monotonic()
        # Continue chunk IDs from the highest ID already registered so FTS
        # rows stay unique across full and FTS-only indexing runs.
        chunk_id = self._next_chunk_id()
        files_processed = 0
        chunks_created = 0
        for fpath in files:
            exclude_reason = is_file_excluded(fpath, self._config)
            if exclude_reason:
                logger.debug("Skipping %s: %s", fpath, exclude_reason)
                continue
            try:
                text = fpath.read_text(encoding="utf-8", errors="replace")
            except Exception as exc:
                # Best-effort: unreadable files are skipped, not fatal.
                logger.debug("Skipping %s: %s", fpath, exc)
                continue
            rel_path = str(fpath.relative_to(root)) if root else str(fpath)
            content_hash = self._content_hash(text)
            # Skip unchanged files
            if not meta.file_needs_update(rel_path, content_hash):
                continue
            # Remove old FTS data if file was previously indexed
            if meta.get_file_hash(rel_path) is not None:
                meta.mark_file_deleted(rel_path)
                self._fts.delete_by_path(rel_path)
            file_chunks = self._smart_chunk(text, rel_path, max_chunk_chars, chunk_overlap)
            if not file_chunks:
                # No chunks produced: still register the file so later sync
                # passes can skip it via hash/mtime comparison.
                st = fpath.stat()
                meta.register_file(rel_path, content_hash, st.st_mtime, st.st_size)
                continue
            files_processed += 1
            fts_docs = []
            chunk_id_hashes = []
            for chunk_text, path, sl, el in file_chunks:
                fts_docs.append((chunk_id, path, chunk_text, sl, el))
                chunk_id_hashes.append((chunk_id, self._content_hash(chunk_text)))
                chunk_id += 1
            self._fts.add_documents(fts_docs)
            chunks_created += len(fts_docs)
            # Register metadata
            st = fpath.stat()
            meta.register_file(rel_path, content_hash, st.st_mtime, st.st_size)
            meta.register_chunks(rel_path, chunk_id_hashes)
        duration = time.monotonic() - t0
        stats = IndexStats(
            files_processed=files_processed,
            chunks_created=chunks_created,
            duration_seconds=round(duration, 2),
        )
        logger.info(
            "FTS-only indexing complete: %d files, %d chunks in %.1fs",
            stats.files_processed, stats.chunks_created, stats.duration_seconds,
        )
        return stats
def index_file(
self,
file_path: Path,
@@ -522,7 +609,8 @@ class IndexingPipeline:
file_chunks = self._smart_chunk(text, rel_path, max_chunk_chars, chunk_overlap)
if not file_chunks:
# Register file with no chunks
meta.register_file(rel_path, content_hash, file_path.stat().st_mtime)
st = file_path.stat()
meta.register_file(rel_path, content_hash, st.st_mtime, st.st_size)
return IndexStats(
files_processed=1,
duration_seconds=round(time.monotonic() - t0, 2),
@@ -556,7 +644,8 @@ class IndexingPipeline:
self._fts.add_documents(fts_docs)
# Register in metadata
meta.register_file(rel_path, content_hash, file_path.stat().st_mtime)
st = file_path.stat()
meta.register_file(rel_path, content_hash, st.st_mtime, st.st_size)
chunk_id_hashes = [
(batch_ids[i], self._content_hash(batch_texts[i]))
for i in range(len(batch_ids))
@@ -605,6 +694,7 @@ class IndexingPipeline:
chunk_overlap: int = _DEFAULT_CHUNK_OVERLAP,
max_file_size: int = 50_000,
progress_callback: callable | None = None,
tier: str = "full",
) -> IndexStats:
"""Reconcile index state against a current file list.
@@ -617,6 +707,9 @@ class IndexingPipeline:
max_chunk_chars: Maximum characters per chunk.
chunk_overlap: Character overlap between consecutive chunks.
max_file_size: Skip files larger than this (bytes).
tier: Indexing tier - 'full' (default) runs the full pipeline
with embedding, 'fts_only' runs FTS-only indexing without
embedding or vector stores.
Returns:
Aggregated IndexStats for all operations.
@@ -638,33 +731,72 @@ class IndexingPipeline:
for rel in removed:
self.remove_file(rel)
# Collect files needing update
# Collect files needing update using 4-level detection:
# Level 1: set diff (removed files) - handled above
# Level 2: mtime + size fast pre-check via stat()
# Level 3: content hash only when mtime/size mismatch
files_to_index: list[Path] = []
for rel, fpath in current_rel_paths.items():
# Level 2: stat-based fast check
try:
st = fpath.stat()
except OSError:
continue
if not meta.file_needs_update_fast(rel, st.st_mtime, st.st_size):
# mtime + size match stored values -> skip (no read needed)
continue
# Level 3: mtime/size changed -> verify with content hash
try:
text = fpath.read_text(encoding="utf-8", errors="replace")
except Exception:
continue
content_hash = self._content_hash(text)
if meta.file_needs_update(rel, content_hash):
# Remove old data if previously indexed
if meta.get_file_hash(rel) is not None:
meta.mark_file_deleted(rel)
self._fts.delete_by_path(rel)
files_to_index.append(fpath)
if not meta.file_needs_update(rel, content_hash):
# Content unchanged despite mtime/size change -> update metadata only
meta.register_file(rel, content_hash, st.st_mtime, st.st_size)
continue
# Batch index via parallel pipeline
# File genuinely changed -> remove old data and queue for re-index
if meta.get_file_hash(rel) is not None:
meta.mark_file_deleted(rel)
self._fts.delete_by_path(rel)
files_to_index.append(fpath)
# Sort files by data tier priority: hot first, then warm, then cold
if files_to_index:
# Set starting chunk ID from metadata
start_id = self._next_chunk_id()
batch_stats = self._index_files_with_metadata(
files_to_index,
root=root,
max_chunk_chars=max_chunk_chars,
chunk_overlap=chunk_overlap,
start_chunk_id=start_id,
progress_callback=progress_callback,
)
_tier_priority = {"hot": 0, "warm": 1, "cold": 2}
def _tier_sort_key(fp: Path) -> int:
rel = str(fp.relative_to(root)) if root else str(fp)
t = meta.get_file_tier(rel)
return _tier_priority.get(t or "warm", 1)
files_to_index.sort(key=_tier_sort_key)
# Reclassify data tiers after sync detection
meta.classify_tiers(
self._config.tier_hot_hours, self._config.tier_cold_hours
)
# Batch index via parallel pipeline or FTS-only
if files_to_index:
if tier == "fts_only":
batch_stats = self.index_files_fts_only(
files_to_index,
root=root,
max_chunk_chars=max_chunk_chars,
chunk_overlap=chunk_overlap,
)
else:
# Full pipeline with embedding
start_id = self._next_chunk_id()
batch_stats = self._index_files_with_metadata(
files_to_index,
root=root,
max_chunk_chars=max_chunk_chars,
chunk_overlap=chunk_overlap,
start_chunk_id=start_id,
progress_callback=progress_callback,
)
total_files = batch_stats.files_processed
total_chunks = batch_stats.chunks_created
else:
@@ -781,7 +913,8 @@ class IndexingPipeline:
file_chunks = self._smart_chunk(text, rel_path, max_chunk_chars, chunk_overlap)
if not file_chunks:
meta.register_file(rel_path, content_hash, fpath.stat().st_mtime)
st = fpath.stat()
meta.register_file(rel_path, content_hash, st.st_mtime, st.st_size)
continue
files_processed += 1
@@ -806,7 +939,8 @@ class IndexingPipeline:
chunks_created += len(file_chunk_ids)
# Register metadata per file
meta.register_file(rel_path, content_hash, fpath.stat().st_mtime)
st = fpath.stat()
meta.register_file(rel_path, content_hash, st.st_mtime, st.st_size)
chunk_id_hashes = [
(cid, self._content_hash(ct)) for cid, ct in file_chunk_ids
]

View File

@@ -102,13 +102,20 @@ def _get_pipelines(project_path: str) -> tuple:
# ---------------------------------------------------------------------------
@mcp.tool()
def search_code(project_path: str, query: str, top_k: int = 10) -> str:
def search_code(
project_path: str, query: str, top_k: int = 10, quality: str = "auto"
) -> str:
"""Semantic code search with hybrid fusion (vector + FTS + reranking).
Args:
project_path: Absolute path to the project root directory.
query: Natural language or code search query.
top_k: Maximum number of results to return (default 10).
quality: Search quality tier (default "auto"):
- "fast": FTS-only + rerank (no embedding needed, fastest)
- "balanced": FTS + binary coarse search + rerank
- "thorough": Full 2-stage vector + FTS + reranking (best quality)
- "auto": Uses "thorough" if vector index exists, else "fast"
Returns:
Search results as formatted text with file paths, line numbers, scores, and code snippets.
@@ -121,15 +128,75 @@ def search_code(project_path: str, query: str, top_k: int = 10) -> str:
if not (db_path / "metadata.db").exists():
return f"Error: no index found at {db_path}. Run index_project first."
valid_qualities = ("fast", "balanced", "thorough", "auto")
if quality not in valid_qualities:
return f"Error: invalid quality '{quality}'. Must be one of: {', '.join(valid_qualities)}"
_, search, _ = _get_pipelines(project_path)
results = search.search(query, top_k=top_k)
results = search.search(query, top_k=top_k, quality=quality)
if not results:
return "No results found."
lines = []
for i, r in enumerate(results, 1):
lines.append(f"## Result {i} {r.path} (L{r.line}-{r.end_line}, score: {r.score:.4f})")
lines.append(f"## Result {i} -- {r.path} (L{r.line}-{r.end_line}, score: {r.score:.4f})")
lines.append(f"```\n{r.content}\n```")
lines.append("")
return "\n".join(lines)
@mcp.tool()
def search_scope(
    project_path: str,
    query: str,
    scope_path: str,
    top_k: int = 10,
    quality: str = "auto",
) -> str:
    """Search within a specific directory scope of a project.

    Runs a normal search then filters results to only include files
    under the specified scope path.

    Args:
        project_path: Absolute path to the project root directory.
        query: Natural language or code search query.
        scope_path: Relative directory path to limit search scope
            (e.g. "src/auth"). An empty string or "." searches the
            whole project.
        top_k: Maximum number of scoped results to return (default 10).
        quality: Search quality tier ("fast", "balanced", "thorough", "auto").

    Returns:
        Search results filtered to the scope path.
    """
    root = Path(project_path).resolve()
    if not root.is_dir():
        return f"Error: project path not found: {root}"
    db_path = _db_path_for_project(project_path)
    if not (db_path / "metadata.db").exists():
        return f"Error: no index found at {db_path}. Run index_project first."
    # Validate quality here for a clear error, consistent with search_code.
    valid_qualities = ("fast", "balanced", "thorough", "auto")
    if quality not in valid_qualities:
        return f"Error: invalid quality '{quality}'. Must be one of: {', '.join(valid_qualities)}"
    # Normalize scope path for prefix matching. Without the special-casing
    # below, scope "" or "." would build the prefix "/" which matches no
    # relative path and silently returned "No results" for the whole project.
    scope = scope_path.replace("\\", "/").strip("/")
    if scope == ".":
        scope = ""
    # Build the prefix once instead of concatenating inside the loop.
    prefix = scope + "/" if scope else ""
    _, search, _ = _get_pipelines(project_path)
    # Fetch more results than top_k to account for filtering
    all_results = search.search(query, top_k=top_k * 5, quality=quality)
    # Filter by scope path prefix (empty prefix keeps everything).
    scoped = []
    for r in all_results:
        norm = r.path.replace("\\", "/")
        if not prefix or norm.startswith(prefix) or norm == scope:
            scoped.append(r)
    if not scoped:
        return f"No results found in scope '{scope_path}'."
    lines = []
    for i, r in enumerate(scoped[:top_k], 1):
        lines.append(f"## Result {i} -- {r.path} (L{r.line}-{r.end_line}, score: {r.score:.4f})")
        lines.append(f"```\n{r.content}\n```")
        lines.append("")
    return "\n".join(lines)
@@ -275,6 +342,59 @@ async def index_update(
)
@mcp.tool()
def index_scope(
    project_path: str,
    scope_path: str,
    glob_pattern: str = "**/*",
    tier: str = "full",
) -> str:
    """Index a specific directory scope within a project.

    Useful for quickly indexing a subdirectory (e.g. after editing files
    in a specific module) without re-indexing the entire project.

    Args:
        project_path: Absolute path to the project root directory.
        scope_path: Relative directory path to index (e.g. "src/auth").
            Must resolve to a directory inside the project root.
        glob_pattern: Glob pattern for files within scope (default "**/*").
        tier: Indexing tier - "full" (default) runs full pipeline with
            embedding, "fts_only" indexes text only (faster, no vectors).

    Returns:
        Indexing summary for the scoped directory.
    """
    root = Path(project_path).resolve()
    if not root.is_dir():
        return f"Error: project path not found: {root}"
    scope_dir = (root / scope_path).resolve()
    # Reject absolute or ".."-escaping scope paths that leave the project
    # root; otherwise p.relative_to(root) below raises an uncaught ValueError.
    try:
        scope_dir.relative_to(root)
    except ValueError:
        return f"Error: scope path escapes project root: {scope_path}"
    if not scope_dir.is_dir():
        return f"Error: scope directory not found: {scope_dir}"
    valid_tiers = ("full", "fts_only")
    if tier not in valid_tiers:
        return f"Error: invalid tier '{tier}'. Must be one of: {', '.join(valid_tiers)}"
    indexing, _, _ = _get_pipelines(project_path)
    file_paths = [
        p for p in scope_dir.glob(glob_pattern)
        if p.is_file() and not should_exclude(p.relative_to(root), DEFAULT_EXCLUDES)
    ]
    if not file_paths:
        return f"No files found in {scope_path} matching '{glob_pattern}'."
    # NOTE(review): sync() reconciles against a *current file list* and
    # removes indexed files missing from it -- confirm sync is scope-aware,
    # otherwise passing only this subdirectory's files may evict the rest
    # of the project's index.
    stats = indexing.sync(file_paths, root=root, tier=tier)
    tier_label = "FTS-only" if tier == "fts_only" else "full"
    return (
        f"Indexed {stats.files_processed} files ({tier_label}), "
        f"{stats.chunks_created} chunks in {stats.duration_seconds:.1f}s. "
        f"Scope: {scope_path}"
    )
# ---------------------------------------------------------------------------
# File discovery
# ---------------------------------------------------------------------------

View File

@@ -7,7 +7,7 @@ from dataclasses import dataclass
import numpy as np
from ..config import Config
from ..core import ANNIndex, BinaryStore
from ..core.base import BaseANNIndex, BaseBinaryIndex
from ..embed import BaseEmbedder
from ..indexing.metadata import MetadataStore
from ..rerank import BaseReranker
@@ -21,6 +21,8 @@ from .fusion import (
_log = logging.getLogger(__name__)
_VALID_QUALITIES = ("fast", "balanced", "thorough", "auto")
@dataclass
class SearchResult:
@@ -37,8 +39,8 @@ class SearchPipeline:
def __init__(
self,
embedder: BaseEmbedder,
binary_store: BinaryStore,
ann_index: ANNIndex,
binary_store: BaseBinaryIndex,
ann_index: BaseANNIndex,
reranker: BaseReranker,
fts: FTSEngine,
config: Config,
@@ -52,6 +54,15 @@ class SearchPipeline:
self._config = config
self._metadata_store = metadata_store
# -- Helper: check if vector index has data ----------------------------
def _has_vector_index(self) -> bool:
"""Check if the binary store has any indexed entries."""
try:
return len(self._binary_store) > 0
except Exception:
return False
# -- Helper: vector search (binary coarse + ANN fine) -----------------
def _vector_search(
@@ -84,6 +95,21 @@ class SearchPipeline:
]
return vector_results
# -- Helper: binary coarse search only --------------------------------
def _binary_coarse_search(
self, query_vec: np.ndarray
) -> list[tuple[int, float]]:
"""Run binary coarse search only (no ANN fine search)."""
cfg = self._config
candidate_ids, distances = self._binary_store.coarse_search(
query_vec, top_k=cfg.binary_top_k
)
return [
(int(doc_id), float(dist))
for doc_id, dist in zip(candidate_ids, distances)
]
# -- Helper: FTS search (exact + fuzzy) ------------------------------
def _fts_search(
@@ -95,55 +121,12 @@ class SearchPipeline:
fuzzy_results = self._fts.fuzzy_search(query, top_k=cfg.fts_top_k)
return exact_results, fuzzy_results
# -- Main search entry point -----------------------------------------
# -- Helper: filter deleted IDs ---------------------------------------
def search(self, query: str, top_k: int | None = None) -> list[SearchResult]:
cfg = self._config
final_top_k = top_k if top_k is not None else cfg.reranker_top_k
# 1. Detect intent -> adaptive weights
intent = detect_query_intent(query)
weights = get_adaptive_weights(intent, cfg.fusion_weights)
# 2. Embed query
query_vec = self._embedder.embed_single(query)
# 3. Parallel vector + FTS search
vector_results: list[tuple[int, float]] = []
exact_results: list[tuple[int, float]] = []
fuzzy_results: list[tuple[int, float]] = []
with ThreadPoolExecutor(max_workers=2) as pool:
vec_future = pool.submit(self._vector_search, query_vec)
fts_future = pool.submit(self._fts_search, query)
# Collect vector results
try:
vector_results = vec_future.result()
except Exception:
_log.warning("Vector search failed, using empty results", exc_info=True)
# Collect FTS results
try:
exact_results, fuzzy_results = fts_future.result()
except Exception:
_log.warning("FTS search failed, using empty results", exc_info=True)
# 4. RRF fusion
fusion_input: dict[str, list[tuple[int, float]]] = {}
if vector_results:
fusion_input["vector"] = vector_results
if exact_results:
fusion_input["exact"] = exact_results
if fuzzy_results:
fusion_input["fuzzy"] = fuzzy_results
if not fusion_input:
return []
fused = reciprocal_rank_fusion(fusion_input, weights=weights, k=cfg.fusion_k)
# 4b. Filter out deleted IDs (tombstone filtering)
def _filter_deleted(
self, fused: list[tuple[int, float]]
) -> list[tuple[int, float]]:
"""Remove tombstoned chunk IDs from results."""
if self._metadata_store is not None:
deleted_ids = self._metadata_store.get_deleted_ids()
if deleted_ids:
@@ -152,16 +135,30 @@ class SearchPipeline:
for doc_id, score in fused
if doc_id not in deleted_ids
]
return fused
# 5. Rerank top candidates
rerank_ids = [doc_id for doc_id, _ in fused[:50]]
contents = [self._fts.get_content(doc_id) for doc_id in rerank_ids]
rerank_scores = self._reranker.score_pairs(query, contents)
# -- Helper: rerank and build results ---------------------------------
# 6. Sort by rerank score, build SearchResult list
ranked = sorted(
zip(rerank_ids, rerank_scores), key=lambda x: x[1], reverse=True
)
def _rerank_and_build(
self,
query: str,
fused: list[tuple[int, float]],
final_top_k: int,
use_reranker: bool = True,
) -> list[SearchResult]:
"""Rerank candidates (optionally) and build SearchResult list."""
if not fused:
return []
if use_reranker:
rerank_ids = [doc_id for doc_id, _ in fused[:50]]
contents = [self._fts.get_content(doc_id) for doc_id in rerank_ids]
rerank_scores = self._reranker.score_pairs(query, contents)
ranked = sorted(
zip(rerank_ids, rerank_scores), key=lambda x: x[1], reverse=True
)
else:
ranked = fused
results: list[SearchResult] = []
for doc_id, score in ranked[:final_top_k]:
@@ -179,3 +176,178 @@ class SearchPipeline:
)
)
return results
# -- Helper: record access for tier tracking --------------------------
def _record_access(self, results: list[SearchResult]) -> None:
"""Record file access for data tier tracking."""
if results and self._metadata_store is not None:
unique_paths = list({r.path for r in results})
try:
self._metadata_store.record_access_batch(unique_paths)
except Exception:
_log.debug("Failed to record access for tier tracking", exc_info=True)
# -- Quality-routed search methods ------------------------------------
def _search_fast(
self, query: str, final_top_k: int
) -> list[SearchResult]:
"""FTS-only search with reranking. No embedding needed."""
exact_results, fuzzy_results = self._fts_search(query)
fusion_input: dict[str, list[tuple[int, float]]] = {}
if exact_results:
fusion_input["exact"] = exact_results
if fuzzy_results:
fusion_input["fuzzy"] = fuzzy_results
if not fusion_input:
return []
fused = reciprocal_rank_fusion(
fusion_input, weights={"exact": 0.7, "fuzzy": 0.3},
k=self._config.fusion_k,
)
fused = self._filter_deleted(fused)
return self._rerank_and_build(query, fused, final_top_k, use_reranker=True)
def _search_balanced(
self, query: str, final_top_k: int
) -> list[SearchResult]:
"""FTS + binary coarse search with RRF fusion and reranking.
Embeds the query for binary coarse search but skips ANN fine search.
"""
intent = detect_query_intent(query)
weights = get_adaptive_weights(intent, self._config.fusion_weights)
query_vec = self._embedder.embed_single(query)
# Parallel: binary coarse + FTS
coarse_results: list[tuple[int, float]] = []
exact_results: list[tuple[int, float]] = []
fuzzy_results: list[tuple[int, float]] = []
with ThreadPoolExecutor(max_workers=2) as pool:
coarse_future = pool.submit(self._binary_coarse_search, query_vec)
fts_future = pool.submit(self._fts_search, query)
try:
coarse_results = coarse_future.result()
except Exception:
_log.warning("Binary coarse search failed", exc_info=True)
try:
exact_results, fuzzy_results = fts_future.result()
except Exception:
_log.warning("FTS search failed", exc_info=True)
fusion_input: dict[str, list[tuple[int, float]]] = {}
if coarse_results:
fusion_input["vector"] = coarse_results
if exact_results:
fusion_input["exact"] = exact_results
if fuzzy_results:
fusion_input["fuzzy"] = fuzzy_results
if not fusion_input:
return []
fused = reciprocal_rank_fusion(fusion_input, weights=weights, k=self._config.fusion_k)
fused = self._filter_deleted(fused)
return self._rerank_and_build(query, fused, final_top_k, use_reranker=True)
def _search_thorough(
self, query: str, final_top_k: int
) -> list[SearchResult]:
"""Full 2-stage vector + FTS + reranking pipeline (original behavior)."""
cfg = self._config
intent = detect_query_intent(query)
weights = get_adaptive_weights(intent, cfg.fusion_weights)
query_vec = self._embedder.embed_single(query)
# Parallel vector + FTS search
vector_results: list[tuple[int, float]] = []
exact_results: list[tuple[int, float]] = []
fuzzy_results: list[tuple[int, float]] = []
with ThreadPoolExecutor(max_workers=2) as pool:
vec_future = pool.submit(self._vector_search, query_vec)
fts_future = pool.submit(self._fts_search, query)
try:
vector_results = vec_future.result()
except Exception:
_log.warning("Vector search failed, using empty results", exc_info=True)
try:
exact_results, fuzzy_results = fts_future.result()
except Exception:
_log.warning("FTS search failed, using empty results", exc_info=True)
fusion_input: dict[str, list[tuple[int, float]]] = {}
if vector_results:
fusion_input["vector"] = vector_results
if exact_results:
fusion_input["exact"] = exact_results
if fuzzy_results:
fusion_input["fuzzy"] = fuzzy_results
if not fusion_input:
return []
fused = reciprocal_rank_fusion(fusion_input, weights=weights, k=cfg.fusion_k)
fused = self._filter_deleted(fused)
return self._rerank_and_build(query, fused, final_top_k, use_reranker=True)
# -- Main search entry point -----------------------------------------
def search(
self,
query: str,
top_k: int | None = None,
quality: str | None = None,
) -> list[SearchResult]:
"""Search with quality-based routing.
Args:
query: Search query string.
top_k: Maximum results to return.
quality: Search quality tier:
- 'fast': FTS-only + rerank (no embedding, no vector search)
- 'balanced': FTS + binary coarse + rerank (no ANN fine search)
- 'thorough': Full 2-stage vector + FTS + reranking
- 'auto': Selects 'thorough' if vectors exist, else 'fast'
- None: Uses config.default_search_quality
Returns:
List of SearchResult ordered by relevance.
"""
cfg = self._config
final_top_k = top_k if top_k is not None else cfg.reranker_top_k
# Resolve quality tier
effective_quality = quality or cfg.default_search_quality
if effective_quality not in _VALID_QUALITIES:
_log.warning(
"Invalid search quality '%s', falling back to 'auto'",
effective_quality,
)
effective_quality = "auto"
# Auto-detect: use thorough if vector index has data, else fast
if effective_quality == "auto":
effective_quality = "thorough" if self._has_vector_index() else "fast"
if effective_quality == "fast":
results = self._search_fast(query, final_top_k)
elif effective_quality == "balanced":
results = self._search_balanced(query, final_top_k)
else:
results = self._search_thorough(query, final_top_k)
self._record_access(results)
return results

View File

@@ -20,6 +20,7 @@ from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
from .events import ChangeType, FileEvent, WatcherConfig
from .incremental_indexer import IncrementalIndexer
logger = logging.getLogger(__name__)
@@ -261,3 +262,24 @@ class FileWatcher:
if output:
sys.stdout.write(output + "\n")
sys.stdout.flush()
@classmethod
def create_with_indexer(
cls,
root_path: Path,
config: WatcherConfig,
indexer: IncrementalIndexer,
) -> "FileWatcher":
"""Create a FileWatcher wired to an IncrementalIndexer's async path.
Uses ``indexer.process_events_async()`` as the callback so that
events are debounced and batched within the indexer before
processing, preventing redundant per-file pipeline startups.
Example::
indexer = IncrementalIndexer(pipeline, root=root)
watcher = FileWatcher.create_with_indexer(root, config, indexer)
watcher.start()
"""
return cls(root_path, config, indexer.process_events_async)

View File

@@ -4,10 +4,13 @@ Ported from codex-lens v1 with simplifications:
- Uses IndexingPipeline.index_file() / remove_file() directly
- No v1-specific Config, ParserFactory, DirIndexStore dependencies
- Per-file error isolation: one failure does not stop batch processing
- Debounce batching: process_events_async() buffers events and flushes
after a configurable window to prevent redundant per-file pipeline startups
"""
from __future__ import annotations
import logging
import threading
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional
@@ -60,6 +63,7 @@ class IncrementalIndexer:
pipeline: IndexingPipeline,
*,
root: Optional[Path] = None,
debounce_window_ms: int = 500,
) -> None:
"""Initialize the incremental indexer.
@@ -67,9 +71,15 @@ class IncrementalIndexer:
pipeline: The indexing pipeline with metadata store configured.
root: Optional project root for computing relative paths.
If None, absolute paths are used as identifiers.
debounce_window_ms: Milliseconds to buffer events before flushing
in process_events_async(). Default 500ms.
"""
self._pipeline = pipeline
self._root = root
self._debounce_window_ms = debounce_window_ms
self._event_buffer: List[FileEvent] = []
self._buffer_lock = threading.Lock()
self._flush_timer: Optional[threading.Timer] = None
def process_events(self, events: List[FileEvent]) -> BatchResult:
"""Process a batch of file events with per-file error isolation.
@@ -107,6 +117,52 @@ class IncrementalIndexer:
return result
def process_events_async(self, events: List[FileEvent]) -> None:
"""Buffer events and flush after the debounce window expires.
Non-blocking: events are accumulated in an internal buffer.
When no new events arrive within *debounce_window_ms*, the buffer
is flushed and all accumulated events are processed as a single
batch via process_events().
Args:
events: List of file events to buffer.
"""
with self._buffer_lock:
self._event_buffer.extend(events)
# Cancel previous timer and start a new one (true debounce)
if self._flush_timer is not None:
self._flush_timer.cancel()
self._flush_timer = threading.Timer(
self._debounce_window_ms / 1000.0,
self._flush_buffer,
)
self._flush_timer.daemon = True
self._flush_timer.start()
def _flush_buffer(self) -> None:
"""Flush the event buffer and process all accumulated events."""
with self._buffer_lock:
if not self._event_buffer:
return
events = list(self._event_buffer)
self._event_buffer.clear()
self._flush_timer = None
# Deduplicate: keep the last event per path
seen: dict[Path, FileEvent] = {}
for event in events:
seen[event.path] = event
deduped = list(seen.values())
logger.debug(
"Flushing debounce buffer: %d events (%d after dedup)",
len(events), len(deduped),
)
self.process_events(deduped)
def _handle_index(self, event: FileEvent, result: BatchResult) -> None:
"""Index a created or modified file."""
stats = self._pipeline.index_file(