feat: enhance search, ranking, reranker and CLI tooling across ccw and codex-lens

Major improvements to smart-search, chain-search cascade, ranking pipeline,
reranker factory, CLI history store, codex-lens integration, and uv-manager.
Simplify command-generator skill by inlining phases. Add comprehensive tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
catlog22
2026-03-16 20:35:08 +08:00
parent 1cd96b90e8
commit 5a4b18d9b1
73 changed files with 14684 additions and 2442 deletions

View File

@@ -41,6 +41,56 @@ pip install codex-lens[semantic-directml]
pip install codex-lens[full]
```
### Local ONNX Reranker Bootstrap
Use the pinned bootstrap flow when you want the local-only reranker backend in an
existing CodexLens virtual environment without asking pip to resolve the whole
project extras set at once.
1. Start from the CodexLens repo root and create or activate the project venv.
2. Review the pinned install manifest in `scripts/requirements-reranker-local.txt`.
3. Render the deterministic setup plan:
```bash
python scripts/bootstrap_reranker_local.py --dry-run
```
The bootstrap script always targets the selected venv Python, installs the local
ONNX reranker stack in a fixed order, and keeps the package set pinned to the
validated Python 3.13-compatible combination:
- `numpy==2.4.0`
- `onnxruntime==1.23.2`
- `huggingface-hub==0.36.2`
- `transformers==4.53.3`
- `optimum[onnxruntime]==2.1.0`
When you are ready to apply it to the CodexLens venv, use:
```bash
python scripts/bootstrap_reranker_local.py --apply
```
To pre-download the default local reranker model (`Xenova/ms-marco-MiniLM-L-6-v2`)
into the repo-local Hugging Face cache, use:
```bash
python scripts/bootstrap_reranker_local.py --apply --download-model
```
The dry-run plan also prints the equivalent explicit model download command. On
Windows PowerShell with the default repo venv, it looks like:
```bash
.venv/Scripts/hf.exe download Xenova/ms-marco-MiniLM-L-6-v2 --local-dir .cache/huggingface/models/Xenova--ms-marco-MiniLM-L-6-v2
```
After installation, probe the backend from the same venv:
```bash
python scripts/bootstrap_reranker_local.py --apply --probe
```
## Requirements
- Python >= 3.10

View File

@@ -0,0 +1,16 @@
{"query":"executeHybridMode dense_rerank semantic smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-semantic-routing","notes":"CCW semantic mode delegates to CodexLens dense_rerank."}
{"query":"parse CodexLens JSON output strip ANSI smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-json-fallback","notes":"Covers JSON/plain-text fallback handling for CodexLens output."}
{"query":"smart_search init embed search action schema","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-action-schema","notes":"Find the Zod schema that defines init/embed/search actions."}
{"query":"auto init missing job dedupe smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-auto-init","notes":"Targets background init/embed warmup and dedupe state."}
{"query":"smart_search exact mode fallback to CodexLens fts","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-exact-fallback","notes":"Tracks the exact-mode fallback path into CodexLens FTS."}
{"query":"smart_search settings snapshot embedding backend reranker backend staged stage2 mode","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-config-snapshot","notes":"Reads local config snapshot for embedding/reranker/staged pipeline settings."}
{"query":"embedding backend fastembed local litellm api config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-embedding-config","notes":"Local-only benchmark should resolve to fastembed defaults."}
{"query":"reranker backend onnx api legacy configuration","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-reranker-config","notes":"Covers both config dataclass fields and env overrides."}
{"query":"staged stage2 mode precomputed realtime static_global_graph","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-stage2-config","notes":"Benchmark matrix should exercise the three supported stage2 modes."}
{"query":"enable staged rerank stage 4 config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-stage4-rerank","notes":"Stage 4 rerank flag needs to stay enabled for local benchmarks."}
{"query":"cascade_search dense_rerank staged pipeline ChainSearchEngine","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-cascade","notes":"Baseline query for the central retrieval engine."}
{"query":"realtime LSP expand stage2 search pipeline","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-realtime","notes":"Targets realtime stage2 expansion logic."}
{"query":"static global graph stage2 expansion implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-static","notes":"Targets static_global_graph stage2 expansion logic."}
{"query":"cross encoder rerank stage 4 implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-rerank","notes":"Relevant for dense_rerank and staged rerank latency comparisons."}
{"query":"get_reranker factory onnx backend selection","relevant_paths":["codex-lens/src/codexlens/semantic/reranker/factory.py"],"intent":"reranker-factory","notes":"Keeps the benchmark aligned with local ONNX reranker selection."}
{"query":"EMBEDDING_BACKEND and RERANKER_BACKEND environment variables","relevant_paths":["codex-lens/src/codexlens/env_config.py"],"intent":"env-overrides","notes":"Covers CCW/CodexLens local-only environment overrides."}

View File

@@ -239,6 +239,7 @@ def main() -> None:
config.staged_clustering_strategy = str(args.staged_cluster_strategy or "path").strip().lower()
# Stability: on some Windows setups, DirectML/ONNX can crash under load.
config.embedding_use_gpu = False
config.reranker_use_gpu = False
registry = RegistryStore()
registry.initialize()
@@ -362,4 +363,3 @@ def main() -> None:
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,980 @@
#!/usr/bin/env python
"""Benchmark local-only staged stage2 modes for CCW smart_search queries.
This benchmark reuses the existing CodexLens benchmark style, but focuses on
the real search intents that drive CCW `smart_search`. It evaluates:
1. `dense_rerank` baseline
2. `staged` + `precomputed`
3. `staged` + `realtime`
4. `staged` + `static_global_graph`
Metrics:
- Hit@K
- MRR@K
- Recall@K
- latency (avg/p50/p95)
The runner is intentionally local-only. By default it uses:
- embedding backend: `fastembed`
- reranker backend: `onnx`
Examples:
python benchmarks/compare_ccw_smart_search_stage2.py --dry-run
python benchmarks/compare_ccw_smart_search_stage2.py --self-check
python benchmarks/compare_ccw_smart_search_stage2.py --source .. --k 10
python benchmarks/compare_ccw_smart_search_stage2.py --embedding-model code --reranker-model cross-encoder/ms-marco-MiniLM-L-6-v2
"""
from __future__ import annotations
import argparse
from copy import deepcopy
import gc
import json
import os
import re
import statistics
import sys
import time
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from codexlens.config import Config
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
from codexlens.search.ranking import (
QueryIntent,
detect_query_intent,
is_generated_artifact_path,
is_test_file,
query_prefers_lexical_search,
query_targets_generated_files,
)
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.registry import RegistryStore
# Default corpus root: two levels above benchmarks/ (the repository root), so
# CCW and CodexLens paths in the dataset resolve against a single tree.
DEFAULT_SOURCE = Path(__file__).resolve().parents[2]
# Labeled query dataset and default result location, colocated with this script.
DEFAULT_QUERIES_FILE = Path(__file__).parent / "accuracy_queries_ccw_smart_search.jsonl"
DEFAULT_OUTPUT = Path(__file__).parent / "results" / "ccw_smart_search_stage2.json"
# Allowed CLI values; the runner is deliberately local-only (no API backends).
VALID_STAGE2_MODES = ("precomputed", "realtime", "static_global_graph")
VALID_LOCAL_EMBEDDING_BACKENDS = ("fastembed",)
VALID_LOCAL_RERANKER_BACKENDS = ("onnx", "fastembed", "legacy")
VALID_BASELINE_METHODS = ("auto", "fts", "hybrid")
# Default local cross-encoder used when --reranker-model is not given.
DEFAULT_LOCAL_ONNX_RERANKER_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2"
def _now_ms() -> float:
return time.perf_counter() * 1000.0
def _normalize_path_key(path: str) -> str:
try:
candidate = Path(path)
if str(candidate) and (candidate.is_absolute() or re.match(r"^[A-Za-z]:", str(candidate))):
normalized = str(candidate.resolve())
else:
normalized = str(candidate)
except Exception:
normalized = path
normalized = normalized.replace("/", "\\")
if os.name == "nt":
normalized = normalized.lower()
return normalized
def _dedup_topk(paths: Iterable[str], k: int) -> List[str]:
output: List[str] = []
seen: set[str] = set()
for path in paths:
if path in seen:
continue
seen.add(path)
output.append(path)
if len(output) >= k:
break
return output
def _first_hit_rank(topk_paths: Sequence[str], relevant: set[str]) -> Optional[int]:
for index, path in enumerate(topk_paths, start=1):
if path in relevant:
return index
return None
def _mrr(ranks: Sequence[Optional[int]]) -> float:
values = [1.0 / rank for rank in ranks if rank and rank > 0]
return statistics.mean(values) if values else 0.0
def _mean(values: Sequence[float]) -> float:
return statistics.mean(values) if values else 0.0
def _percentile(values: Sequence[float], percentile: float) -> float:
if not values:
return 0.0
ordered = sorted(values)
if len(ordered) == 1:
return ordered[0]
index = (len(ordered) - 1) * percentile
lower = int(index)
upper = min(lower + 1, len(ordered) - 1)
if lower == upper:
return ordered[lower]
fraction = index - lower
return ordered[lower] + (ordered[upper] - ordered[lower]) * fraction
def _load_labeled_queries(path: Path, limit: Optional[int]) -> List[Dict[str, Any]]:
if not path.is_file():
raise SystemExit(f"Queries file does not exist: {path}")
output: List[Dict[str, Any]] = []
for raw_line in path.read_text(encoding="utf-8", errors="ignore").splitlines():
line = raw_line.strip()
if not line or line.startswith("#"):
continue
try:
item = json.loads(line)
except Exception as exc:
raise SystemExit(f"Invalid JSONL line in {path}: {raw_line!r} ({exc})") from exc
if not isinstance(item, dict) or "query" not in item or "relevant_paths" not in item:
raise SystemExit(f"Invalid query item (expected object with query/relevant_paths): {item!r}")
relevant_paths = item.get("relevant_paths")
if not isinstance(relevant_paths, list) or not relevant_paths:
raise SystemExit(f"Query item must include non-empty relevant_paths[]: {item!r}")
output.append(item)
if limit is not None and len(output) >= limit:
break
return output
def _resolve_expected_paths(source_root: Path, paths: Sequence[str]) -> Tuple[List[str], set[str], List[str]]:
    """Resolve labeled relevant paths against *source_root*.

    Returns (display_paths, normalized_key_set, missing_paths). Paths that do
    not exist are reported in missing_paths but are still kept in the display
    list and key set so downstream scoring stays shape-stable.
    """
    display: List[str] = []
    keys: set[str] = set()
    missing: List[str] = []
    for entry in paths:
        target = Path(entry)
        if not target.is_absolute():
            target = (source_root / target).resolve()
        text = str(target)
        if not target.exists():
            missing.append(text)
        display.append(text)
        keys.add(_normalize_path_key(text))
    return display, keys, missing
def _validate_local_only_backends(embedding_backend: str, reranker_backend: str) -> None:
    """Exit with an error unless both backends are in the local-only allowlists."""
    checks = (
        ("--embedding-backend", embedding_backend, VALID_LOCAL_EMBEDDING_BACKENDS),
        ("--reranker-backend", reranker_backend, VALID_LOCAL_RERANKER_BACKENDS),
    )
    for flag, value, allowed in checks:
        if value not in allowed:
            raise SystemExit(
                "This runner is local-only. "
                f"{flag} must be one of {', '.join(allowed)}; got {value!r}"
            )
def _validate_stage2_modes(stage2_modes: Sequence[str]) -> List[str]:
    """Normalize, validate, and order-preservingly dedupe stage2 mode names.

    Raises SystemExit when no usable entry remains or an entry is unknown.
    """
    normalized = [str(mode).strip().lower() for mode in stage2_modes if str(mode).strip()]
    if not normalized:
        raise SystemExit("At least one --stage2-modes entry is required")
    for mode in normalized:
        if mode not in VALID_STAGE2_MODES:
            raise SystemExit(
                f"Invalid --stage2-modes entry: {mode} "
                f"(valid: {', '.join(VALID_STAGE2_MODES)})"
            )
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(normalized))
def _validate_baseline_methods(methods: Sequence[str]) -> List[str]:
    """Normalize, validate, and order-preservingly dedupe baseline methods.

    Unlike stage2 modes, an empty selection is allowed (baselines are optional).
    """
    normalized = [str(method).strip().lower() for method in methods if str(method).strip()]
    for method in normalized:
        if method not in VALID_BASELINE_METHODS:
            raise SystemExit(
                f"Invalid --baseline-methods entry: {method} "
                f"(valid: {', '.join(VALID_BASELINE_METHODS)})"
            )
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(normalized))
@dataclass
class StrategyRun:
    """Result of executing one retrieval strategy against one labeled query."""
    # Unique key for this run, e.g. "hybrid" or "staged:realtime".
    strategy_key: str
    strategy: str
    # Stage-2 expansion mode for staged runs; None for baselines.
    stage2_mode: Optional[str]
    # Method after "auto" resolution (fts / hybrid / dense_rerank / staged).
    effective_method: str
    # How the query actually executed ("cascade" or the plain method name).
    execution_method: str
    latency_ms: float
    # Deduped, normalized top-k result path keys.
    topk_paths: List[str]
    # 1-based rank of the first relevant hit; None when nothing hit in top-k.
    first_hit_rank: Optional[int]
    hit_at_k: bool
    recall_at_k: float
    # Result-hygiene counters computed over the top-k paths.
    generated_artifact_count: int
    test_file_count: int
    # "ExceptionType: message" when the run failed; None on success.
    error: Optional[str] = None
@dataclass
class QueryEvaluation:
    """All strategy runs collected for a single labeled query."""
    query: str
    # Optional dataset labels carried through for reporting.
    intent: Optional[str]
    notes: Optional[str]
    # Resolved absolute display paths of the labeled relevant files.
    relevant_paths: List[str]
    # Keyed by StrategyRun.strategy_key.
    runs: Dict[str, StrategyRun]
@dataclass
class PairwiseDelta:
    """Metric differences (mode_a minus mode_b) between two stage2 modes."""
    mode_a: str
    mode_b: str
    hit_at_k_delta: float
    mrr_at_k_delta: float
    avg_recall_at_k_delta: float
    avg_latency_ms_delta: float
@dataclass
class StrategySpec:
    """Declarative description of one strategy in the benchmark matrix."""
    # Unique key, e.g. "fts" or "staged:precomputed".
    strategy_key: str
    strategy: str
    # Stage-2 mode for staged strategies; None for baselines.
    stage2_mode: Optional[str]
@dataclass
class StrategyRuntime:
    """Per-strategy engine state: isolated config plus registry/engine pair."""
    strategy_spec: StrategySpec
    # Deep-copied config so per-run mutations never leak across strategies.
    config: Config
    registry: RegistryStore
    engine: ChainSearchEngine
def _strategy_specs(
    stage2_modes: Sequence[str],
    include_dense_baseline: bool,
    *,
    baseline_methods: Sequence[str],
) -> List[StrategySpec]:
    """Build the ordered strategy matrix.

    Order: requested baselines first, then the optional dense_rerank
    baseline, then one staged entry per stage2 mode.
    """
    matrix = [
        StrategySpec(strategy_key=method, strategy=method, stage2_mode=None)
        for method in baseline_methods
    ]
    if include_dense_baseline:
        matrix.append(StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None))
    matrix.extend(
        StrategySpec(strategy_key=f"staged:{mode}", strategy="staged", stage2_mode=mode)
        for mode in stage2_modes
    )
    return matrix
def _build_strategy_runtime(base_config: Config, strategy_spec: StrategySpec) -> StrategyRuntime:
    """Create an isolated engine/registry pair for one strategy.

    The config is deep-copied so per-run mutations in _run_strategy cannot
    bleed between strategies sharing the same base configuration.
    """
    cloned_config = deepcopy(base_config)
    store = RegistryStore()
    store.initialize()
    path_mapper = PathMapper()
    search_engine = ChainSearchEngine(registry=store, mapper=path_mapper, config=cloned_config)
    return StrategyRuntime(
        strategy_spec=strategy_spec,
        config=cloned_config,
        registry=store,
        engine=search_engine,
    )
def _select_effective_method(query: str, requested_method: str) -> str:
    """Resolve "auto" into fts/hybrid/dense_rerank via query-intent heuristics.

    Any explicitly requested method is returned unchanged (lower-cased).
    """
    method = str(requested_method).strip().lower()
    if method != "auto":
        return method
    wants_lexical = query_targets_generated_files(query) or query_prefers_lexical_search(query)
    if wants_lexical:
        return "fts"
    intent = detect_query_intent(query)
    if intent == QueryIntent.KEYWORD:
        return "fts"
    return "dense_rerank" if intent == QueryIntent.SEMANTIC else "hybrid"
def _filter_dataset_by_query_match(
dataset: Sequence[Dict[str, Any]],
query_match: Optional[str],
) -> List[Dict[str, Any]]:
"""Filter labeled queries by case-insensitive substring match."""
needle = str(query_match or "").strip().casefold()
if not needle:
return list(dataset)
return [
dict(item)
for item in dataset
if needle in str(item.get("query", "")).casefold()
]
def _apply_query_limit(
dataset: Sequence[Dict[str, Any]],
query_limit: Optional[int],
) -> List[Dict[str, Any]]:
"""Apply the optional query limit after any dataset-level filtering."""
if query_limit is None:
return list(dataset)
return [dict(item) for item in list(dataset)[: max(0, int(query_limit))]]
def _write_json_payload(path: Path, payload: Dict[str, Any]) -> None:
"""Persist a benchmark payload as UTF-8 JSON."""
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
def _write_final_outputs(
    *,
    output_path: Path,
    progress_output: Optional[Path],
    payload: Dict[str, Any],
) -> None:
    """Write the completed payload to the result file and, when a progress
    file is configured, mirror the same payload there."""
    targets = [output_path]
    if progress_output is not None:
        targets.append(progress_output)
    for target in targets:
        _write_json_payload(target, payload)
def _make_progress_payload(
*,
args: argparse.Namespace,
source_root: Path,
strategy_specs: Sequence[StrategySpec],
evaluations: Sequence[QueryEvaluation],
query_index: int,
total_queries: int,
run_index: int,
total_runs: int,
current_query: str,
current_strategy_key: str,
) -> Dict[str, Any]:
"""Create a partial progress snapshot for long benchmark runs."""
return {
"status": "running",
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
"source": str(source_root),
"queries_file": str(args.queries_file),
"query_count": len(evaluations),
"planned_query_count": total_queries,
"k": int(args.k),
"coarse_k": int(args.coarse_k),
"strategy_keys": [spec.strategy_key for spec in strategy_specs],
"progress": {
"completed_queries": query_index,
"total_queries": total_queries,
"completed_runs": run_index,
"total_runs": total_runs,
"current_query": current_query,
"current_strategy_key": current_strategy_key,
},
"evaluations": [
{
"query": evaluation.query,
"intent": evaluation.intent,
"notes": evaluation.notes,
"relevant_paths": evaluation.relevant_paths,
"runs": {key: asdict(run) for key, run in evaluation.runs.items()},
}
for evaluation in evaluations
],
}
def _make_search_options(method: str, *, k: int) -> SearchOptions:
    """Map a benchmark method name onto the SearchOptions flags it requires.

    fts: lexical only; hybrid: lexical + vector; dense_rerank/staged:
    lexical + vector with the cascade pipeline enabled.
    """
    normalized = str(method).strip().lower()
    # (hybrid_mode, enable_vector, enable_cascade) per supported method.
    flag_table = {
        "fts": (False, False, False),
        "hybrid": (True, True, False),
        "dense_rerank": (True, True, True),
        "staged": (True, True, True),
    }
    if normalized not in flag_table:
        raise ValueError(f"Unsupported benchmark method: {method}")
    hybrid, vector, cascade = flag_table[normalized]
    return SearchOptions(
        total_limit=k,
        hybrid_mode=hybrid,
        enable_fuzzy=False,
        enable_vector=vector,
        pure_vector=False,
        enable_cascade=cascade,
    )
def _run_strategy(
    engine: ChainSearchEngine,
    config: Config,
    *,
    strategy_spec: StrategySpec,
    query: str,
    source_path: Path,
    k: int,
    coarse_k: int,
    relevant: set[str],
) -> StrategyRun:
    """Execute one strategy for one query and score the top-k results.

    Temporarily mutates ``config.cascade_strategy`` / ``config.staged_stage2_mode``
    for staged and dense_rerank runs and restores the previous values in the
    ``finally`` block so the per-strategy config is never left polluted.
    Exceptions are captured into ``StrategyRun.error`` instead of propagating,
    so one failing run does not abort the whole benchmark.
    """
    # Collect garbage before timing to reduce cross-run allocator noise.
    gc.collect()
    effective_method = _select_effective_method(query, strategy_spec.strategy)
    # dense_rerank and staged both execute through the cascade entrypoint.
    execution_method = "cascade" if effective_method in {"dense_rerank", "staged"} else effective_method
    # Remember current config values so the finally block can restore them.
    previous_cascade_strategy = getattr(config, "cascade_strategy", None)
    previous_stage2_mode = getattr(config, "staged_stage2_mode", None)
    start_ms = _now_ms()
    try:
        options = _make_search_options(
            "staged" if strategy_spec.strategy == "staged" else effective_method,
            k=k,
        )
        if strategy_spec.strategy == "staged":
            config.cascade_strategy = "staged"
            if strategy_spec.stage2_mode:
                config.staged_stage2_mode = strategy_spec.stage2_mode
            result = engine.cascade_search(
                query=query,
                source_path=source_path,
                k=k,
                coarse_k=coarse_k,
                options=options,
                strategy="staged",
            )
        elif effective_method == "dense_rerank":
            config.cascade_strategy = "dense_rerank"
            result = engine.cascade_search(
                query=query,
                source_path=source_path,
                k=k,
                coarse_k=coarse_k,
                options=options,
                strategy="dense_rerank",
            )
        else:
            # fts / hybrid baselines go through the plain search path.
            result = engine.search(
                query=query,
                source_path=source_path,
                options=options,
            )
        latency_ms = _now_ms() - start_ms
        paths_raw = [item.path for item in (result.results or []) if getattr(item, "path", None)]
        # Normalize keys before dedupe so separator/case variants of one file collapse.
        topk = _dedup_topk((_normalize_path_key(path) for path in paths_raw), k=k)
        rank = _first_hit_rank(topk, relevant)
        recall = 0.0
        if relevant:
            recall = len(set(topk) & relevant) / float(len(relevant))
        return StrategyRun(
            strategy_key=strategy_spec.strategy_key,
            strategy=strategy_spec.strategy,
            stage2_mode=strategy_spec.stage2_mode,
            effective_method=effective_method,
            execution_method=execution_method,
            latency_ms=latency_ms,
            topk_paths=topk,
            first_hit_rank=rank,
            hit_at_k=rank is not None,
            recall_at_k=recall,
            generated_artifact_count=sum(1 for path in topk if is_generated_artifact_path(path)),
            test_file_count=sum(1 for path in topk if is_test_file(path)),
            error=None,
        )
    except Exception as exc:
        # Record the failure as data; latency still covers the failed attempt.
        latency_ms = _now_ms() - start_ms
        return StrategyRun(
            strategy_key=strategy_spec.strategy_key,
            strategy=strategy_spec.strategy,
            stage2_mode=strategy_spec.stage2_mode,
            effective_method=effective_method,
            execution_method=execution_method,
            latency_ms=latency_ms,
            topk_paths=[],
            first_hit_rank=None,
            hit_at_k=False,
            recall_at_k=0.0,
            generated_artifact_count=0,
            test_file_count=0,
            error=f"{type(exc).__name__}: {exc}",
        )
    finally:
        # Always restore the shared config, success or failure.
        config.cascade_strategy = previous_cascade_strategy
        config.staged_stage2_mode = previous_stage2_mode
def _summarize_runs(runs: Sequence[StrategyRun]) -> Dict[str, Any]:
    """Aggregate per-query runs into accuracy, latency, and hygiene metrics.

    Latency statistics only include runs that completed without error; the
    accuracy metrics count failed runs as misses.
    """
    successful_latencies = [run.latency_ms for run in runs if not run.error]
    method_counts: Dict[str, int] = {}
    for run in runs:
        method_counts[run.effective_method] = method_counts.get(run.effective_method, 0) + 1
    return {
        "query_count": len(runs),
        "hit_at_k": _mean([1.0 if run.hit_at_k else 0.0 for run in runs]),
        "mrr_at_k": _mrr([run.first_hit_rank for run in runs]),
        "avg_recall_at_k": _mean([run.recall_at_k for run in runs]),
        "avg_latency_ms": _mean(successful_latencies),
        "p50_latency_ms": _percentile(successful_latencies, 0.50),
        "p95_latency_ms": _percentile(successful_latencies, 0.95),
        "avg_generated_artifact_count": _mean([float(run.generated_artifact_count) for run in runs]),
        "avg_test_file_count": _mean([float(run.test_file_count) for run in runs]),
        "runs_with_generated_artifacts": sum(1 for run in runs if run.generated_artifact_count > 0),
        "runs_with_test_files": sum(1 for run in runs if run.test_file_count > 0),
        "effective_methods": method_counts,
        "errors": sum(1 for run in runs if run.error),
    }
def _build_pairwise_deltas(stage2_summaries: Dict[str, Dict[str, Any]]) -> List[PairwiseDelta]:
    """Compute metric deltas (mode_a minus mode_b) for every unordered mode pair."""
    modes = list(stage2_summaries)
    deltas: List[PairwiseDelta] = []
    for offset, mode_a in enumerate(modes):
        summary_a = stage2_summaries[mode_a]
        for mode_b in modes[offset + 1 :]:
            summary_b = stage2_summaries[mode_b]
            deltas.append(
                PairwiseDelta(
                    mode_a=mode_a,
                    mode_b=mode_b,
                    hit_at_k_delta=summary_a["hit_at_k"] - summary_b["hit_at_k"],
                    mrr_at_k_delta=summary_a["mrr_at_k"] - summary_b["mrr_at_k"],
                    avg_recall_at_k_delta=summary_a["avg_recall_at_k"] - summary_b["avg_recall_at_k"],
                    avg_latency_ms_delta=summary_a["avg_latency_ms"] - summary_b["avg_latency_ms"],
                )
            )
    return deltas
def _make_plan_payload(
*,
args: argparse.Namespace,
source_root: Path,
dataset: Sequence[Dict[str, Any]],
baseline_methods: Sequence[str],
stage2_modes: Sequence[str],
strategy_specs: Sequence[StrategySpec],
) -> Dict[str, Any]:
return {
"mode": "dry-run" if args.dry_run else "self-check",
"local_only": True,
"source": str(source_root),
"queries_file": str(args.queries_file),
"query_count": len(dataset),
"query_match": args.query_match,
"k": int(args.k),
"coarse_k": int(args.coarse_k),
"baseline_methods": list(baseline_methods),
"stage2_modes": list(stage2_modes),
"strategy_keys": [spec.strategy_key for spec in strategy_specs],
"local_backends": {
"embedding_backend": args.embedding_backend,
"embedding_model": args.embedding_model,
"reranker_backend": args.reranker_backend,
"reranker_model": args.reranker_model,
"embedding_use_gpu": bool(args.embedding_use_gpu),
"reranker_use_gpu": bool(args.reranker_use_gpu),
},
"output": str(args.output),
"progress_output": str(args.progress_output) if args.progress_output else None,
"dataset_preview": [
{
"query": item.get("query"),
"intent": item.get("intent"),
"relevant_paths": item.get("relevant_paths"),
}
for item in list(dataset)[: min(3, len(dataset))]
],
}
def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser for the stage2 benchmark runner."""
    cli = argparse.ArgumentParser(description=__doc__)
    # Corpus and dataset selection.
    cli.add_argument(
        "--source",
        type=Path,
        default=DEFAULT_SOURCE,
        help="Source root to benchmark. Defaults to the repository root so CCW and CodexLens paths resolve together.",
    )
    cli.add_argument(
        "--queries-file",
        type=Path,
        default=DEFAULT_QUERIES_FILE,
        help="Labeled JSONL dataset of CCW smart_search queries",
    )
    cli.add_argument("--query-limit", type=int, default=None, help="Optional query limit")
    cli.add_argument(
        "--query-match",
        type=str,
        default=None,
        help="Optional case-insensitive substring filter for selecting specific benchmark queries.",
    )
    # Evaluation depth.
    cli.add_argument("--k", type=int, default=10, help="Top-k to evaluate")
    cli.add_argument("--coarse-k", type=int, default=100, help="Stage-1 coarse_k")
    # Strategy matrix.
    cli.add_argument(
        "--baseline-methods",
        nargs="*",
        default=list(VALID_BASELINE_METHODS),
        help="Requested smart_search baselines to compare before staged modes (valid: auto, fts, hybrid).",
    )
    cli.add_argument(
        "--stage2-modes",
        nargs="*",
        default=list(VALID_STAGE2_MODES),
        help="Stage-2 modes to compare",
    )
    cli.add_argument("--warmup", type=int, default=0, help="Warmup iterations per strategy")
    # Local-only backend configuration.
    cli.add_argument(
        "--embedding-backend",
        default="fastembed",
        help="Local embedding backend. This runner only accepts fastembed.",
    )
    cli.add_argument(
        "--embedding-model",
        default="code",
        help="Embedding model/profile for the local embedding backend",
    )
    cli.add_argument(
        "--embedding-use-gpu",
        action="store_true",
        help="Enable GPU acceleration for local embeddings. Off by default for stability.",
    )
    cli.add_argument(
        "--reranker-backend",
        default="onnx",
        help="Local reranker backend. Supported local values: onnx, fastembed, legacy.",
    )
    cli.add_argument(
        "--reranker-model",
        default=DEFAULT_LOCAL_ONNX_RERANKER_MODEL,
        help="Reranker model name for the local reranker backend",
    )
    cli.add_argument(
        "--reranker-use-gpu",
        action="store_true",
        help="Enable GPU acceleration for the local reranker. Off by default for stability.",
    )
    # Run-mode toggles.
    cli.add_argument(
        "--skip-dense-baseline",
        action="store_true",
        help="Only compare staged stage2 modes and skip the dense_rerank baseline.",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Validate dataset/config and print the benchmark plan without running retrieval.",
    )
    cli.add_argument(
        "--self-check",
        action="store_true",
        help="Smoke-check the entrypoint by validating dataset, source paths, and stage matrix wiring.",
    )
    # Output locations.
    cli.add_argument(
        "--output",
        type=Path,
        default=DEFAULT_OUTPUT,
        help="Output JSON path",
    )
    cli.add_argument(
        "--progress-output",
        type=Path,
        default=None,
        help="Optional JSON path updated after each query with partial progress and completed runs.",
    )
    return cli
def main() -> None:
    """Run the CCW smart_search stage2 benchmark end to end.

    Steps: validate CLI arguments, load/filter the labeled dataset, build the
    strategy matrix, optionally print a plan (--dry-run/--self-check), execute
    every (query, strategy) pair with isolated per-strategy runtimes, then
    summarize and persist the JSON payload.
    """
    parser = build_parser()
    args = parser.parse_args()
    source_root = args.source.expanduser().resolve()
    # --- argument sanity checks: fail fast with actionable messages ---
    if not source_root.exists():
        raise SystemExit(f"Source path does not exist: {source_root}")
    if int(args.k) <= 0:
        raise SystemExit("--k must be > 0")
    if int(args.coarse_k) <= 0:
        raise SystemExit("--coarse-k must be > 0")
    if int(args.coarse_k) < int(args.k):
        raise SystemExit("--coarse-k must be >= --k")
    if int(args.warmup) < 0:
        raise SystemExit("--warmup must be >= 0")
    embedding_backend = str(args.embedding_backend).strip().lower()
    reranker_backend = str(args.reranker_backend).strip().lower()
    _validate_local_only_backends(embedding_backend, reranker_backend)
    baseline_methods = _validate_baseline_methods(args.baseline_methods)
    stage2_modes = _validate_stage2_modes(args.stage2_modes)
    # --- dataset loading, filtering, and label validation ---
    dataset = _load_labeled_queries(args.queries_file, None)
    dataset = _filter_dataset_by_query_match(dataset, args.query_match)
    dataset = _apply_query_limit(dataset, args.query_limit)
    if not dataset:
        raise SystemExit("No queries to run")
    # Abort early when labeled paths do not exist under the chosen source root.
    missing_paths: List[str] = []
    for item in dataset:
        _, _, item_missing = _resolve_expected_paths(source_root, [str(path) for path in item["relevant_paths"]])
        missing_paths.extend(item_missing)
    if missing_paths:
        preview = ", ".join(missing_paths[:3])
        raise SystemExit(
            "Dataset relevant_paths do not resolve under the selected source root. "
            f"Examples: {preview}"
        )
    strategy_specs = _strategy_specs(
        stage2_modes,
        include_dense_baseline=not args.skip_dense_baseline,
        baseline_methods=baseline_methods,
    )
    # --- plan-only modes: print the plan payload and exit without retrieval ---
    if args.dry_run or args.self_check:
        payload = _make_plan_payload(
            args=args,
            source_root=source_root,
            dataset=dataset,
            baseline_methods=baseline_methods,
            stage2_modes=stage2_modes,
            strategy_specs=strategy_specs,
        )
        if args.self_check:
            payload["status"] = "ok"
            payload["checks"] = {
                "dataset_loaded": True,
                "stage2_matrix_size": len(stage2_modes),
                "local_only_validation": True,
                "source_path_exists": True,
            }
        print(json.dumps(payload, ensure_ascii=False, indent=2))
        return
    # --- base config: staged cascade with local-only backends ---
    config = Config.load()
    config.cascade_strategy = "staged"
    config.enable_staged_rerank = True
    config.enable_cross_encoder_rerank = True
    config.embedding_backend = embedding_backend
    config.embedding_model = str(args.embedding_model).strip()
    config.embedding_use_gpu = bool(args.embedding_use_gpu)
    # Per the flag name, disable auto-embedding of missing files so the index
    # state stays fixed across strategies during the benchmark.
    config.embedding_auto_embed_missing = False
    config.reranker_backend = reranker_backend
    config.reranker_model = str(args.reranker_model).strip()
    config.reranker_use_gpu = bool(args.reranker_use_gpu)
    # One isolated runtime (deep-copied config + engine) per strategy.
    strategy_runtimes = {
        spec.strategy_key: _build_strategy_runtime(config, spec)
        for spec in strategy_specs
    }
    evaluations: List[QueryEvaluation] = []
    total_queries = len(dataset)
    total_runs = total_queries * len(strategy_specs)
    completed_runs = 0
    try:
        # Optional warmup: replay the first query at reduced k/coarse_k so
        # model loading and caches do not skew the first measured run.
        if int(args.warmup) > 0:
            warm_query = str(dataset[0]["query"]).strip()
            warm_relevant_paths = [str(path) for path in dataset[0]["relevant_paths"]]
            _, warm_relevant, _ = _resolve_expected_paths(source_root, warm_relevant_paths)
            for spec in strategy_specs:
                runtime = strategy_runtimes[spec.strategy_key]
                for _ in range(int(args.warmup)):
                    _run_strategy(
                        runtime.engine,
                        runtime.config,
                        strategy_spec=spec,
                        query=warm_query,
                        source_path=source_root,
                        k=min(int(args.k), 5),
                        coarse_k=min(int(args.coarse_k), 50),
                        relevant=warm_relevant,
                    )
        # --- main measurement loop: every (query, strategy) pair ---
        for index, item in enumerate(dataset, start=1):
            query = str(item.get("query", "")).strip()
            if not query:
                continue
            print(f"[query {index}/{total_queries}] {query}", flush=True)
            relevant_paths, relevant, _ = _resolve_expected_paths(
                source_root,
                [str(path) for path in item["relevant_paths"]],
            )
            runs: Dict[str, StrategyRun] = {}
            for spec in strategy_specs:
                # Persist a progress snapshot before each run so long
                # benchmarks are observable and resumable from the outside.
                if args.progress_output is not None:
                    _write_json_payload(
                        args.progress_output,
                        _make_progress_payload(
                            args=args,
                            source_root=source_root,
                            strategy_specs=strategy_specs,
                            evaluations=evaluations,
                            query_index=index - 1,
                            total_queries=total_queries,
                            run_index=completed_runs,
                            total_runs=total_runs,
                            current_query=query,
                            current_strategy_key=spec.strategy_key,
                        ),
                    )
                print(
                    f"[run {completed_runs + 1}/{total_runs}] "
                    f"strategy={spec.strategy_key} query={query}",
                    flush=True,
                )
                runtime = strategy_runtimes[spec.strategy_key]
                runs[spec.strategy_key] = _run_strategy(
                    runtime.engine,
                    runtime.config,
                    strategy_spec=spec,
                    query=query,
                    source_path=source_root,
                    k=int(args.k),
                    coarse_k=int(args.coarse_k),
                    relevant=relevant,
                )
                completed_runs += 1
                run = runs[spec.strategy_key]
                outcome = "error" if run.error else "ok"
                print(
                    f"[done {completed_runs}/{total_runs}] "
                    f"strategy={spec.strategy_key} outcome={outcome} "
                    f"latency_ms={run.latency_ms:.2f} "
                    f"first_hit_rank={run.first_hit_rank}",
                    flush=True,
                )
            evaluations.append(
                QueryEvaluation(
                    query=query,
                    intent=str(item.get("intent")) if item.get("intent") is not None else None,
                    notes=str(item.get("notes")) if item.get("notes") is not None else None,
                    relevant_paths=relevant_paths,
                    runs=runs,
                )
            )
            # Snapshot again after the query completes all strategies.
            if args.progress_output is not None:
                _write_json_payload(
                    args.progress_output,
                    _make_progress_payload(
                        args=args,
                        source_root=source_root,
                        strategy_specs=strategy_specs,
                        evaluations=evaluations,
                        query_index=index,
                        total_queries=total_queries,
                        run_index=completed_runs,
                        total_runs=total_runs,
                        current_query=query,
                        current_strategy_key="complete",
                    ),
                )
    finally:
        # Always release engines and registries, even on failure/interrupt.
        for runtime in strategy_runtimes.values():
            try:
                runtime.engine.close()
            except Exception:
                pass
        for runtime in strategy_runtimes.values():
            try:
                runtime.registry.close()
            except Exception:
                pass
    # --- aggregation: per-strategy summaries, stage2 matrix, pairwise deltas ---
    strategy_summaries: Dict[str, Dict[str, Any]] = {}
    for spec in strategy_specs:
        spec_runs = [evaluation.runs[spec.strategy_key] for evaluation in evaluations if spec.strategy_key in evaluation.runs]
        summary = _summarize_runs(spec_runs)
        summary["strategy"] = spec.strategy
        summary["stage2_mode"] = spec.stage2_mode
        strategy_summaries[spec.strategy_key] = summary
    stage2_mode_matrix = {
        mode: strategy_summaries[f"staged:{mode}"]
        for mode in stage2_modes
        if f"staged:{mode}" in strategy_summaries
    }
    pairwise_deltas = [asdict(item) for item in _build_pairwise_deltas(stage2_mode_matrix)]
    payload = {
        "status": "completed",
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "source": str(source_root),
        "queries_file": str(args.queries_file),
        "query_count": len(evaluations),
        "query_match": args.query_match,
        "k": int(args.k),
        "coarse_k": int(args.coarse_k),
        "local_only": True,
        "strategies": strategy_summaries,
        "stage2_mode_matrix": stage2_mode_matrix,
        "pairwise_stage2_deltas": pairwise_deltas,
        "config": {
            "embedding_backend": config.embedding_backend,
            "embedding_model": config.embedding_model,
            "embedding_use_gpu": bool(config.embedding_use_gpu),
            "reranker_backend": config.reranker_backend,
            "reranker_model": config.reranker_model,
            "reranker_use_gpu": bool(config.reranker_use_gpu),
            "enable_staged_rerank": bool(config.enable_staged_rerank),
            "enable_cross_encoder_rerank": bool(config.enable_cross_encoder_rerank),
        },
        "progress_output": str(args.progress_output) if args.progress_output else None,
        "evaluations": [
            {
                "query": evaluation.query,
                "intent": evaluation.intent,
                "notes": evaluation.notes,
                "relevant_paths": evaluation.relevant_paths,
                "runs": {key: asdict(run) for key, run in evaluation.runs.items()},
            }
            for evaluation in evaluations
        ],
    }
    _write_final_outputs(
        output_path=args.output,
        progress_output=args.progress_output,
        payload=payload,
    )
    print(json.dumps(payload, ensure_ascii=False, indent=2))
# Script entrypoint: parse CLI arguments and run the benchmark.
if __name__ == "__main__":
    main()

View File

@@ -280,8 +280,9 @@ def main() -> None:
if args.staged_cluster_strategy:
config.staged_clustering_strategy = str(args.staged_cluster_strategy)
# Stability: on some Windows setups, fastembed + DirectML can crash under load.
# Dense_rerank uses the embedding backend that matches the index; force CPU here.
# Force local embeddings and reranking onto CPU for reproducible benchmark runs.
config.embedding_use_gpu = False
config.reranker_use_gpu = False
registry = RegistryStore()
registry.initialize()
mapper = PathMapper()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,526 @@
{
"timestamp": "2026-03-14 23:16:55",
"source": "D:\\Claude_dms3",
"queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl",
"query_count": 4,
"k": 10,
"coarse_k": 100,
"local_only": true,
"strategies": {
"dense_rerank": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 20171.940174996853,
"p50_latency_ms": 14222.247749984264,
"p95_latency_ms": 35222.31535999476,
"errors": 0,
"strategy": "dense_rerank",
"stage2_mode": null
},
"staged:precomputed": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13679.793299987912,
"p50_latency_ms": 12918.63379997015,
"p95_latency_ms": 16434.964765003322,
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"staged:realtime": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13885.101849973202,
"p50_latency_ms": 13826.323699980974,
"p95_latency_ms": 14867.712269958853,
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"staged:static_global_graph": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13336.124025002122,
"p50_latency_ms": 13415.476950019598,
"p95_latency_ms": 13514.329230004549,
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"stage2_mode_matrix": {
"precomputed": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13679.793299987912,
"p50_latency_ms": 12918.63379997015,
"p95_latency_ms": 16434.964765003322,
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"realtime": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13885.101849973202,
"p50_latency_ms": 13826.323699980974,
"p95_latency_ms": 14867.712269958853,
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"static_global_graph": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13336.124025002122,
"p50_latency_ms": 13415.476950019598,
"p95_latency_ms": 13514.329230004549,
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"pairwise_stage2_deltas": [
{
"mode_a": "precomputed",
"mode_b": "realtime",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": -205.30854998528957
},
{
"mode_a": "precomputed",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 343.66927498579025
},
{
"mode_a": "realtime",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 548.9778249710798
}
],
"config": {
"embedding_backend": "fastembed",
"embedding_model": "code",
"embedding_use_gpu": false,
"reranker_backend": "onnx",
"reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"enable_staged_rerank": true,
"enable_cross_encoder_rerank": true
},
"evaluations": [
{
"query": "executeHybridMode dense_rerank semantic smart_search",
"intent": "ccw-semantic-routing",
"notes": "CCW semantic mode delegates to CodexLens dense_rerank.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 38829.27079999447,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts",
"d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 16915.833400011063,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 13961.2567999959,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 12986.330999970436,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "parse CodexLens JSON output strip ANSI smart_search",
"intent": "ccw-json-fallback",
"notes": "Covers JSON/plain-text fallback handling for CodexLens output.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 14782.901199996471,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\codex-lens-lsp.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\queue\\queueexecuteinsession.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-dashboard\\queuepanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usewebsocket.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useflows.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-error-monitoring.spec.ts",
"d:\\claude_dms3\\ccw\\tests\\native-session-discovery.test.ts",
"d:\\claude_dms3\\ccw\\src\\core\\services\\checkpoint-service.ts",
"d:\\claude_dms3\\ccw\\tests\\integration\\system-routes.test.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 13710.042499959469,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 15027.674999952316,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13389.622500002384,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "smart_search init embed search action schema",
"intent": "ccw-action-schema",
"notes": "Find the Zod schema that defines init/embed/search actions.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 13661.594299972057,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\discovery.spec.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\__tests__\\ask-question.test.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\a2ui\\a2uiwebsockethandler.js",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\dashboard.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 12127.225099980831,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 12860.084999978542,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13441.331400036812,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "auto init missing job dedupe smart_search",
"intent": "ccw-auto-init",
"notes": "Targets background init/embed warmup and dedupe state.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 13413.994400024414,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\memory-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usememory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\batchoperationtoolbar.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\memory.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useprompthistory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\flowstore.ts",
"d:\\claude_dms3\\ccw\\src\\services\\deepwiki-service.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\claude-routes.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 11966.072200000286,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 13691.39059996605,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13527.211199998856,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
}
]
}

View File

@@ -0,0 +1,415 @@
{
"timestamp": "2026-03-15 00:19:16",
"source": "D:\\Claude_dms3",
"queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl",
"query_count": 1,
"k": 10,
"coarse_k": 100,
"local_only": true,
"strategies": {
"auto": {
"query_count": 1,
"hit_at_k": 1.0,
"mrr_at_k": 1.0,
"avg_recall_at_k": 1.0,
"avg_latency_ms": 1377.3565999865532,
"p50_latency_ms": 1377.3565999865532,
"p95_latency_ms": 1377.3565999865532,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"fts": 1
},
"errors": 0,
"strategy": "auto",
"stage2_mode": null
},
"fts": {
"query_count": 1,
"hit_at_k": 1.0,
"mrr_at_k": 1.0,
"avg_recall_at_k": 1.0,
"avg_latency_ms": 1460.0819000601768,
"p50_latency_ms": 1460.0819000601768,
"p95_latency_ms": 1460.0819000601768,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"fts": 1
},
"errors": 0,
"strategy": "fts",
"stage2_mode": null
},
"hybrid": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 45991.74140000343,
"p50_latency_ms": 45991.74140000343,
"p95_latency_ms": 45991.74140000343,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"hybrid": 1
},
"errors": 0,
"strategy": "hybrid",
"stage2_mode": null
},
"dense_rerank": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 22739.62610000372,
"p50_latency_ms": 22739.62610000372,
"p95_latency_ms": 22739.62610000372,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 2.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 1,
"effective_methods": {
"dense_rerank": 1
},
"errors": 0,
"strategy": "dense_rerank",
"stage2_mode": null
},
"staged:precomputed": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14900.017599999905,
"p50_latency_ms": 14900.017599999905,
"p95_latency_ms": 14900.017599999905,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"staged:realtime": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14104.314599990845,
"p50_latency_ms": 14104.314599990845,
"p95_latency_ms": 14104.314599990845,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"staged:static_global_graph": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 11906.852500021458,
"p50_latency_ms": 11906.852500021458,
"p95_latency_ms": 11906.852500021458,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"stage2_mode_matrix": {
"precomputed": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14900.017599999905,
"p50_latency_ms": 14900.017599999905,
"p95_latency_ms": 14900.017599999905,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"realtime": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14104.314599990845,
"p50_latency_ms": 14104.314599990845,
"p95_latency_ms": 14104.314599990845,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"static_global_graph": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 11906.852500021458,
"p50_latency_ms": 11906.852500021458,
"p95_latency_ms": 11906.852500021458,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"pairwise_stage2_deltas": [
{
"mode_a": "precomputed",
"mode_b": "realtime",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 795.7030000090599
},
{
"mode_a": "precomputed",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 2993.165099978447
},
{
"mode_a": "realtime",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 2197.462099969387
}
],
"config": {
"embedding_backend": "fastembed",
"embedding_model": "code",
"embedding_use_gpu": false,
"reranker_backend": "onnx",
"reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"reranker_use_gpu": false,
"enable_staged_rerank": true,
"enable_cross_encoder_rerank": true
},
"evaluations": [
{
"query": "executeHybridMode dense_rerank semantic smart_search",
"intent": "ccw-semantic-routing",
"notes": "CCW semantic mode delegates to CodexLens dense_rerank.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"auto": {
"strategy_key": "auto",
"strategy": "auto",
"stage2_mode": null,
"effective_method": "fts",
"execution_method": "fts",
"latency_ms": 1377.3565999865532,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"first_hit_rank": 1,
"hit_at_k": true,
"recall_at_k": 1.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"fts": {
"strategy_key": "fts",
"strategy": "fts",
"stage2_mode": null,
"effective_method": "fts",
"execution_method": "fts",
"latency_ms": 1460.0819000601768,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"first_hit_rank": 1,
"hit_at_k": true,
"recall_at_k": 1.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"hybrid": {
"strategy_key": "hybrid",
"strategy": "hybrid",
"stage2_mode": null,
"effective_method": "hybrid",
"execution_method": "hybrid",
"latency_ms": 45991.74140000343,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\config\\litellm-api-config-manager.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py",
"d:\\claude_dms3\\ccw\\src\\commands\\core-memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\scripts\\generate_embeddings.py",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\notification-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\team-msg.ts",
"d:\\claude_dms3\\ccw\\src\\types\\remote-notification.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-store.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"effective_method": "dense_rerank",
"execution_method": "cascade",
"latency_ms": 22739.62610000372,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts",
"d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 2,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 14900.017599999905,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 14104.314599990845,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 11906.852500021458,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
}
}
}
]
}

View File

@@ -57,9 +57,9 @@ semantic-directml = [
# Cross-encoder reranking (second-stage, optional)
# Install with: pip install codexlens[reranker] (default: ONNX backend)
reranker-onnx = [
"optimum~=1.16.0",
"onnxruntime~=1.15.0",
"transformers~=4.36.0",
"optimum[onnxruntime]~=2.1.0",
"onnxruntime~=1.23.0",
"transformers~=4.53.0",
]
# Remote reranking via HTTP API
@@ -79,9 +79,9 @@ reranker-legacy = [
# Backward-compatible alias for default reranker backend
reranker = [
"optimum~=1.16.0",
"onnxruntime~=1.15.0",
"transformers~=4.36.0",
"optimum[onnxruntime]~=2.1.0",
"onnxruntime~=1.23.0",
"transformers~=4.53.0",
]
# Encoding detection for non-UTF8 files
@@ -116,3 +116,12 @@ package-dir = { "" = "src" }
[tool.setuptools.package-data]
"codexlens.lsp" = ["lsp-servers.json"]
[tool.pytest.ini_options]
markers = [
"integration: marks tests that exercise broader end-to-end or dependency-heavy flows",
]
filterwarnings = [
"ignore:'BaseCommand' is deprecated and will be removed in Click 9.0.*:DeprecationWarning",
"ignore:The '__version__' attribute is deprecated and will be removed in Click 9.1.*:DeprecationWarning",
]

View File

@@ -0,0 +1,340 @@
#!/usr/bin/env python3
"""Bootstrap a local-only ONNX reranker environment for CodexLens.
This script defaults to dry-run output so it can be used as a reproducible
bootstrap manifest. When `--apply` is passed, it installs pinned reranker
packages into the selected virtual environment and can optionally pre-download
the ONNX reranker model into a repo-local Hugging Face cache.
Examples:
python scripts/bootstrap_reranker_local.py --dry-run
python scripts/bootstrap_reranker_local.py --apply --download-model
python scripts/bootstrap_reranker_local.py --venv .venv --model Xenova/ms-marco-MiniLM-L-12-v2
"""
from __future__ import annotations
import argparse
import os
import shlex
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
# Repo root, assuming this script lives in <root>/scripts/.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
# Ordered pip pin manifest that sits next to this script.
MANIFEST_PATH = Path(__file__).with_name("requirements-reranker-local.txt")
# Default ONNX cross-encoder model pre-downloaded for local reranking.
DEFAULT_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2"
# Repo-local Hugging Face cache so downloads never touch the user-global cache.
DEFAULT_HF_HOME = PROJECT_ROOT / ".cache" / "huggingface"
# Human-readable rationale printed next to each manifest section in the dry-run plan.
STEP_NOTES = {
    "runtime": "Install the local ONNX runtime first so optimum/transformers do not backtrack over runtime wheels.",
    "hf-stack": "Pin the Hugging Face stack used by the ONNX reranker backend.",
}
@dataclass(frozen=True)
class RequirementStep:
    """One named install step: an ordered group of pinned package specifiers."""

    # Section name taken from a manifest header line such as "# [runtime]".
    name: str
    # Pinned pip requirement strings installed together in a single pip call.
    packages: tuple[str, ...]
def _normalize_venv_path(raw_path: str | Path) -> Path:
return (Path(raw_path) if raw_path else PROJECT_ROOT / ".venv").expanduser().resolve()
def _venv_python(venv_path: Path) -> Path:
if os.name == "nt":
return venv_path / "Scripts" / "python.exe"
return venv_path / "bin" / "python"
def _venv_huggingface_cli(venv_path: Path) -> Path:
if os.name == "nt":
preferred = venv_path / "Scripts" / "hf.exe"
return preferred if preferred.exists() else venv_path / "Scripts" / "huggingface-cli.exe"
preferred = venv_path / "bin" / "hf"
return preferred if preferred.exists() else venv_path / "bin" / "huggingface-cli"
def _default_shell() -> str:
return "powershell" if os.name == "nt" else "bash"
def _shell_quote(value: str, shell: str) -> str:
if shell == "bash":
return shlex.quote(value)
return "'" + value.replace("'", "''") + "'"
def _format_command(parts: Iterable[str], shell: str) -> str:
    """Render *parts* as a single shell-quoted command line."""
    quoted = [_shell_quote(str(part), shell) for part in parts]
    return " ".join(quoted)
def _format_set_env(name: str, value: str, shell: str) -> str:
    """Render an environment-variable assignment for the chosen shell."""
    quoted_value = _shell_quote(value, shell)
    if shell != "bash":
        # PowerShell sets process-scope environment via the $env: drive.
        return f"$env:{name} = {quoted_value}"
    return f"export {name}={quoted_value}"
def _model_local_dir(hf_home: Path, model_name: str) -> Path:
slug = model_name.replace("/", "--")
return hf_home / "models" / slug
def _parse_manifest(manifest_path: Path) -> list[RequirementStep]:
    """Parse the ordered requirements manifest into named install steps.

    Section headers look like ``# [name]``; any other ``#`` line is a comment.
    Non-comment lines are pinned package specifiers belonging to the most
    recent section header. Raises ValueError on a package before any header
    or when the manifest yields no steps at all.
    """
    steps: list[RequirementStep] = []
    section: str | None = None
    packages: list[str] = []

    def _flush() -> None:
        # Only sections that actually accumulated packages become steps.
        if section and packages:
            steps.append(RequirementStep(section, tuple(packages)))

    for raw_line in manifest_path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("# [") and line.endswith("]"):
            _flush()
            section = line[3:-1]
            packages = []
            continue
        if line.startswith("#"):
            continue
        if section is None:
            raise ValueError(f"Package entry found before a section header in {manifest_path}")
        packages.append(line)
    _flush()
    if not steps:
        raise ValueError(f"No requirement steps found in {manifest_path}")
    return steps
def _pip_install_command(python_path: Path, packages: Iterable[str]) -> list[str]:
return [
str(python_path),
"-m",
"pip",
"install",
"--upgrade",
"--disable-pip-version-check",
"--upgrade-strategy",
"only-if-needed",
"--only-binary=:all:",
*packages,
]
def _probe_command(python_path: Path) -> list[str]:
return [
str(python_path),
"-c",
(
"from codexlens.semantic.reranker.factory import check_reranker_available; "
"print(check_reranker_available('onnx'))"
),
]
def _download_command(huggingface_cli: Path, model_name: str, model_dir: Path) -> list[str]:
return [
str(huggingface_cli),
"download",
model_name,
"--local-dir",
str(model_dir),
]
def _print_plan(
    shell: str,
    venv_path: Path,
    python_path: Path,
    huggingface_cli: Path,
    manifest_path: Path,
    steps: list[RequirementStep],
    model_name: str,
    hf_home: Path,
) -> None:
    """Render the deterministic dry-run bootstrap plan to stdout.

    The plan lists the pinned install steps from the manifest, the exact
    numbered commands quoted for *shell*, and the optional runtime environment
    variables that point CodexLens at the local ONNX reranker. Nothing is
    executed here.
    """
    model_dir = _model_local_dir(hf_home, model_name)
    print("CodexLens local reranker bootstrap")
    print(f"manifest: {manifest_path}")
    print(f"target_venv: {venv_path}")
    print(f"target_python: {python_path}")
    # Plain literal (was an f-string with no placeholders, lint F541); the
    # backend is always the local ONNX one for this script.
    print("backend: onnx")
    print(f"model: {model_name}")
    print(f"hf_home: {hf_home}")
    print("mode: dry-run")
    print("notes:")
    print("- Uses only the selected venv Python; no global pip commands are emitted.")
    print("- Targets the local ONNX reranker backend only; no API or LiteLLM providers are involved.")
    print("")
    print("pinned_steps:")
    for step in steps:
        print(f"- {step.name}: {', '.join(step.packages)}")
        note = STEP_NOTES.get(step.name)
        if note:
            print(f"  note: {note}")
    print("")
    print("commands:")
    # Command 1 refreshes build tooling before any pinned install.
    print(
        "1. "
        + _format_command(
            [
                str(python_path),
                "-m",
                "pip",
                "install",
                "--upgrade",
                "pip",
                "setuptools",
                "wheel",
            ],
            shell,
        )
    )
    command_index = 2
    # One numbered pip command per manifest section, in file order.
    for step in steps:
        print(f"{command_index}. " + _format_command(_pip_install_command(python_path, step.packages), shell))
        command_index += 1
    print(f"{command_index}. " + _format_set_env("HF_HOME", str(hf_home), shell))
    command_index += 1
    print(f"{command_index}. " + _format_command(_download_command(huggingface_cli, model_name, model_dir), shell))
    command_index += 1
    print(f"{command_index}. " + _format_command(_probe_command(python_path), shell))
    print("")
    print("optional_runtime_env:")
    print(_format_set_env("RERANKER_BACKEND", "onnx", shell))
    print(_format_set_env("RERANKER_MODEL", str(model_dir), shell))
    print(_format_set_env("HF_HOME", str(hf_home), shell))
def _run_command(command: list[str], *, env: dict[str, str] | None = None) -> None:
command_env = os.environ.copy()
if env:
command_env.update(env)
command_env.setdefault("PYTHONUTF8", "1")
command_env.setdefault("PYTHONIOENCODING", "utf-8")
subprocess.run(command, check=True, env=command_env)
def main() -> int:
    """Entry point for the bootstrap CLI.

    Returns:
        Process exit code: 0 on success (including the dry-run plan), 1 when
        the target venv interpreter or the venv-local Hugging Face CLI is
        missing.
    """
    parser = argparse.ArgumentParser(
        description="Bootstrap pinned local-only ONNX reranker dependencies for a CodexLens virtual environment.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--venv",
        type=Path,
        default=PROJECT_ROOT / ".venv",
        help="Path to the CodexLens virtual environment (default: ./.venv under codex-lens).",
    )
    parser.add_argument(
        "--model",
        default=DEFAULT_MODEL,
        help=f"Model repo to pre-download for local reranking (default: {DEFAULT_MODEL}).",
    )
    parser.add_argument(
        "--hf-home",
        type=Path,
        default=DEFAULT_HF_HOME,
        help="Repo-local Hugging Face cache directory used for optional model downloads.",
    )
    parser.add_argument(
        "--shell",
        choices=("powershell", "bash"),
        default=_default_shell(),
        help="Shell syntax to use when rendering dry-run commands.",
    )
    parser.add_argument(
        "--apply",
        action="store_true",
        help="Execute the pinned install steps against the selected virtual environment.",
    )
    parser.add_argument(
        "--download-model",
        action="store_true",
        help="When used with --apply, pre-download the model into the configured HF_HOME directory.",
    )
    parser.add_argument(
        "--probe",
        action="store_true",
        help="When used with --apply, run a small reranker availability probe at the end.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print the deterministic bootstrap plan. This is also the default when --apply is omitted.",
    )
    args = parser.parse_args()
    steps = _parse_manifest(MANIFEST_PATH)
    venv_path = _normalize_venv_path(args.venv)
    python_path = _venv_python(venv_path)
    huggingface_cli = _venv_huggingface_cli(venv_path)
    hf_home = args.hf_home.expanduser().resolve()
    # An explicit --dry-run always wins, even alongside --apply: a command that
    # says dry-run must never install anything. (Previously --apply silently
    # took precedence.) Dry-run is also the default when --apply is omitted.
    if args.dry_run or not args.apply:
        _print_plan(
            shell=args.shell,
            venv_path=venv_path,
            python_path=python_path,
            huggingface_cli=huggingface_cli,
            manifest_path=MANIFEST_PATH,
            steps=steps,
            model_name=args.model,
            hf_home=hf_home,
        )
        return 0
    if not python_path.exists():
        print(f"Target venv Python not found: {python_path}", file=sys.stderr)
        return 1
    # Refresh build tooling first so the pinned installs resolve cleanly.
    _run_command(
        [
            str(python_path),
            "-m",
            "pip",
            "install",
            "--upgrade",
            "pip",
            "setuptools",
            "wheel",
        ]
    )
    # Install each manifest section in file order to keep resolver work bounded.
    for step in steps:
        _run_command(_pip_install_command(python_path, step.packages))
    if args.download_model:
        if not huggingface_cli.exists():
            print(f"Expected venv-local Hugging Face CLI not found: {huggingface_cli}", file=sys.stderr)
            return 1
        download_env = os.environ.copy()
        download_env["HF_HOME"] = str(hf_home)
        hf_home.mkdir(parents=True, exist_ok=True)
        _run_command(_download_command(huggingface_cli, args.model, _model_local_dir(hf_home, args.model)), env=download_env)
    if args.probe:
        local_model_dir = _model_local_dir(hf_home, args.model)
        probe_env = os.environ.copy()
        probe_env["HF_HOME"] = str(hf_home)
        probe_env.setdefault("RERANKER_BACKEND", "onnx")
        # Prefer the locally downloaded model dir when present; otherwise fall
        # back to the repo id so the probe can still resolve the model.
        probe_env.setdefault("RERANKER_MODEL", str(local_model_dir if local_model_dir.exists() else args.model))
        _run_command(_probe_command(python_path), env=probe_env)
    return 0
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell when run as a script.
    raise SystemExit(main())

View File

@@ -0,0 +1,13 @@
# Ordered local ONNX reranker pins for CodexLens.
# Validated against the repo-local Python 3.13 virtualenv on Windows.
# bootstrap_reranker_local.py installs each section in file order to keep
# pip resolver work bounded and repeatable.
# [runtime]
numpy==2.4.0
onnxruntime==1.23.2
# [hf-stack]
huggingface-hub==0.36.2
transformers==4.53.3
optimum[onnxruntime]==2.1.0

View File

@@ -2,10 +2,13 @@
from __future__ import annotations
import inspect
import json
import logging
import os
import re
import shutil
import subprocess
from pathlib import Path
from typing import Annotated, Any, Dict, Iterable, List, Optional
@@ -22,6 +25,13 @@ from codexlens.storage.registry import RegistryStore, ProjectInfo
from codexlens.storage.index_tree import IndexTreeBuilder
from codexlens.storage.dir_index import DirIndexStore
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
from codexlens.search.ranking import (
QueryIntent,
apply_path_penalties,
detect_query_intent,
query_prefers_lexical_search,
query_targets_generated_files,
)
from codexlens.watcher import WatcherManager, WatcherConfig
from .output import (
@@ -34,6 +44,56 @@ from .output import (
)
app = typer.Typer(help="CodexLens CLI — local code indexing and search.")
# Index subcommand group for reorganized commands
def _patch_typer_click_help_compat() -> None:
    """Patch Typer help rendering for Click versions that pass ctx to make_metavar().

    Installs shims so ``--help`` renders whether the installed Click expects
    ``make_metavar(self)`` or ``make_metavar(self, ctx)``.
    NOTE(review): this relies on click/typer internals — re-verify whenever
    either package is upgraded.
    """
    import click.core
    from typer.core import TyperArgument
    try:
        # Inspect the installed Typer signature; bail out if it cannot be read.
        params = inspect.signature(TyperArgument.make_metavar).parameters
    except (TypeError, ValueError):
        return
    if len(params) != 1:
        # Typer's make_metavar already accepts more than just self; no shim needed.
        return
    def _compat_make_metavar(self, ctx=None):  # type: ignore[override]
        # Replacement that tolerates both the ctx-taking and ctx-free
        # variants of click's ParamType.get_metavar().
        if self.metavar is not None:
            return self.metavar
        var = (self.name or "").upper()
        if not self.required:
            var = f"[{var}]"
        try:
            type_var = self.type.get_metavar(param=self, ctx=ctx)
        except TypeError:
            try:
                type_var = self.type.get_metavar(self, ctx)
            except TypeError:
                type_var = self.type.get_metavar(self)
        if type_var:
            var += f":{type_var}"
        if self.nargs != 1:
            var += "..."
        return var
    TyperArgument.make_metavar = _compat_make_metavar
    param_params = inspect.signature(click.core.Parameter.make_metavar).parameters
    if len(param_params) == 2:
        # Click expects (self, ctx): wrap it so call sites that omit ctx
        # (older Typer) still work by defaulting ctx to None.
        original_param_make_metavar = click.core.Parameter.make_metavar
        def _compat_param_make_metavar(self, ctx=None):  # type: ignore[override]
            return original_param_make_metavar(self, ctx)
        click.core.Parameter.make_metavar = _compat_param_make_metavar
# Index subcommand group for reorganized commands
index_app = typer.Typer(help="Index management commands (init, embeddings, binary, status, migrate, all)")
@@ -119,6 +179,281 @@ def _extract_embedding_error(embed_result: Dict[str, Any]) -> str:
return "Embedding generation failed (no error details provided)"
def _auto_select_search_method(query: str) -> str:
    """Choose a default search method from query intent."""
    wants_lexical = query_targets_generated_files(query) or query_prefers_lexical_search(query)
    if wants_lexical:
        return "fts"
    intent_to_method = {
        QueryIntent.KEYWORD: "fts",
        QueryIntent.SEMANTIC: "dense_rerank",
    }
    # MIXED (or any other) intent falls through to the hybrid method.
    return intent_to_method.get(detect_query_intent(query), "hybrid")
# File extensions treated as non-code when --code-only filtering is active.
_CLI_NON_CODE_EXTENSIONS = {
    "md", "txt", "json", "yaml", "yml", "xml", "csv", "log",
    "ini", "cfg", "conf", "toml", "env", "properties",
    "html", "htm", "svg", "png", "jpg", "jpeg", "gif", "ico", "webp",
    "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
    "lock", "sum", "mod",
}
# Directory names that typically hold build artifacts; the ripgrep fallback
# skips them unless the query explicitly targets generated files.
_FALLBACK_ARTIFACT_DIRS = {
    "dist",
    "build",
    "out",
    "coverage",
    "htmlcov",
    ".cache",
    ".workflow",
    ".next",
    ".nuxt",
    ".parcel-cache",
    ".turbo",
    "tmp",
    "temp",
    "generated",
}
# Directory names that typically hold hand-written source; fallback matches
# under these receive a small score boost.
_FALLBACK_SOURCE_DIRS = {
    "src",
    "lib",
    "core",
    "app",
    "server",
    "client",
    "services",
}
def _normalize_extension_filters(exclude_extensions: Optional[Iterable[str]]) -> set[str]:
"""Normalize extension filters to lowercase values without leading dots."""
normalized: set[str] = set()
for ext in exclude_extensions or []:
cleaned = (ext or "").strip().lower().lstrip(".")
if cleaned:
normalized.add(cleaned)
return normalized
def _score_filesystem_fallback_match(
    query: str,
    path_text: str,
    line_text: str,
    *,
    base_score: float,
) -> float:
    """Score filesystem fallback hits with light source-aware heuristics."""
    score = max(0.0, float(base_score))
    if not score:
        return 0.0
    # Heuristics only apply to keyword-style queries.
    if detect_query_intent(query) != QueryIntent.KEYWORD:
        return score
    normalized_path = str(path_text).replace("\\", "/")
    segments = {
        segment.casefold()
        for segment in normalized_path.split("/")
        if segment and segment != "."
    }
    # Mild boost for hits inside conventional source directories.
    if not _FALLBACK_SOURCE_DIRS.isdisjoint(segments):
        score *= 1.15
    symbol = (query or "").strip()
    if not symbol or " " in symbol:
        return score
    escaped_symbol = re.escape(symbol)
    definition_prefixes = (
        r"(?:export\s+)?(?:async\s+)?def\s+",
        r"(?:export\s+)?(?:async\s+)?function\s+",
        r"(?:export\s+)?class\s+",
        r"(?:export\s+)?interface\s+",
        r"(?:export\s+)?type\s+",
        r"(?:export\s+)?(?:const|let|var)\s+",
    )
    # Strong boost when the matched line looks like the symbol's definition.
    for prefix in definition_prefixes:
        if re.search(rf"^\s*{prefix}{escaped_symbol}\b", line_text):
            score *= 1.8
            break
    return score
def _filesystem_fallback_search(
    query: str,
    search_path: Path,
    *,
    limit: int,
    config: Config,
    code_only: bool = False,
    exclude_extensions: Optional[Iterable[str]] = None,
) -> Optional[dict[str, Any]]:
    """Fallback to ripgrep when indexed keyword search returns no results.

    Returns None when ripgrep is unavailable, the query is blank, ripgrep
    fails, or no matches survive filtering; otherwise returns a dict with
    "results" (scored SearchResult list), "time_ms", and "fallback" metadata.
    """
    rg_path = shutil.which("rg")
    if not rg_path or not query.strip():
        return None
    import time
    allow_generated = query_targets_generated_files(query)
    ignored_dirs = {name for name in IndexTreeBuilder.IGNORE_DIRS if name}
    ignored_dirs.add(".workflow")
    # Queries that explicitly target generated files get artifact dirs back.
    if allow_generated:
        ignored_dirs.difference_update(_FALLBACK_ARTIFACT_DIRS)
    excluded_exts = _normalize_extension_filters(exclude_extensions)
    if code_only:
        excluded_exts.update(_CLI_NON_CODE_EXTENSIONS)
    # --max-count 1: at most one match event per file, deduped below anyway.
    args = [
        rg_path,
        "--json",
        "--line-number",
        "--fixed-strings",
        "--smart-case",
        "--max-count",
        "1",
    ]
    if allow_generated:
        args.append("--hidden")
    for dirname in sorted(ignored_dirs):
        args.extend(["--glob", f"!**/{dirname}/**"])
    args.extend([query, str(search_path)])
    start_time = time.perf_counter()
    proc = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding="utf-8",
        errors="replace",
        check=False,
    )
    # rg exit codes: 0 = matches, 1 = no matches; anything else is an error.
    if proc.returncode not in (0, 1):
        return None
    matches: List[SearchResult] = []
    seen_paths: set[str] = set()
    # Parse rg's JSON-lines stream, keeping the first match per file.
    for raw_line in proc.stdout.splitlines():
        if len(matches) >= limit:
            break
        try:
            event = json.loads(raw_line)
        except json.JSONDecodeError:
            continue
        if event.get("type") != "match":
            continue
        data = event.get("data") or {}
        path_text = ((data.get("path") or {}).get("text") or "").strip()
        if not path_text or path_text in seen_paths:
            continue
        path_obj = Path(path_text)
        extension = path_obj.suffix.lower().lstrip(".")
        if extension and extension in excluded_exts:
            continue
        if code_only and config.language_for_path(path_obj) is None:
            continue
        line_text = ((data.get("lines") or {}).get("text") or "").rstrip("\r\n")
        line_number = data.get("line_number")
        seen_paths.add(path_text)
        # Earlier hits get higher base scores (limit, limit-1, ...).
        base_score = float(limit - len(matches))
        matches.append(
            SearchResult(
                path=path_text,
                score=_score_filesystem_fallback_match(
                    query,
                    path_text,
                    line_text,
                    base_score=base_score,
                ),
                excerpt=line_text.strip() or line_text or path_text,
                content=None,
                metadata={
                    "filesystem_fallback": True,
                    "backend": "ripgrep-fallback",
                    "stale_index_suspected": True,
                },
                start_line=line_number,
                end_line=line_number,
            )
        )
    if not matches:
        return None
    # Apply the same test/generated-file penalties the indexed path uses.
    matches = apply_path_penalties(
        matches,
        query,
        test_file_penalty=config.test_file_penalty,
        generated_file_penalty=config.generated_file_penalty,
    )
    return {
        "results": matches,
        "time_ms": (time.perf_counter() - start_time) * 1000.0,
        "fallback": {
            "backend": "ripgrep-fallback",
            "stale_index_suspected": True,
            "reason": "Indexed FTS search returned no results; filesystem fallback used.",
        },
    }
def _remove_tree_best_effort(target: Path) -> dict[str, Any]:
"""Remove a directory tree without aborting on locked files."""
target = target.resolve()
if not target.exists():
return {
"removed": True,
"partial": False,
"locked_paths": [],
"errors": [],
"remaining_path": None,
}
locked_paths: List[str] = []
errors: List[str] = []
entries = sorted(target.rglob("*"), key=lambda path: len(path.parts), reverse=True)
for entry in entries:
try:
if entry.is_dir() and not entry.is_symlink():
entry.rmdir()
else:
entry.unlink()
except FileNotFoundError:
continue
except PermissionError:
locked_paths.append(str(entry))
except OSError as exc:
if entry.is_dir():
continue
errors.append(f"{entry}: {exc}")
try:
target.rmdir()
except FileNotFoundError:
pass
except PermissionError:
locked_paths.append(str(target))
except OSError:
pass
return {
"removed": not target.exists(),
"partial": target.exists(),
"locked_paths": sorted(set(locked_paths)),
"errors": errors,
"remaining_path": str(target) if target.exists() else None,
}
def _get_index_root() -> Path:
"""Get the index root directory from config or default.
@@ -542,7 +877,7 @@ def search(
offset: int = typer.Option(0, "--offset", min=0, help="Pagination offset - skip first N results."),
depth: int = typer.Option(-1, "--depth", "-d", help="Search depth (-1 = unlimited, 0 = current only)."),
files_only: bool = typer.Option(False, "--files-only", "-f", help="Return only file paths without content snippets."),
method: str = typer.Option("dense_rerank", "--method", "-m", help="Search method: 'dense_rerank' (semantic, default), 'fts' (exact keyword)."),
method: str = typer.Option("auto", "--method", "-m", help="Search method: 'auto' (intent-aware, default), 'dense_rerank' (semantic), 'fts' (exact keyword)."),
use_fuzzy: bool = typer.Option(False, "--use-fuzzy", help="Enable fuzzy matching in FTS method."),
code_only: bool = typer.Option(False, "--code-only", help="Only return code files (excludes md, txt, json, yaml, xml, etc.)."),
exclude_extensions: Optional[str] = typer.Option(None, "--exclude-extensions", help="Comma-separated list of file extensions to exclude (e.g., 'md,txt,json')."),
@@ -576,14 +911,16 @@ def search(
Use --depth to limit search recursion (0 = current dir only).
Search Methods:
- dense_rerank (default): Semantic search using Dense embedding coarse retrieval +
- auto (default): Intent-aware routing. KEYWORD -> fts, MIXED -> hybrid,
SEMANTIC -> dense_rerank.
- dense_rerank: Semantic search using Dense embedding coarse retrieval +
Cross-encoder reranking. Best for natural language queries and code understanding.
- fts: Full-text search using FTS5 (unicode61 tokenizer). Best for exact code
identifiers like function/class names. Use --use-fuzzy for typo tolerance.
Method Selection Guide:
- Code identifiers (function/class names): fts
- Natural language queries: dense_rerank (default)
- Code identifiers (function/class names): auto or fts
- Natural language queries: auto or dense_rerank
- Typo-tolerant search: fts --use-fuzzy
Requirements:
@@ -591,7 +928,7 @@ def search(
Use 'codexlens embeddings-generate' to create embeddings first.
Examples:
# Default semantic search (dense_rerank)
# Default intent-aware search
codexlens search "authentication logic"
# Exact code identifier search
@@ -612,7 +949,7 @@ def search(
# Map old mode values to new method values
mode_to_method = {
"auto": "hybrid",
"auto": "auto",
"exact": "fts",
"fuzzy": "fts", # with use_fuzzy=True
"hybrid": "hybrid",
@@ -638,19 +975,27 @@ def search(
# Validate method - simplified interface exposes only dense_rerank and fts
# Other methods (vector, hybrid, cascade) are hidden but still work for backward compatibility
valid_methods = ["fts", "dense_rerank", "vector", "hybrid", "cascade"]
valid_methods = ["auto", "fts", "dense_rerank", "vector", "hybrid", "cascade"]
if actual_method not in valid_methods:
if json_mode:
print_json(success=False, error=f"Invalid method: {actual_method}. Use 'dense_rerank' (semantic) or 'fts' (exact keyword).")
print_json(success=False, error=f"Invalid method: {actual_method}. Use 'auto', 'dense_rerank', or 'fts'.")
else:
console.print(f"[red]Invalid method:[/red] {actual_method}")
console.print("[dim]Use 'dense_rerank' (semantic, default) or 'fts' (exact keyword)[/dim]")
console.print("[dim]Use 'auto' (default), 'dense_rerank' (semantic), or 'fts' (exact keyword)[/dim]")
raise typer.Exit(code=1)
resolved_method = (
_auto_select_search_method(query)
if actual_method == "auto"
else actual_method
)
display_method = resolved_method
execution_method = resolved_method
# Map dense_rerank to cascade method internally
internal_cascade_strategy = cascade_strategy
if actual_method == "dense_rerank":
actual_method = "cascade"
if execution_method == "dense_rerank":
execution_method = "cascade"
internal_cascade_strategy = "dense_rerank"
# Validate cascade_strategy if provided (for advanced users)
@@ -733,32 +1078,32 @@ def search(
# vector: Pure vector semantic search
# hybrid: RRF fusion of sparse + dense
# cascade: Two-stage binary + dense retrieval
if actual_method == "fts":
if execution_method == "fts":
hybrid_mode = False
enable_fuzzy = use_fuzzy
enable_vector = False
pure_vector = False
enable_cascade = False
elif actual_method == "vector":
elif execution_method == "vector":
hybrid_mode = True
enable_fuzzy = False
enable_vector = True
pure_vector = True
enable_cascade = False
elif actual_method == "hybrid":
elif execution_method == "hybrid":
hybrid_mode = True
enable_fuzzy = use_fuzzy
enable_vector = True
pure_vector = False
enable_cascade = False
elif actual_method == "cascade":
elif execution_method == "cascade":
hybrid_mode = True
enable_fuzzy = False
enable_vector = True
pure_vector = False
enable_cascade = True
else:
raise ValueError(f"Invalid method: {actual_method}")
raise ValueError(f"Invalid method: {execution_method}")
# Parse exclude_extensions from comma-separated string
exclude_exts_list = None
@@ -790,10 +1135,28 @@ def search(
console.print(fp)
else:
# Dispatch to cascade_search for cascade method
if actual_method == "cascade":
if execution_method == "cascade":
result = engine.cascade_search(query, search_path, k=limit, options=options, strategy=internal_cascade_strategy)
else:
result = engine.search(query, search_path, options)
effective_results = result.results
effective_files_matched = result.stats.files_matched
effective_time_ms = result.stats.time_ms
fallback_payload = None
if display_method == "fts" and not use_fuzzy and not effective_results:
fallback_payload = _filesystem_fallback_search(
query,
search_path,
limit=limit,
config=config,
code_only=code_only,
exclude_extensions=exclude_exts_list,
)
if fallback_payload is not None:
effective_results = fallback_payload["results"]
effective_files_matched = len(effective_results)
effective_time_ms = result.stats.time_ms + float(fallback_payload["time_ms"])
results_list = [
{
"path": r.path,
@@ -803,25 +1166,29 @@ def search(
"source": getattr(r, "search_source", None),
"symbol": getattr(r, "symbol", None),
}
for r in result.results
for r in effective_results
]
payload = {
"query": query,
"method": actual_method,
"method": display_method,
"count": len(results_list),
"results": results_list,
"stats": {
"dirs_searched": result.stats.dirs_searched,
"files_matched": result.stats.files_matched,
"time_ms": result.stats.time_ms,
"files_matched": effective_files_matched,
"time_ms": effective_time_ms,
},
}
if fallback_payload is not None:
payload["fallback"] = fallback_payload["fallback"]
if json_mode:
print_json(success=True, result=payload)
else:
render_search_results(result.results, verbose=verbose)
console.print(f"[dim]Method: {actual_method} | Searched {result.stats.dirs_searched} directories in {result.stats.time_ms:.1f}ms[/dim]")
render_search_results(effective_results, verbose=verbose)
if fallback_payload is not None:
console.print("[yellow]No indexed matches found; showing filesystem fallback results (stale index suspected).[/yellow]")
console.print(f"[dim]Method: {display_method} | Searched {result.stats.dirs_searched} directories in {effective_time_ms:.1f}ms[/dim]")
except SearchError as exc:
if json_mode:
@@ -1454,7 +1821,7 @@ def projects(
mapper = PathMapper()
index_root = mapper.source_to_index_dir(project_path)
if index_root.exists():
shutil.rmtree(index_root)
_remove_tree_best_effort(index_root)
if json_mode:
print_json(success=True, result={"removed": str(project_path)})
@@ -1966,17 +2333,30 @@ def clean(
registry_path.unlink()
# Remove all indexes
shutil.rmtree(index_root)
removal = _remove_tree_best_effort(index_root)
result = {
"cleaned": str(index_root),
"size_freed_mb": round(total_size / (1024 * 1024), 2),
"partial": bool(removal["partial"]),
"locked_paths": removal["locked_paths"],
"remaining_path": removal["remaining_path"],
"errors": removal["errors"],
}
if json_mode:
print_json(success=True, result=result)
else:
console.print(f"[green]Removed all indexes:[/green] {result['size_freed_mb']} MB freed")
if result["partial"]:
console.print(
f"[yellow]Partially removed all indexes:[/yellow] {result['size_freed_mb']} MB freed"
)
if result["locked_paths"]:
console.print(
f"[dim]Locked paths left behind: {len(result['locked_paths'])}[/dim]"
)
else:
console.print(f"[green]Removed all indexes:[/green] {result['size_freed_mb']} MB freed")
elif path:
# Remove specific project
@@ -2003,18 +2383,29 @@ def clean(
registry.close()
# Remove indexes
shutil.rmtree(project_index)
removal = _remove_tree_best_effort(project_index)
result = {
"cleaned": str(project_path),
"index_path": str(project_index),
"size_freed_mb": round(total_size / (1024 * 1024), 2),
"partial": bool(removal["partial"]),
"locked_paths": removal["locked_paths"],
"remaining_path": removal["remaining_path"],
"errors": removal["errors"],
}
if json_mode:
print_json(success=True, result=result)
else:
console.print(f"[green]Removed indexes for:[/green] {project_path}")
if result["partial"]:
console.print(f"[yellow]Partially removed indexes for:[/yellow] {project_path}")
if result["locked_paths"]:
console.print(
f"[dim]Locked paths left behind: {len(result['locked_paths'])}[/dim]"
)
else:
console.print(f"[green]Removed indexes for:[/green] {project_path}")
console.print(f" Freed: {result['size_freed_mb']} MB")
else:
@@ -2617,7 +3008,7 @@ def embeddings_status(
codexlens embeddings-status ~/projects/my-app # Check project (auto-finds index)
"""
_deprecated_command_warning("embeddings-status", "index status")
from codexlens.cli.embedding_manager import check_index_embeddings, get_embedding_stats_summary
from codexlens.cli.embedding_manager import get_embedding_stats_summary, get_embeddings_status
# Determine what to check
if path is None:
@@ -3715,7 +4106,7 @@ def index_status(
"""
_configure_logging(verbose, json_mode)
from codexlens.cli.embedding_manager import check_index_embeddings, get_embedding_stats_summary
from codexlens.cli.embedding_manager import get_embedding_stats_summary, get_embeddings_status
# Determine target path and index root
if path is None:
@@ -3751,13 +4142,19 @@ def index_status(
raise typer.Exit(code=1)
# Get embeddings status
embeddings_result = get_embedding_stats_summary(index_root)
embeddings_result = get_embeddings_status(index_root)
embeddings_summary_result = get_embedding_stats_summary(index_root)
# Build combined result
result = {
"index_root": str(index_root),
"embeddings": embeddings_result.get("result") if embeddings_result.get("success") else None,
"embeddings_error": embeddings_result.get("error") if not embeddings_result.get("success") else None,
# Keep "embeddings" backward-compatible as the subtree summary payload.
"embeddings": embeddings_summary_result.get("result") if embeddings_summary_result.get("success") else None,
"embeddings_error": embeddings_summary_result.get("error") if not embeddings_summary_result.get("success") else None,
"embeddings_status": embeddings_result.get("result") if embeddings_result.get("success") else None,
"embeddings_status_error": embeddings_result.get("error") if not embeddings_result.get("success") else None,
"embeddings_summary": embeddings_summary_result.get("result") if embeddings_summary_result.get("success") else None,
"embeddings_summary_error": embeddings_summary_result.get("error") if not embeddings_summary_result.get("success") else None,
}
if json_mode:
@@ -3770,13 +4167,39 @@ def index_status(
console.print("[bold]Dense Embeddings (HNSW):[/bold]")
if embeddings_result.get("success"):
data = embeddings_result["result"]
total = data.get("total_indexes", 0)
with_emb = data.get("indexes_with_embeddings", 0)
total_chunks = data.get("total_chunks", 0)
root = data.get("root") or data
subtree = data.get("subtree") or {}
centralized = data.get("centralized") or {}
console.print(f" Total indexes: {total}")
console.print(f" Indexes with embeddings: [{'green' if with_emb > 0 else 'yellow'}]{with_emb}[/]/{total}")
console.print(f" Total chunks: {total_chunks:,}")
console.print(f" Root files: {root.get('total_files', 0)}")
console.print(
f" Root files with embeddings: "
f"[{'green' if root.get('has_embeddings') else 'yellow'}]{root.get('files_with_embeddings', 0)}[/]"
f"/{root.get('total_files', 0)}"
)
console.print(f" Root coverage: {root.get('coverage_percent', 0):.1f}%")
console.print(f" Root chunks: {root.get('total_chunks', 0):,}")
console.print(f" Root storage mode: {root.get('storage_mode', 'none')}")
console.print(
f" Centralized dense: "
f"{'ready' if centralized.get('dense_ready') else ('present' if centralized.get('dense_index_exists') else 'missing')}"
)
console.print(
f" Centralized binary: "
f"{'ready' if centralized.get('binary_ready') else ('present' if centralized.get('binary_index_exists') else 'missing')}"
)
subtree_total = subtree.get("total_indexes", 0)
subtree_with_embeddings = subtree.get("indexes_with_embeddings", 0)
subtree_chunks = subtree.get("total_chunks", 0)
if subtree_total:
console.print("\n[bold]Subtree Summary:[/bold]")
console.print(f" Total indexes: {subtree_total}")
console.print(
f" Indexes with embeddings: "
f"[{'green' if subtree_with_embeddings > 0 else 'yellow'}]{subtree_with_embeddings}[/]/{subtree_total}"
)
console.print(f" Total chunks: {subtree_chunks:,}")
else:
console.print(f" [yellow]--[/yellow] {embeddings_result.get('error', 'Not available')}")

View File

@@ -48,6 +48,8 @@ from itertools import islice
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Tuple
from codexlens.storage.index_filters import filter_index_paths
try:
from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available
except ImportError:
@@ -61,9 +63,15 @@ except ImportError: # pragma: no cover
VectorStore = None # type: ignore[assignment]
try:
from codexlens.config import VECTORS_META_DB_NAME
from codexlens.config import (
BINARY_VECTORS_MMAP_NAME,
VECTORS_HNSW_NAME,
VECTORS_META_DB_NAME,
)
except ImportError:
VECTORS_HNSW_NAME = "_vectors.hnsw"
VECTORS_META_DB_NAME = "_vectors_meta.db"
BINARY_VECTORS_MMAP_NAME = "_binary_vectors.mmap"
try:
from codexlens.search.ranking import get_file_category
@@ -410,6 +418,98 @@ def check_index_embeddings(index_path: Path) -> Dict[str, any]:
}
def _sqlite_table_exists(conn: sqlite3.Connection, table_name: str) -> bool:
"""Return whether a SQLite table exists."""
cursor = conn.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
(table_name,),
)
return cursor.fetchone() is not None
def _sqlite_count_rows(conn: sqlite3.Connection, table_name: str) -> int:
    """Return row count for a table, or 0 when the table is absent.

    Args:
        conn: Open SQLite connection.
        table_name: Table to count; verified against ``sqlite_master`` first.

    Returns:
        Number of rows in the table, or 0 if the table does not exist.
    """
    if not _sqlite_table_exists(conn, table_name):
        return 0
    # Identifiers cannot be bound as SQL parameters, so quote the name
    # defensively (doubling embedded quotes) before interpolating it.
    quoted = table_name.replace('"', '""')
    cursor = conn.execute(f'SELECT COUNT(*) FROM "{quoted}"')
    return int(cursor.fetchone()[0] or 0)
def _sqlite_count_distinct_rows(conn: sqlite3.Connection, table_name: str, column_name: str) -> int:
    """Return distinct row count for a table column, or 0 when the table is absent.

    Args:
        conn: Open SQLite connection.
        table_name: Table to inspect; verified against ``sqlite_master`` first.
        column_name: Column whose distinct values are counted.

    Returns:
        Count of distinct values in the column, or 0 if the table is missing.
    """
    if not _sqlite_table_exists(conn, table_name):
        return 0
    # Identifiers cannot be bound as SQL parameters, so quote both names
    # defensively (doubling embedded quotes) before interpolating them.
    quoted_table = table_name.replace('"', '""')
    quoted_column = column_name.replace('"', '""')
    cursor = conn.execute(f'SELECT COUNT(DISTINCT "{quoted_column}") FROM "{quoted_table}"')
    return int(cursor.fetchone()[0] or 0)
def _get_model_info_from_index(index_path: Path) -> Optional[Dict[str, Any]]:
"""Read embedding model metadata from an index if available."""
try:
with sqlite3.connect(index_path) as conn:
if not _sqlite_table_exists(conn, "embeddings_config"):
return None
from codexlens.semantic.vector_store import VectorStore
with VectorStore(index_path) as vs:
config = vs.get_model_config()
if not config:
return None
return {
"model_profile": config.get("model_profile"),
"model_name": config.get("model_name"),
"embedding_dim": config.get("embedding_dim"),
"backend": config.get("backend"),
"created_at": config.get("created_at"),
"updated_at": config.get("updated_at"),
}
except Exception:
return None
def _inspect_centralized_embeddings(index_root: Path) -> Dict[str, Any]:
    """Inspect centralized vector artifacts stored directly at the current root.

    Checks for the dense HNSW index, the binary mmap index, and the metadata
    database, then derives readiness flags from their row counts.
    """
    dense_path = index_root / VECTORS_HNSW_NAME
    meta_path = index_root / VECTORS_META_DB_NAME
    binary_path = index_root / BINARY_VECTORS_MMAP_NAME

    dense_exists = dense_path.exists()
    binary_exists = binary_path.exists()
    meta_exists = meta_path.exists()

    report: Dict[str, Any] = {
        "index_root": str(index_root),
        "dense_index_path": str(dense_path) if dense_exists else None,
        "binary_index_path": str(binary_path) if binary_exists else None,
        "meta_db_path": str(meta_path) if meta_exists else None,
        "dense_index_exists": dense_exists,
        "binary_index_exists": binary_exists,
        "meta_db_exists": meta_exists,
        "chunk_metadata_rows": 0,
        "binary_vector_rows": 0,
        "files_with_embeddings": 0,
        "dense_ready": False,
        "binary_ready": False,
        "usable": False,
    }
    if not meta_exists:
        return report

    try:
        with sqlite3.connect(meta_path) as conn:
            report["chunk_metadata_rows"] = _sqlite_count_rows(conn, "chunk_metadata")
            report["binary_vector_rows"] = _sqlite_count_rows(conn, "binary_vectors")
            report["files_with_embeddings"] = _sqlite_count_distinct_rows(
                conn, "chunk_metadata", "file_path"
            )
    except Exception as exc:
        report["error"] = f"Failed to inspect centralized metadata: {exc}"
        return report

    has_chunks = report["chunk_metadata_rows"] > 0
    report["dense_ready"] = dense_exists and has_chunks
    report["binary_ready"] = binary_exists and has_chunks and report["binary_vector_rows"] > 0
    report["usable"] = bool(report["dense_ready"] or report["binary_ready"])
    return report
def _get_embedding_defaults() -> tuple[str, str, bool, List, str, float]:
"""Get default embedding settings from config.
@@ -1024,7 +1124,7 @@ def _discover_index_dbs_internal(index_root: Path) -> List[Path]:
if not index_root.exists():
return []
return sorted(index_root.rglob("_index.db"))
return sorted(filter_index_paths(index_root.rglob("_index.db"), index_root))
def build_centralized_binary_vectors_from_existing(
@@ -1353,7 +1453,7 @@ def find_all_indexes(scan_dir: Path) -> List[Path]:
if not scan_dir.exists():
return []
return list(scan_dir.rglob("_index.db"))
return _discover_index_dbs_internal(scan_dir)
@@ -1866,8 +1966,32 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
Aggregated status with coverage statistics, model info, and timestamps
"""
index_files = _discover_index_dbs_internal(index_root)
centralized = _inspect_centralized_embeddings(index_root)
root_index_path = index_root / "_index.db"
root_index_exists = root_index_path.exists()
if not index_files:
root_result = {
"index_path": str(root_index_path),
"exists": root_index_exists,
"total_files": 0,
"files_with_embeddings": 0,
"files_without_embeddings": 0,
"total_chunks": 0,
"coverage_percent": 0.0,
"has_embeddings": False,
"storage_mode": "none",
}
subtree_result = {
"total_indexes": 0,
"total_files": 0,
"files_with_embeddings": 0,
"files_without_embeddings": 0,
"total_chunks": 0,
"coverage_percent": 0.0,
"indexes_with_embeddings": 0,
"indexes_without_embeddings": 0,
}
return {
"success": True,
"result": {
@@ -1880,72 +2004,123 @@ def get_embeddings_status(index_root: Path) -> Dict[str, any]:
"indexes_with_embeddings": 0,
"indexes_without_embeddings": 0,
"model_info": None,
"root": root_result,
"subtree": subtree_result,
"centralized": centralized,
},
}
total_files = 0
files_with_embeddings = 0
total_chunks = 0
indexes_with_embeddings = 0
model_info = None
subtree_total_files = 0
subtree_files_with_embeddings = 0
subtree_total_chunks = 0
subtree_indexes_with_embeddings = 0
subtree_model_info = None
latest_updated_at = None
for index_path in index_files:
status = check_index_embeddings(index_path)
if status["success"]:
result = status["result"]
total_files += result["total_files"]
files_with_embeddings += result["files_with_chunks"]
total_chunks += result["total_chunks"]
if result["has_embeddings"]:
indexes_with_embeddings += 1
if not status["success"]:
continue
# Get model config from first index with embeddings (they should all match)
if model_info is None:
try:
from codexlens.semantic.vector_store import VectorStore
with VectorStore(index_path) as vs:
config = vs.get_model_config()
if config:
model_info = {
"model_profile": config.get("model_profile"),
"model_name": config.get("model_name"),
"embedding_dim": config.get("embedding_dim"),
"backend": config.get("backend"),
"created_at": config.get("created_at"),
"updated_at": config.get("updated_at"),
}
latest_updated_at = config.get("updated_at")
except Exception:
pass
else:
# Track the latest updated_at across all indexes
try:
from codexlens.semantic.vector_store import VectorStore
with VectorStore(index_path) as vs:
config = vs.get_model_config()
if config and config.get("updated_at"):
if latest_updated_at is None or config["updated_at"] > latest_updated_at:
latest_updated_at = config["updated_at"]
except Exception:
pass
result = status["result"]
subtree_total_files += result["total_files"]
subtree_files_with_embeddings += result["files_with_chunks"]
subtree_total_chunks += result["total_chunks"]
# Update model_info with latest timestamp
if model_info and latest_updated_at:
model_info["updated_at"] = latest_updated_at
if not result["has_embeddings"]:
continue
subtree_indexes_with_embeddings += 1
candidate_model_info = _get_model_info_from_index(index_path)
if not candidate_model_info:
continue
if subtree_model_info is None:
subtree_model_info = candidate_model_info
latest_updated_at = candidate_model_info.get("updated_at")
continue
candidate_updated_at = candidate_model_info.get("updated_at")
if candidate_updated_at and (latest_updated_at is None or candidate_updated_at > latest_updated_at):
latest_updated_at = candidate_updated_at
if subtree_model_info and latest_updated_at:
subtree_model_info["updated_at"] = latest_updated_at
root_total_files = 0
root_files_with_embeddings = 0
root_total_chunks = 0
root_has_embeddings = False
root_storage_mode = "none"
if root_index_exists:
root_status = check_index_embeddings(root_index_path)
if root_status["success"]:
root_data = root_status["result"]
root_total_files = int(root_data["total_files"])
if root_data["has_embeddings"]:
root_files_with_embeddings = int(root_data["files_with_chunks"])
root_total_chunks = int(root_data["total_chunks"])
root_has_embeddings = True
root_storage_mode = "distributed"
if centralized["usable"]:
root_files_with_embeddings = int(centralized["files_with_embeddings"])
root_total_chunks = int(centralized["chunk_metadata_rows"])
root_has_embeddings = True
root_storage_mode = "centralized" if root_storage_mode == "none" else "mixed"
model_info = None
if root_has_embeddings:
if root_storage_mode in {"distributed", "mixed"} and root_index_exists:
model_info = _get_model_info_from_index(root_index_path)
if model_info is None and root_storage_mode in {"centralized", "mixed"}:
model_info = subtree_model_info
root_coverage_percent = round(
(root_files_with_embeddings / root_total_files * 100) if root_total_files > 0 else 0,
1,
)
root_files_without_embeddings = max(root_total_files - root_files_with_embeddings, 0)
root_result = {
"index_path": str(root_index_path),
"exists": root_index_exists,
"total_files": root_total_files,
"files_with_embeddings": root_files_with_embeddings,
"files_without_embeddings": root_files_without_embeddings,
"total_chunks": root_total_chunks,
"coverage_percent": root_coverage_percent,
"has_embeddings": root_has_embeddings,
"storage_mode": root_storage_mode,
}
subtree_result = {
"total_indexes": len(index_files),
"total_files": subtree_total_files,
"files_with_embeddings": subtree_files_with_embeddings,
"files_without_embeddings": subtree_total_files - subtree_files_with_embeddings,
"total_chunks": subtree_total_chunks,
"coverage_percent": round(
(subtree_files_with_embeddings / subtree_total_files * 100) if subtree_total_files > 0 else 0,
1,
),
"indexes_with_embeddings": subtree_indexes_with_embeddings,
"indexes_without_embeddings": len(index_files) - subtree_indexes_with_embeddings,
}
return {
"success": True,
"result": {
"total_indexes": len(index_files),
"total_files": total_files,
"files_with_embeddings": files_with_embeddings,
"files_without_embeddings": total_files - files_with_embeddings,
"total_chunks": total_chunks,
"coverage_percent": round((files_with_embeddings / total_files * 100) if total_files > 0 else 0, 1),
"indexes_with_embeddings": indexes_with_embeddings,
"indexes_without_embeddings": len(index_files) - indexes_with_embeddings,
"total_indexes": 1 if root_index_exists else 0,
"total_files": root_total_files,
"files_with_embeddings": root_files_with_embeddings,
"files_without_embeddings": root_files_without_embeddings,
"total_chunks": root_total_chunks,
"coverage_percent": root_coverage_percent,
"indexes_with_embeddings": 1 if root_has_embeddings else 0,
"indexes_without_embeddings": 1 if root_index_exists and not root_has_embeddings else 0,
"model_info": model_info,
"root": root_result,
"subtree": subtree_result,
"centralized": centralized,
},
}

View File

@@ -126,11 +126,14 @@ class Config:
enable_reranking: bool = False
reranking_top_k: int = 50
symbol_boost_factor: float = 1.5
test_file_penalty: float = 0.15 # Penalty for test/fixture paths during final ranking
generated_file_penalty: float = 0.35 # Penalty for generated/build artifact paths during final ranking
# Optional cross-encoder reranking (second stage; requires optional reranker deps)
enable_cross_encoder_rerank: bool = False
reranker_backend: str = "onnx"
reranker_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
reranker_use_gpu: bool = True # Whether reranker backends should use GPU acceleration
reranker_top_k: int = 50
reranker_max_input_tokens: int = 8192 # Maximum tokens for reranker API batching
reranker_chunk_type_weights: Optional[Dict[str, float]] = None # Weights for chunk types: {"code": 1.0, "docstring": 0.7}
@@ -312,6 +315,7 @@ class Config:
"enabled": self.enable_cross_encoder_rerank,
"backend": self.reranker_backend,
"model": self.reranker_model,
"use_gpu": self.reranker_use_gpu,
"top_k": self.reranker_top_k,
"max_input_tokens": self.reranker_max_input_tokens,
"pool_enabled": self.reranker_pool_enabled,
@@ -418,6 +422,8 @@ class Config:
)
if "model" in reranker:
self.reranker_model = reranker["model"]
if "use_gpu" in reranker:
self.reranker_use_gpu = reranker["use_gpu"]
if "top_k" in reranker:
self.reranker_top_k = reranker["top_k"]
if "max_input_tokens" in reranker:
@@ -712,6 +718,7 @@ class Config:
EMBEDDING_COOLDOWN: Rate limit cooldown for embedding
RERANKER_MODEL: Override reranker model
RERANKER_BACKEND: Override reranker backend
RERANKER_USE_GPU: Override reranker GPU usage (true/false)
RERANKER_ENABLED: Override reranker enabled state (true/false)
RERANKER_POOL_ENABLED: Enable reranker high availability pool
RERANKER_STRATEGY: Load balance strategy for reranker
@@ -832,6 +839,11 @@ class Config:
else:
log.warning("Invalid RERANKER_BACKEND in .env: %r", reranker_backend)
reranker_use_gpu = get_env("RERANKER_USE_GPU")
if reranker_use_gpu:
self.reranker_use_gpu = _parse_bool(reranker_use_gpu)
log.debug("Overriding reranker_use_gpu from .env: %s", self.reranker_use_gpu)
reranker_enabled = get_env("RERANKER_ENABLED")
if reranker_enabled:
value = reranker_enabled.lower()
@@ -878,6 +890,25 @@ class Config:
except ValueError:
log.warning("Invalid RERANKER_TEST_FILE_PENALTY in .env: %r", test_penalty)
ranking_test_penalty = get_env("TEST_FILE_PENALTY")
if ranking_test_penalty:
try:
self.test_file_penalty = float(ranking_test_penalty)
log.debug("Overriding test_file_penalty from .env: %s", self.test_file_penalty)
except ValueError:
log.warning("Invalid TEST_FILE_PENALTY in .env: %r", ranking_test_penalty)
generated_penalty = get_env("GENERATED_FILE_PENALTY")
if generated_penalty:
try:
self.generated_file_penalty = float(generated_penalty)
log.debug(
"Overriding generated_file_penalty from .env: %s",
self.generated_file_penalty,
)
except ValueError:
log.warning("Invalid GENERATED_FILE_PENALTY in .env: %r", generated_penalty)
docstring_weight = get_env("RERANKER_DOCSTRING_WEIGHT")
if docstring_weight:
try:

View File

@@ -23,6 +23,7 @@ ENV_VARS = {
# Reranker configuration (overrides settings.json)
"RERANKER_MODEL": "Reranker model name (overrides settings.json)",
"RERANKER_BACKEND": "Reranker backend: fastembed, onnx, api, litellm, legacy",
"RERANKER_USE_GPU": "Use GPU for local reranker backends: true/false",
"RERANKER_ENABLED": "Enable reranker: true/false",
"RERANKER_API_KEY": "API key for reranker service (SiliconFlow/Cohere/Jina)",
"RERANKER_API_BASE": "Base URL for reranker API (overrides provider default)",
@@ -65,6 +66,9 @@ ENV_VARS = {
# Chunking configuration
"CHUNK_STRIP_COMMENTS": "Strip comments from code chunks for embedding: true/false (default: true)",
"CHUNK_STRIP_DOCSTRINGS": "Strip docstrings from code chunks for embedding: true/false (default: true)",
# Search ranking tuning
"TEST_FILE_PENALTY": "Penalty for test/fixture paths in final search ranking: 0.0-1.0 (default: 0.15)",
"GENERATED_FILE_PENALTY": "Penalty for generated/build artifact paths in final search ranking: 0.0-1.0 (default: 0.35)",
# Reranker tuning
"RERANKER_TEST_FILE_PENALTY": "Penalty for test files in reranking: 0.0-1.0 (default: 0.0)",
"RERANKER_DOCSTRING_WEIGHT": "Weight for docstring chunks in reranking: 0.0-1.0 (default: 1.0)",

File diff suppressed because it is too large Load Diff

View File

@@ -7,6 +7,7 @@ results via Reciprocal Rank Fusion (RRF) algorithm.
from __future__ import annotations
import logging
import threading
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError, as_completed
from contextlib import contextmanager
@@ -34,19 +35,21 @@ from codexlens.config import Config
from codexlens.config import VECTORS_HNSW_NAME
from codexlens.entities import SearchResult
from codexlens.search.ranking import (
DEFAULT_WEIGHTS,
DEFAULT_WEIGHTS as RANKING_DEFAULT_WEIGHTS,
QueryIntent,
apply_symbol_boost,
cross_encoder_rerank,
detect_query_intent,
filter_results_by_category,
get_rrf_weights,
query_prefers_lexical_search,
reciprocal_rank_fusion,
rerank_results,
simple_weighted_fusion,
tag_search_source,
)
from codexlens.storage.dir_index import DirIndexStore
from codexlens.storage.index_filters import filter_index_paths
# Optional LSP imports (for real-time graph expansion)
try:
@@ -67,8 +70,13 @@ class HybridSearchEngine:
default_weights: Default RRF weights for each source
"""
# NOTE: DEFAULT_WEIGHTS imported from ranking.py - single source of truth
# FTS + vector hybrid mode (exact: 0.3, fuzzy: 0.1, vector: 0.6)
# Public compatibility contract for callers/tests that expect the legacy
# three-backend defaults on the engine instance.
DEFAULT_WEIGHTS = {
"exact": 0.3,
"fuzzy": 0.1,
"vector": 0.6,
}
def __init__(
self,
@@ -95,11 +103,172 @@ class HybridSearchEngine:
f"Did you mean to pass index_path to search() instead of __init__()?"
)
self.weights = weights or DEFAULT_WEIGHTS.copy()
self.weights = weights
self._config = config
self.embedder = embedder
self.reranker: Any = None
self._use_gpu = config.embedding_use_gpu if config else True
self._centralized_cache_lock = threading.RLock()
self._centralized_model_config_cache: Dict[str, Any] = {}
self._centralized_embedder_cache: Dict[tuple[Any, ...], Any] = {}
self._centralized_ann_cache: Dict[tuple[str, int], Any] = {}
self._centralized_query_embedding_cache: Dict[tuple[Any, ...], Any] = {}
@property
def weights(self) -> Dict[str, float]:
"""Public/default weights exposed for backwards compatibility."""
return dict(self._weights)
@weights.setter
def weights(self, value: Optional[Dict[str, float]]) -> None:
"""Update public and internal fusion weights together."""
if value is None:
public_weights = self.DEFAULT_WEIGHTS.copy()
fusion_weights = dict(RANKING_DEFAULT_WEIGHTS)
fusion_weights.update(public_weights)
else:
if not isinstance(value, dict):
raise TypeError(f"weights must be a dict, got {type(value).__name__}")
public_weights = dict(value)
fusion_weights = dict(value)
self._weights = public_weights
self._fusion_weights = fusion_weights
@staticmethod
def _clamp_search_score(score: float) -> float:
"""Keep ANN-derived similarity scores within SearchResult's valid domain."""
return max(0.0, float(score))
    def _get_centralized_model_config(self, index_root: Path) -> Optional[Dict[str, Any]]:
        """Load and cache the centralized embedding model config for an index root.

        Args:
            index_root: Directory expected to contain the centralized ``_index.db``.

        Returns:
            A copy of the model config dict, or None when none is available.
            Negative results (None) are cached too, so missing configs are not
            re-probed on every search.
        """
        # Resolve to an absolute path so equivalent spellings share one cache entry.
        root_key = str(Path(index_root).resolve())
        with self._centralized_cache_lock:
            if root_key in self._centralized_model_config_cache:
                cached = self._centralized_model_config_cache[root_key]
                # Return a copy so callers cannot mutate the cached dict.
                return dict(cached) if isinstance(cached, dict) else None
        model_config: Optional[Dict[str, Any]] = None
        try:
            # Imported lazily: semantic support is an optional dependency.
            from codexlens.semantic.vector_store import VectorStore
            central_index_path = Path(root_key) / "_index.db"
            if central_index_path.exists():
                with VectorStore(central_index_path) as vs:
                    loaded = vs.get_model_config()
                if isinstance(loaded, dict):
                    model_config = dict(loaded)
                    self.logger.debug(
                        "Loaded model config from centralized index: %s",
                        model_config,
                    )
        except Exception as exc:
            # Best-effort: a missing/corrupt index degrades to "no config".
            self.logger.debug(
                "Failed to load model config from centralized index: %s",
                exc,
            )
        with self._centralized_cache_lock:
            # Cache the outcome even when it is None, keeping repeated misses cheap.
            self._centralized_model_config_cache[root_key] = (
                dict(model_config) if isinstance(model_config, dict) else None
            )
        return dict(model_config) if isinstance(model_config, dict) else None
    def _get_centralized_embedder(
        self,
        model_config: Optional[Dict[str, Any]],
    ) -> tuple[Any, int, tuple[Any, ...]]:
        """Resolve and cache the embedder used for centralized vector search.

        Args:
            model_config: Stored model configuration (backend, model name,
                profile, dim), or None to fall back to fastembed defaults.

        Returns:
            Tuple of ``(embedder, embedding_dim, embedder_key)``; the key
            identifies the embedder settings and can be reused by callers
            when caching query embeddings.
        """
        # Imported lazily: semantic support is an optional dependency.
        from codexlens.semantic.factory import get_embedder
        # Defaults when no stored model config is available.
        backend = "fastembed"
        model_name: Optional[str] = None
        model_profile = "code"
        use_gpu = bool(self._use_gpu)
        embedding_dim: Optional[int] = None
        if model_config:
            backend = str(model_config.get("backend", "fastembed") or "fastembed")
            model_name = model_config.get("model_name")
            model_profile = str(model_config.get("model_profile", "code") or "code")
            raw_dim = model_config.get("embedding_dim")
            embedding_dim = int(raw_dim) if raw_dim else None
        # The cache key must capture every setting that changes embedder output.
        if backend == "litellm":
            embedder_key: tuple[Any, ...] = ("litellm", model_name or "", None)
        else:
            embedder_key = ("fastembed", model_profile, use_gpu)
        with self._centralized_cache_lock:
            cached = self._centralized_embedder_cache.get(embedder_key)
        if cached is None:
            # Construct outside the lock; embedder creation may be slow.
            if backend == "litellm":
                cached = get_embedder(backend="litellm", model=model_name)
            else:
                cached = get_embedder(
                    backend="fastembed",
                    profile=model_profile,
                    use_gpu=use_gpu,
                )
            with self._centralized_cache_lock:
                # Double-check: keep the first instance published by any thread.
                existing = self._centralized_embedder_cache.get(embedder_key)
                if existing is None:
                    self._centralized_embedder_cache[embedder_key] = cached
                else:
                    cached = existing
        if embedding_dim is None:
            # Fall back to the embedder's own reported dimension (0 if absent).
            embedding_dim = int(getattr(cached, "embedding_dim", 0) or 0)
        return cached, embedding_dim, embedder_key
    def _get_centralized_ann_index(self, index_root: Path, dim: int) -> Any:
        """Load and cache a centralized ANN index for repeated searches.

        Args:
            index_root: Root directory containing the centralized index files.
            dim: Embedding dimensionality the index was built with.

        Returns:
            A loaded ANN index, or None when loading fails. Failed loads are
            NOT cached, so a later successful build can still be picked up.
        """
        # Imported lazily: ANN support is an optional dependency.
        from codexlens.semantic.ann_index import ANNIndex
        # Key on resolved path + dim so different roots/dims get distinct entries.
        resolved_root = Path(index_root).resolve()
        cache_key = (str(resolved_root), int(dim))
        with self._centralized_cache_lock:
            cached = self._centralized_ann_cache.get(cache_key)
            if cached is not None:
                return cached
        ann_index = ANNIndex.create_central(index_root=resolved_root, dim=int(dim))
        if not ann_index.load():
            return None
        with self._centralized_cache_lock:
            # Double-check: another thread may have published an index while we
            # were loading; prefer the already-cached instance.
            existing = self._centralized_ann_cache.get(cache_key)
            if existing is None:
                self._centralized_ann_cache[cache_key] = ann_index
                return ann_index
            return existing
def _get_cached_query_embedding(
self,
query: str,
embedder: Any,
embedder_key: tuple[Any, ...],
) -> Any:
"""Cache repeated query embeddings for the same embedder settings."""
cache_key = embedder_key + (query,)
with self._centralized_cache_lock:
cached = self._centralized_query_embedding_cache.get(cache_key)
if cached is not None:
return cached
query_embedding = embedder.embed_single(query)
with self._centralized_cache_lock:
existing = self._centralized_query_embedding_cache.get(cache_key)
if existing is None:
self._centralized_query_embedding_cache[cache_key] = query_embedding
return query_embedding
return existing
def search(
self,
@@ -154,6 +323,7 @@ class HybridSearchEngine:
# Detect query intent early for category filtering at index level
query_intent = detect_query_intent(query)
lexical_priority_query = query_prefers_lexical_search(query)
# Map intent to category for vector search:
# - KEYWORD (code intent) -> filter to 'code' only
# - SEMANTIC (doc intent) -> no filter (allow docs to surface)
@@ -182,11 +352,11 @@ class HybridSearchEngine:
backends["exact"] = True
if enable_fuzzy:
backends["fuzzy"] = True
if enable_vector:
if enable_vector and not lexical_priority_query:
backends["vector"] = True
# Add LSP graph expansion if requested and available
if enable_lsp_graph and HAS_LSP:
if enable_lsp_graph and HAS_LSP and not lexical_priority_query:
backends["lsp_graph"] = True
elif enable_lsp_graph and not HAS_LSP:
self.logger.warning(
@@ -214,7 +384,7 @@ class HybridSearchEngine:
# Filter weights to only active backends
active_weights = {
source: weight
for source, weight in self.weights.items()
for source, weight in self._fusion_weights.items()
if source in results_map
}
@@ -247,10 +417,16 @@ class HybridSearchEngine:
)
# Optional: embedding-based reranking on top results
if self._config is not None and self._config.enable_reranking:
if (
self._config is not None
and self._config.enable_reranking
and not lexical_priority_query
):
with timer("reranking", self.logger):
if self.embedder is None:
self.embedder = self._get_reranking_embedder()
with self._centralized_cache_lock:
if self.embedder is None:
self.embedder = self._get_reranking_embedder()
fused_results = rerank_results(
query,
fused_results[:100],
@@ -267,10 +443,13 @@ class HybridSearchEngine:
self._config is not None
and self._config.enable_reranking
and self._config.enable_cross_encoder_rerank
and not lexical_priority_query
):
with timer("cross_encoder_rerank", self.logger):
if self.reranker is None:
self.reranker = self._get_cross_encoder_reranker()
with self._centralized_cache_lock:
if self.reranker is None:
self.reranker = self._get_cross_encoder_reranker()
if self.reranker is not None:
fused_results = cross_encoder_rerank(
query,
@@ -363,11 +542,18 @@ class HybridSearchEngine:
device: str | None = None
kwargs: dict[str, Any] = {}
reranker_use_gpu = bool(
getattr(
self._config,
"reranker_use_gpu",
getattr(self._config, "embedding_use_gpu", True),
)
)
if backend == "onnx":
kwargs["use_gpu"] = bool(getattr(self._config, "embedding_use_gpu", True))
kwargs["use_gpu"] = reranker_use_gpu
elif backend == "legacy":
if not bool(getattr(self._config, "embedding_use_gpu", True)):
if not reranker_use_gpu:
device = "cpu"
elif backend == "api":
# Pass max_input_tokens for adaptive batching
@@ -573,60 +759,16 @@ class HybridSearchEngine:
List of SearchResult objects ordered by semantic similarity
"""
try:
import sqlite3
import json
from codexlens.semantic.factory import get_embedder
from codexlens.semantic.ann_index import ANNIndex
# Get model config from the first index database we can find
# (all indexes should use the same embedding model)
index_root = hnsw_path.parent
model_config = None
# Try to get model config from the centralized index root first
# (not the sub-directory index_path, which may have outdated config)
try:
from codexlens.semantic.vector_store import VectorStore
central_index_path = index_root / "_index.db"
if central_index_path.exists():
with VectorStore(central_index_path) as vs:
model_config = vs.get_model_config()
self.logger.debug(
"Loaded model config from centralized index: %s",
model_config
)
except Exception as e:
self.logger.debug("Failed to load model config from centralized index: %s", e)
# Detect dimension from HNSW file if model config not found
model_config = self._get_centralized_model_config(index_root)
if model_config is None:
self.logger.debug("Model config not found, will detect from HNSW index")
# Create a temporary ANNIndex to load and detect dimension
# We need to know the dimension to properly load the index
# Get embedder based on model config or default
if model_config:
backend = model_config.get("backend", "fastembed")
model_name = model_config["model_name"]
model_profile = model_config["model_profile"]
embedding_dim = model_config["embedding_dim"]
if backend == "litellm":
embedder = get_embedder(backend="litellm", model=model_name)
else:
embedder = get_embedder(backend="fastembed", profile=model_profile)
else:
# Default to code profile
embedder = get_embedder(backend="fastembed", profile="code")
embedding_dim = embedder.embedding_dim
self.logger.debug("Model config not found, will detect from cached embedder")
embedder, embedding_dim, embedder_key = self._get_centralized_embedder(model_config)
# Load centralized ANN index
start_load = time.perf_counter()
ann_index = ANNIndex.create_central(
index_root=index_root,
dim=embedding_dim,
)
if not ann_index.load():
ann_index = self._get_centralized_ann_index(index_root=index_root, dim=embedding_dim)
if ann_index is None:
self.logger.warning("Failed to load centralized vector index from %s", hnsw_path)
return []
self.logger.debug(
@@ -637,7 +779,7 @@ class HybridSearchEngine:
# Generate query embedding
start_embed = time.perf_counter()
query_embedding = embedder.embed_single(query)
query_embedding = self._get_cached_query_embedding(query, embedder, embedder_key)
self.logger.debug(
"[TIMING] query_embedding: %.2fms",
(time.perf_counter() - start_embed) * 1000
@@ -658,7 +800,7 @@ class HybridSearchEngine:
return []
# Convert distances to similarity scores (for cosine: score = 1 - distance)
scores = [1.0 - d for d in distances]
scores = [self._clamp_search_score(1.0 - d) for d in distances]
# Fetch chunk metadata from semantic_chunks tables
# We need to search across all _index.db files in the project
@@ -755,7 +897,7 @@ class HybridSearchEngine:
start_line = row.get("start_line")
end_line = row.get("end_line")
score = score_map.get(chunk_id, 0.0)
score = self._clamp_search_score(score_map.get(chunk_id, 0.0))
# Build excerpt
excerpt = content[:200] + "..." if len(content) > 200 else content
@@ -818,7 +960,7 @@ class HybridSearchEngine:
import json
# Find all _index.db files
index_files = list(index_root.rglob("_index.db"))
index_files = filter_index_paths(index_root.rglob("_index.db"), index_root)
results = []
found_ids = set()
@@ -870,7 +1012,7 @@ class HybridSearchEngine:
metadata_json = row["metadata"]
metadata = json.loads(metadata_json) if metadata_json else {}
score = score_map.get(chunk_id, 0.0)
score = self._clamp_search_score(score_map.get(chunk_id, 0.0))
# Build excerpt
excerpt = content[:200] + "..." if len(content) > 200 else content

View File

@@ -6,6 +6,7 @@ for combining results from heterogeneous search backends (exact FTS, fuzzy FTS,
from __future__ import annotations
import logging
import re
import math
from enum import Enum
@@ -14,6 +15,8 @@ from typing import Any, Dict, List, Optional
from codexlens.entities import SearchResult, AdditionalLocation
logger = logging.getLogger(__name__)
# Default RRF weights for hybrid search
DEFAULT_WEIGHTS = {
@@ -32,6 +35,229 @@ class QueryIntent(str, Enum):
MIXED = "mixed"
_TEST_QUERY_RE = re.compile(
r"\b(test|tests|spec|specs|fixture|fixtures|benchmark|benchmarks)\b",
flags=re.IGNORECASE,
)
_AUXILIARY_QUERY_RE = re.compile(
r"\b(example|examples|demo|demos|sample|samples|debug|benchmark|benchmarks|profile|profiling)\b",
flags=re.IGNORECASE,
)
_ARTIFACT_QUERY_RE = re.compile(
r"(?<!\w)(dist|build|out|coverage|htmlcov|generated|bundle|compiled|artifact|artifacts|\.workflow)(?!\w)",
flags=re.IGNORECASE,
)
_ENV_STYLE_QUERY_RE = re.compile(r"\b[A-Z][A-Z0-9]+(?:_[A-Z0-9]+)+\b")
_AUXILIARY_DIR_NAMES = frozenset(
{
"example",
"examples",
"demo",
"demos",
"sample",
"samples",
"benchmark",
"benchmarks",
"profile",
"profiles",
}
)
_GENERATED_DIR_NAMES = frozenset(
{
"dist",
"build",
"out",
"coverage",
"htmlcov",
".cache",
".workflow",
".next",
".nuxt",
".parcel-cache",
".turbo",
"tmp",
"temp",
"generated",
}
)
_GENERATED_FILE_SUFFIXES = (
".generated.ts",
".generated.tsx",
".generated.js",
".generated.jsx",
".generated.py",
".gen.ts",
".gen.tsx",
".gen.js",
".gen.jsx",
".min.js",
".min.css",
".bundle.js",
".bundle.css",
)
_SOURCE_DIR_NAMES = frozenset(
{
"src",
"lib",
"core",
"app",
"server",
"client",
"services",
}
)
_IDENTIFIER_QUERY_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
_TOPIC_TOKEN_RE = re.compile(r"[A-Za-z][A-Za-z0-9]*")
_EXPLICIT_PATH_HINT_MARKER_RE = re.compile(r"[_\-/\\.]")
# English function words stripped from natural-language queries before
# topic-token analysis.
_SEMANTIC_QUERY_STOPWORDS = frozenset(
    {
        "the",
        "a",
        "an",
        "is",
        "are",
        "was",
        "were",
        "be",
        "been",
        "being",
        "have",
        "has",
        "had",
        "do",
        "does",
        "did",
        "will",
        "would",
        "could",
        "should",
        "may",
        "might",
        "must",
        "can",
        "to",
        "of",
        "in",
        "for",
        "on",
        "with",
        "at",
        "by",
        "from",
        "as",
        "into",
        "through",
        "and",
        "but",
        "if",
        "or",
        "not",
        "this",
        "that",
        "these",
        "those",
        "it",
        "its",
        "how",
        "what",
        "where",
        "when",
        "why",
        "which",
        "who",
        "whom",
    }
)
# Path segments too generic to count as topic signals: structural directory
# names (source/auxiliary/generated, merged from the sets above), tooling and
# test directories, and common file-extension tokens.
_PATH_TOPIC_STOPWORDS = frozenset(
    {
        *_SOURCE_DIR_NAMES,
        *_AUXILIARY_DIR_NAMES,
        *_GENERATED_DIR_NAMES,
        "tool",
        "tools",
        "util",
        "utils",
        "test",
        "tests",
        "spec",
        "specs",
        "fixture",
        "fixtures",
        "index",
        "main",
        "ts",
        "tsx",
        "js",
        "jsx",
        "mjs",
        "cjs",
        "py",
        "java",
        "go",
        "rs",
        "rb",
        "php",
        "cs",
        "cpp",
        "cc",
        "c",
        "h",
    }
)
# Config-surface vocabulary: a query containing one of these plus a focus
# token (below) is treated as lexical-priority by query_prefers_lexical_search.
_LEXICAL_PRIORITY_SURFACE_TOKENS = frozenset(
    {
        "config",
        "configs",
        "configuration",
        "configurations",
        "setting",
        "settings",
        "backend",
        "backends",
        "environment",
        "env",
        "variable",
        "variables",
        "factory",
        "factories",
        "override",
        "overrides",
        "option",
        "options",
        "flag",
        "flags",
        "mode",
        "modes",
    }
)
# Focus-domain vocabulary paired with the surface tokens above.
_LEXICAL_PRIORITY_FOCUS_TOKENS = frozenset(
    {
        "embedding",
        "embeddings",
        "reranker",
        "rerankers",
        "onnx",
        "api",
        "litellm",
        "fastembed",
        "local",
        "legacy",
        "stage",
        "stage2",
        "stage3",
        "stage4",
        "precomputed",
        "realtime",
        "static",
        "global",
        "graph",
        "selection",
        "model",
        "models",
    }
)
def normalize_weights(weights: Dict[str, float | None]) -> Dict[str, float | None]:
"""Normalize weights to sum to 1.0 (best-effort)."""
total = sum(float(v) for v in weights.values() if v is not None)
@@ -66,6 +292,7 @@ def detect_query_intent(query: str) -> QueryIntent:
has_code_signals = bool(
re.search(r"(::|->|\.)", trimmed)
or re.search(r"[A-Z][a-z]+[A-Z]", trimmed)
or re.search(r"\b[a-z]+[A-Z][A-Za-z0-9_]*\b", trimmed)
or re.search(r"\b\w+_\w+\b", trimmed)
or re.search(
r"\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b",
@@ -119,6 +346,56 @@ def get_rrf_weights(
return adjust_weights_by_intent(detect_query_intent(query), base_weights)
def query_targets_test_files(query: str) -> bool:
    """Return True when the query explicitly targets tests/spec fixtures."""
    trimmed = (query or "").strip()
    return _TEST_QUERY_RE.search(trimmed) is not None
def query_targets_generated_files(query: str) -> bool:
    """Return True when the query explicitly targets generated/build artifacts."""
    trimmed = (query or "").strip()
    return _ARTIFACT_QUERY_RE.search(trimmed) is not None
def query_targets_auxiliary_files(query: str) -> bool:
    """Return True when the query explicitly targets examples, benchmarks, or debug files."""
    trimmed = (query or "").strip()
    return _AUXILIARY_QUERY_RE.search(trimmed) is not None
def query_prefers_lexical_search(query: str) -> bool:
    """Return True when config/env/factory style queries are safer with lexical-first search."""
    trimmed = (query or "").strip()
    if not trimmed:
        return False
    # Literal ENV_VAR-style text is an immediate lexical signal.
    if _ENV_STYLE_QUERY_RE.search(trimmed):
        return True
    tokens = set(_semantic_query_topic_tokens(trimmed))
    if not tokens:
        return False
    if not tokens.isdisjoint({"factory", "factories"}):
        return True
    mentions_env = not tokens.isdisjoint({"environment", "env"})
    mentions_variable = not tokens.isdisjoint({"variable", "variables"})
    if mentions_env and mentions_variable:
        return True
    backend_focus = {"embedding", "embeddings", "reranker", "rerankers", "onnx", "api", "litellm", "fastembed", "local", "legacy"}
    if "backend" in tokens and not tokens.isdisjoint(backend_focus):
        return True
    # Fall back to the general surface+focus vocabulary pairing.
    has_surface = bool(tokens & _LEXICAL_PRIORITY_SURFACE_TOKENS)
    has_focus = bool(tokens & _LEXICAL_PRIORITY_FOCUS_TOKENS)
    return has_surface and has_focus
def _normalized_path_parts(path: str) -> List[str]:
"""Normalize a path string into casefolded components for heuristics."""
normalized = (path or "").replace("\\", "/")
return [part.casefold() for part in normalized.split("/") if part and part != "."]
# File extensions to category mapping for fast lookup
_EXT_TO_CATEGORY: Dict[str, str] = {
# Code extensions
@@ -196,6 +473,482 @@ def filter_results_by_category(
return filtered
def is_test_file(path: str) -> bool:
    """Return True when a path clearly refers to a test/spec file."""
    parts = _normalized_path_parts(path)
    if not parts:
        return False
    basename = parts[-1]
    test_suffixes = (
        "_test.py",
        ".test.ts", ".test.tsx", ".test.js", ".test.jsx",
        ".spec.ts", ".spec.tsx", ".spec.js", ".spec.jsx",
    )
    if basename.startswith("test_") or basename.endswith(test_suffixes):
        return True
    # Any ancestor directory named like a test/fixture folder also counts.
    ancestors = set(parts[:-1])
    return bool(ancestors & {"tests", "test", "__fixtures__", "fixtures"})
def is_generated_artifact_path(path: str) -> bool:
    """Return True when a path clearly points at generated/build artifacts."""
    parts = _normalized_path_parts(path)
    if not parts:
        return False
    # A generated-output ancestor directory marks the whole subtree.
    for ancestor in parts[:-1]:
        if ancestor in _GENERATED_DIR_NAMES:
            return True
    # Otherwise fall back to well-known generated-file suffixes.
    return parts[-1].endswith(_GENERATED_FILE_SUFFIXES)
def is_auxiliary_reference_path(path: str) -> bool:
    """Return True for examples, benchmarks, demos, and debug helper files."""
    parts = _normalized_path_parts(path)
    if not parts:
        return False
    if not set(parts[:-1]).isdisjoint(_AUXILIARY_DIR_NAMES):
        return True
    basename = parts[-1]
    # Common helper-file naming patterns: debug_*, benchmark*, profile_*,
    # plus *_benchmark / *_profile anywhere in the name.
    has_helper_prefix = basename.startswith(("debug_", "benchmark", "profile_"))
    has_helper_infix = "_benchmark" in basename or "_profile" in basename
    return has_helper_prefix or has_helper_infix
def _extract_identifier_query(query: str) -> Optional[str]:
    """Return a single-token identifier query when definition boosting is safe."""
    candidate = (query or "").strip()
    if not candidate or " " in candidate:
        return None
    # Must look like a bare code identifier to qualify.
    return candidate if _IDENTIFIER_QUERY_RE.fullmatch(candidate) else None
def extract_explicit_path_hints(query: str) -> List[List[str]]:
    """Extract explicit path/file hints from separator-style query tokens.

    Words carrying identifier separators (``smart_search``, ``smart-search.ts``)
    are higher-signal than surrounding prose, so they are pulled out as token
    lists for downstream basename matching. Duplicates are dropped while
    preserving first-seen order.
    """
    hints: List[List[str]] = []
    seen: set[tuple[str, ...]] = set()
    for word in re.split(r"\s+", query or ""):
        candidate = word.strip().strip("\"'`()[]{}<>:,;")
        if not candidate:
            continue
        # Only words containing path/identifier separators qualify as hints.
        if _EXPLICIT_PATH_HINT_MARKER_RE.search(candidate) is None:
            continue
        tokens = [
            token
            for token in _split_identifier_like_tokens(candidate)
            if token not in _PATH_TOPIC_STOPWORDS
        ]
        if len(tokens) >= 2:
            key = tuple(tokens)
            if key not in seen:
                seen.add(key)
                hints.append(list(key))
    return hints
def _is_source_implementation_path(path: str) -> bool:
    """Return True when a path looks like an implementation file under a source dir."""
    parts = _normalized_path_parts(path)
    if not parts:
        return False
    # True when any ancestor directory is a known source dir (src, lib, ...).
    return not _SOURCE_DIR_NAMES.isdisjoint(parts[:-1])
def _result_text_candidates(result: SearchResult) -> List[str]:
    """Collect short text snippets that may contain a symbol definition.

    Scans the result excerpt first, then the full content, keeping at most
    six non-empty stripped lines in total; the result's symbol name (if any)
    is appended afterwards, even when the six-line cap is already reached.
    """
    candidates: List[str] = []
    for text in (result.excerpt, result.content):
        if not isinstance(text, str) or not text.strip():
            continue
        for line in text.splitlines():
            stripped = line.strip()
            if stripped:
                candidates.append(stripped)
            # Cap the scan so large contents stay cheap.
            if len(candidates) >= 6:
                break
        if len(candidates) >= 6:
            break
    symbol_name = result.symbol_name
    if not symbol_name and result.symbol is not None:
        # Fall back to the attached symbol object's name when present.
        symbol_name = getattr(result.symbol, "name", None)
    if isinstance(symbol_name, str) and symbol_name.strip():
        candidates.append(symbol_name.strip())
    return candidates
def _result_defines_identifier(result: SearchResult, symbol: str) -> bool:
    """Best-effort check for whether a result snippet looks like a symbol definition.

    The patterns cover Python/JS/TS definition forms: ``def``/``function``/
    ``class``/``interface``/``type``, ``const``/``let``/``var`` bindings, and
    arrow-function or function-expression assignments, with optional
    ``export``/``default``/``async`` prefixes.
    """
    escaped_symbol = re.escape(symbol)
    definition_patterns = (
        rf"^\s*(?:export\s+)?(?:default\s+)?(?:async\s+)?def\s+{escaped_symbol}\b",
        rf"^\s*(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+{escaped_symbol}\b",
        rf"^\s*(?:export\s+)?(?:default\s+)?class\s+{escaped_symbol}\b",
        rf"^\s*(?:export\s+)?(?:default\s+)?interface\s+{escaped_symbol}\b",
        rf"^\s*(?:export\s+)?(?:default\s+)?type\s+{escaped_symbol}\b",
        rf"^\s*(?:export\s+)?(?:default\s+)?(?:const|let|var)\s+{escaped_symbol}\b",
        rf"^\s*{escaped_symbol}\s*=\s*(?:async\s+)?\(",
        rf"^\s*{escaped_symbol}\s*=\s*(?:async\s+)?[^=]*=>",
    )
    # Any single candidate line matching any pattern counts as a definition.
    for candidate in _result_text_candidates(result):
        if any(re.search(pattern, candidate) for pattern in definition_patterns):
            return True
    return False
def _split_identifier_like_tokens(text: str) -> List[str]:
    """Split identifier-like text into normalized word tokens."""
    if not text:
        return []
    results: List[str] = []
    for word in _TOPIC_TOKEN_RE.findall(text):
        # Insert spaces at camelCase and ACRONYMWord boundaries, then split.
        spaced = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", word)
        spaced = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", spaced)
        for piece in spaced.split():
            normalized = _normalize_topic_token(piece)
            if normalized is not None:
                results.append(normalized)
    return results
def _normalize_topic_token(token: str) -> Optional[str]:
"""Normalize lightweight topic tokens for query/path overlap heuristics."""
normalized = (token or "").casefold()
if len(normalized) < 2 or normalized.isdigit():
return None
if len(normalized) > 4 and normalized.endswith("ies"):
normalized = f"{normalized[:-3]}y"
elif len(normalized) > 3 and normalized.endswith("s") and not normalized.endswith("ss"):
normalized = normalized[:-1]
return normalized or None
def _dedupe_preserve_order(tokens: List[str]) -> List[str]:
"""Deduplicate tokens while preserving the first-seen order."""
deduped: List[str] = []
seen: set[str] = set()
for token in tokens:
if token in seen:
continue
seen.add(token)
deduped.append(token)
return deduped
def _semantic_query_topic_tokens(query: str) -> List[str]:
    """Extract salient natural-language tokens for lightweight topic matching."""
    salient: List[str] = []
    for token in _split_identifier_like_tokens(query):
        # Stopwords carry no topical signal.
        if token not in _SEMANTIC_QUERY_STOPWORDS:
            salient.append(token)
    return _dedupe_preserve_order(salient)
def _path_topic_tokens(path: str) -> tuple[List[str], List[str]]:
    """Extract normalized topic tokens from a path and its basename."""
    parts = _normalized_path_parts(path)
    if not parts:
        return [], []
    ancestor_tokens: List[str] = []
    basename_tokens: List[str] = []
    final_index = len(parts) - 1
    for index, part in enumerate(parts):
        # The last component feeds the basename bucket; the rest go to ancestors.
        bucket = basename_tokens if index == final_index else ancestor_tokens
        bucket.extend(
            token
            for token in _split_identifier_like_tokens(part)
            if token not in _PATH_TOPIC_STOPWORDS
        )
    return _dedupe_preserve_order(ancestor_tokens), _dedupe_preserve_order(basename_tokens)
def _source_path_topic_boost(
    query: str,
    path: str,
    query_intent: QueryIntent,
) -> tuple[float, List[str]]:
    """Return a path/topic boost when a query strongly overlaps a source path.

    Args:
        query: Raw user query text.
        path: Candidate result path scored against the query.
        query_intent: Pre-detected intent; KEYWORD queries get larger boosts.

    Returns:
        A ``(multiplier, overlap_tokens)`` pair; ``(1.0, [])`` means no boost.
    """
    query_tokens = _semantic_query_topic_tokens(query)
    # Single-token queries are too ambiguous for path-overlap boosting.
    if len(query_tokens) < 2:
        return 1.0, []
    path_tokens, basename_tokens = _path_topic_tokens(path)
    if not path_tokens and not basename_tokens:
        return 1.0, []
    path_token_set = set(path_tokens) | set(basename_tokens)
    basename_overlap = [token for token in query_tokens if token in basename_tokens]
    all_overlap = [token for token in query_tokens if token in path_token_set]
    # Explicit hints (e.g. "smart_search", "smart-search.ts") outrank plain overlap.
    explicit_hint_tokens = extract_explicit_path_hints(query)
    for hint_tokens in explicit_hint_tokens:
        if basename_tokens == hint_tokens:
            # Exact basename match: strongest signal.
            if query_intent == QueryIntent.KEYWORD:
                return 4.5, hint_tokens[:3]
            return 2.4, hint_tokens[:3]
        if all(token in basename_tokens for token in hint_tokens):
            # Hint fully contained in the basename: still strong.
            if query_intent == QueryIntent.KEYWORD:
                return 4.5, hint_tokens[:3]
            return 1.6, hint_tokens[:3]
    if query_prefers_lexical_search(query):
        # Config/env/factory style queries: reward basenames carrying the
        # "surface" vocabulary (config, backend, factory, ...).
        lexical_surface_overlap = [
            token for token in basename_tokens if token in query_tokens and token in _LEXICAL_PRIORITY_SURFACE_TOKENS
        ]
        if lexical_surface_overlap:
            lexical_overlap = lexical_surface_overlap[:3]
            if query_intent == QueryIntent.KEYWORD:
                return 5.5, lexical_overlap
            return 5.0, lexical_overlap
    if query_intent == QueryIntent.KEYWORD:
        if len(basename_overlap) >= 2:
            # Multi-token identifier-style queries often name the feature/file directly.
            # Give basename matches a stronger lift so they can survive workspace fan-out.
            multiplier = min(4.5, 2.0 + 1.25 * float(len(basename_overlap)))
            return multiplier, basename_overlap[:3]
        if len(all_overlap) >= 3:
            multiplier = min(2.0, 1.1 + 0.2 * len(all_overlap))
            return multiplier, all_overlap[:3]
        return 1.0, []
    # Semantic/mixed intents get smaller, tightly capped multipliers.
    if len(basename_overlap) >= 2:
        multiplier = min(1.45, 1.15 + 0.1 * len(basename_overlap))
        return multiplier, basename_overlap[:3]
    if len(all_overlap) >= 3:
        multiplier = min(1.3, 1.05 + 0.05 * len(all_overlap))
        return multiplier, all_overlap[:3]
    return 1.0, []
def apply_path_penalties(
    results: List[SearchResult],
    query: str,
    *,
    test_file_penalty: float = 0.15,
    generated_file_penalty: float = 0.35,
) -> List[SearchResult]:
    """Apply lightweight path-based penalties to reduce noisy rankings.

    Test, auxiliary, and generated paths are penalized; implementation paths
    can be boosted (identifier definitions, query/path topic overlap). Each
    adjustment is recorded in the result metadata. Queries that explicitly
    target test/generated/auxiliary files skip the matching penalty.

    Args:
        results: Ranked search results; adjusted copies are produced.
        query: Raw user query; drives intent detection and opt-outs.
        test_file_penalty: Base score fraction removed from test files.
        generated_file_penalty: Base score fraction removed from generated files.

    Returns:
        A new list sorted by adjusted score (unchanged results are reused).
    """
    if not results or (test_file_penalty <= 0 and generated_file_penalty <= 0):
        return results
    query_intent = detect_query_intent(query)
    skip_test_penalty = query_targets_test_files(query)
    skip_auxiliary_penalty = query_targets_auxiliary_files(query)
    skip_generated_penalty = query_targets_generated_files(query)
    query_topic_tokens = _semantic_query_topic_tokens(query)
    keyword_path_query = query_intent == QueryIntent.KEYWORD and len(query_topic_tokens) >= 2
    explicit_feature_query = bool(extract_explicit_path_hints(query))
    # Queries that clearly describe a feature/file are biased toward sources.
    source_oriented_query = (
        explicit_feature_query
        or keyword_path_query
        or (
            query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED}
            and len(query_topic_tokens) >= 2
        )
    )
    identifier_query = None
    if query_intent == QueryIntent.KEYWORD:
        identifier_query = _extract_identifier_query(query)
    effective_test_penalty = float(test_file_penalty)
    if effective_test_penalty > 0 and not skip_test_penalty:
        if query_intent == QueryIntent.KEYWORD:
            # Identifier-style queries should prefer implementation files over test references.
            effective_test_penalty = max(effective_test_penalty, 0.35)
        elif query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED}:
            # Natural-language code queries should still prefer implementation files over references.
            effective_test_penalty = max(effective_test_penalty, 0.25)
        if explicit_feature_query:
            # Explicit feature/file hints should be even more biased toward source implementations.
            effective_test_penalty = max(effective_test_penalty, 0.45)
    # Auxiliary files (examples/benchmarks/debug) share the test penalty base.
    effective_auxiliary_penalty = effective_test_penalty
    if effective_auxiliary_penalty > 0 and not skip_auxiliary_penalty and explicit_feature_query:
        # Examples/benchmarks are usually descriptive noise for feature-targeted implementation queries.
        effective_auxiliary_penalty = max(effective_auxiliary_penalty, 0.5)
    effective_generated_penalty = float(generated_file_penalty)
    if effective_generated_penalty > 0 and not skip_generated_penalty:
        if source_oriented_query:
            effective_generated_penalty = max(effective_generated_penalty, 0.45)
        if explicit_feature_query:
            effective_generated_penalty = max(effective_generated_penalty, 0.6)
    penalized: List[SearchResult] = []
    for result in results:
        multiplier = 1.0
        penalty_multiplier = 1.0
        boost_multiplier = 1.0
        penalty_reasons: List[str] = []
        boost_reasons: List[str] = []
        if effective_test_penalty > 0 and not skip_test_penalty and is_test_file(result.path):
            penalty_multiplier *= max(0.0, 1.0 - effective_test_penalty)
            penalty_reasons.append("test_file")
        if (
            effective_auxiliary_penalty > 0
            and not skip_auxiliary_penalty
            and not is_test_file(result.path)
            and is_auxiliary_reference_path(result.path)
        ):
            penalty_multiplier *= max(0.0, 1.0 - effective_auxiliary_penalty)
            penalty_reasons.append("auxiliary_file")
        if (
            effective_generated_penalty > 0
            and not skip_generated_penalty
            and is_generated_artifact_path(result.path)
        ):
            penalty_multiplier *= max(0.0, 1.0 - effective_generated_penalty)
            penalty_reasons.append("generated_artifact")
        # Boost results whose snippet looks like the definition of a
        # single-identifier query (never for test/generated paths).
        if (
            identifier_query
            and not is_test_file(result.path)
            and not is_generated_artifact_path(result.path)
            and _result_defines_identifier(result, identifier_query)
        ):
            if _is_source_implementation_path(result.path):
                boost_multiplier *= 2.0
                boost_reasons.append("source_definition")
            else:
                boost_multiplier *= 1.35
                boost_reasons.append("symbol_definition")
        # Topic-overlap boost applies only to clean implementation paths and
        # only when no penalty category was explicitly requested by the query.
        if (
            (query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED} or keyword_path_query)
            and not skip_test_penalty
            and not skip_auxiliary_penalty
            and not skip_generated_penalty
            and not is_test_file(result.path)
            and not is_generated_artifact_path(result.path)
            and not is_auxiliary_reference_path(result.path)
            and _is_source_implementation_path(result.path)
        ):
            semantic_path_boost, overlap_tokens = _source_path_topic_boost(
                query,
                result.path,
                query_intent,
            )
            if semantic_path_boost > 1.0:
                boost_multiplier *= semantic_path_boost
                boost_reasons.append("source_path_topic_overlap")
        multiplier = penalty_multiplier * boost_multiplier
        if penalty_reasons or boost_reasons:
            metadata = {
                **result.metadata,
                "path_rank_multiplier": multiplier,
            }
            if penalty_reasons:
                metadata["path_penalty_reasons"] = penalty_reasons
                metadata["path_penalty_multiplier"] = penalty_multiplier
            if boost_reasons:
                metadata["path_boost_reasons"] = boost_reasons
                metadata["path_boost_multiplier"] = boost_multiplier
                # overlap_tokens is always bound here: the reason is only
                # appended right after _source_path_topic_boost assigns it.
                if "source_path_topic_overlap" in boost_reasons and overlap_tokens:
                    metadata["path_boost_overlap_tokens"] = overlap_tokens
            penalized.append(
                result.model_copy(
                    deep=True,
                    update={
                        "score": max(0.0, float(result.score) * multiplier),
                        "metadata": metadata,
                    },
                )
            )
        else:
            penalized.append(result)
    penalized.sort(key=lambda r: r.score, reverse=True)
    return penalized
def rebalance_noisy_results(
    results: List[SearchResult],
    query: str,
) -> List[SearchResult]:
    """Move noisy test/generated/auxiliary results behind implementation hits when safe.

    Scores are untouched; only relative order changes, and deferred results
    are appended after the kept ones (never dropped). A small quota of
    test/auxiliary hits stays in place unless the query carries an explicit
    feature/file hint; queries that explicitly target a noisy category keep
    that category unrestricted.
    """
    if not results:
        return []
    query_intent = detect_query_intent(query)
    skip_test_penalty = query_targets_test_files(query)
    skip_auxiliary_penalty = query_targets_auxiliary_files(query)
    skip_generated_penalty = query_targets_generated_files(query)
    query_topic_tokens = _semantic_query_topic_tokens(query)
    keyword_path_query = query_intent == QueryIntent.KEYWORD and len(query_topic_tokens) >= 2
    explicit_feature_query = bool(extract_explicit_path_hints(query))
    source_oriented_query = (
        explicit_feature_query
        or keyword_path_query
        or (
            query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED}
            and len(query_topic_tokens) >= 2
        )
    )
    # Only rebalance when the query clearly aims at implementation code.
    if not source_oriented_query:
        return results
    # Quotas: unlimited when the query opts in; otherwise zero generated
    # results, and at most one test/auxiliary result (zero with an explicit hint).
    max_generated_results = len(results) if skip_generated_penalty else 0
    max_test_results = len(results) if skip_test_penalty else (0 if explicit_feature_query else 1)
    max_auxiliary_results = len(results) if skip_auxiliary_penalty else (0 if explicit_feature_query else 1)
    selected: List[SearchResult] = []
    deferred: List[SearchResult] = []
    generated_count = 0
    test_count = 0
    auxiliary_count = 0
    for result in results:
        if not skip_generated_penalty and is_generated_artifact_path(result.path):
            if generated_count >= max_generated_results:
                deferred.append(result)
                continue
            generated_count += 1
            selected.append(result)
            continue
        if not skip_test_penalty and is_test_file(result.path):
            if test_count >= max_test_results:
                deferred.append(result)
                continue
            test_count += 1
            selected.append(result)
            continue
        if not skip_auxiliary_penalty and is_auxiliary_reference_path(result.path):
            if auxiliary_count >= max_auxiliary_results:
                deferred.append(result)
                continue
            auxiliary_count += 1
            selected.append(result)
            continue
        selected.append(result)
    return selected + deferred
def simple_weighted_fusion(
results_map: Dict[str, List[SearchResult]],
weights: Dict[str, float] = None,
@@ -633,10 +1386,16 @@ def cross_encoder_rerank(
raw_scores = reranker.predict(pairs, batch_size=int(batch_size))
else:
return results
except Exception:
except Exception as exc:
logger.debug("Cross-encoder rerank failed; returning original ranking: %s", exc)
return results
if not raw_scores or len(raw_scores) != rerank_count:
logger.debug(
"Cross-encoder rerank returned %d scores for %d candidates; returning original ranking",
len(raw_scores) if raw_scores else 0,
rerank_count,
)
return results
scores = [float(s) for s in raw_scores]
@@ -653,26 +1412,13 @@ def cross_encoder_rerank(
else:
probs = [sigmoid(s) for s in scores]
query_intent = detect_query_intent(query)
skip_test_penalty = query_targets_test_files(query)
skip_auxiliary_penalty = query_targets_auxiliary_files(query)
skip_generated_penalty = query_targets_generated_files(query)
keyword_path_query = query_intent == QueryIntent.KEYWORD and len(_semantic_query_topic_tokens(query)) >= 2
reranked_results: List[SearchResult] = []
# Helper to detect test files
def is_test_file(path: str) -> bool:
if not path:
return False
basename = path.split("/")[-1].split("\\")[-1]
return (
basename.startswith("test_") or
basename.endswith("_test.py") or
basename.endswith(".test.ts") or
basename.endswith(".test.js") or
basename.endswith(".spec.ts") or
basename.endswith(".spec.js") or
"/tests/" in path or
"\\tests\\" in path or
"/test/" in path or
"\\test\\" in path
)
for idx, result in enumerate(results):
if idx < rerank_count:
prev_score = float(result.score)
@@ -699,6 +1445,52 @@ def cross_encoder_rerank(
if test_file_penalty > 0 and is_test_file(result.path):
combined_score = combined_score * (1.0 - test_file_penalty)
cross_encoder_floor_reason = None
cross_encoder_floor_score = None
cross_encoder_floor_overlap_tokens: List[str] = []
if (
(query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED} or keyword_path_query)
and not skip_test_penalty
and not skip_auxiliary_penalty
and not skip_generated_penalty
and not is_test_file(result.path)
and not is_generated_artifact_path(result.path)
and not is_auxiliary_reference_path(result.path)
and _is_source_implementation_path(result.path)
):
semantic_path_boost, overlap_tokens = _source_path_topic_boost(
query,
result.path,
query_intent,
)
if semantic_path_boost > 1.0:
floor_ratio = 0.8 if semantic_path_boost >= 1.35 else 0.75
candidate_floor = prev_score * floor_ratio
if candidate_floor > combined_score:
combined_score = candidate_floor
cross_encoder_floor_reason = (
"keyword_source_path_overlap"
if query_intent == QueryIntent.KEYWORD
else "semantic_source_path_overlap"
)
cross_encoder_floor_score = candidate_floor
cross_encoder_floor_overlap_tokens = overlap_tokens
metadata = {
**result.metadata,
"pre_cross_encoder_score": prev_score,
"cross_encoder_score": ce_score,
"cross_encoder_prob": ce_prob,
"cross_encoder_reranked": True,
}
if cross_encoder_floor_reason is not None:
metadata["cross_encoder_floor_reason"] = cross_encoder_floor_reason
metadata["cross_encoder_floor_score"] = cross_encoder_floor_score
if cross_encoder_floor_overlap_tokens:
metadata["cross_encoder_floor_overlap_tokens"] = (
cross_encoder_floor_overlap_tokens
)
reranked_results.append(
SearchResult(
path=result.path,
@@ -707,13 +1499,7 @@ def cross_encoder_rerank(
content=result.content,
symbol=result.symbol,
chunk=result.chunk,
metadata={
**result.metadata,
"pre_cross_encoder_score": prev_score,
"cross_encoder_score": ce_score,
"cross_encoder_prob": ce_prob,
"cross_encoder_reranked": True,
},
metadata=metadata,
start_line=result.start_line,
end_line=result.end_line,
symbol_name=result.symbol_name,

View File

@@ -383,8 +383,37 @@ class ANNIndex:
if self._index is None or self._current_count == 0:
return [], [] # Empty index
# Perform kNN search
labels, distances = self._index.knn_query(query, k=top_k)
effective_k = min(max(int(top_k), 0), self._current_count)
if effective_k == 0:
return [], []
try:
self._index.set_ef(max(self.ef, effective_k))
except Exception:
pass
while True:
try:
labels, distances = self._index.knn_query(query, k=effective_k)
break
except Exception as exc:
if "contiguous 2D array" in str(exc) and effective_k > 1:
next_k = max(1, effective_k // 2)
logger.debug(
"ANN search knn_query failed for k=%d; retrying with k=%d: %s",
effective_k,
next_k,
exc,
)
if next_k == effective_k:
raise
effective_k = next_k
try:
self._index.set_ef(max(self.ef, effective_k))
except Exception:
pass
continue
raise
# Convert to lists and flatten (knn_query returns 2D arrays)
ids = labels[0].tolist()

View File

@@ -15,7 +15,7 @@ def check_reranker_available(backend: str) -> tuple[bool, str | None]:
Notes:
- "fastembed" uses fastembed TextCrossEncoder (pip install fastembed>=0.4.0). [Recommended]
- "onnx" redirects to "fastembed" for backward compatibility.
- "onnx" uses Optimum + ONNX Runtime (pip install onnxruntime optimum[onnxruntime] transformers).
- "legacy" uses sentence-transformers CrossEncoder (pip install codexlens[reranker-legacy]).
- "api" uses a remote reranking HTTP API (requires httpx).
- "litellm" uses `ccw-litellm` for unified access to LLM providers.
@@ -33,10 +33,9 @@ def check_reranker_available(backend: str) -> tuple[bool, str | None]:
return check_fastembed_reranker_available()
if backend == "onnx":
# Redirect to fastembed for backward compatibility
from .fastembed_reranker import check_fastembed_reranker_available
from .onnx_reranker import check_onnx_reranker_available
return check_fastembed_reranker_available()
return check_onnx_reranker_available()
if backend == "litellm":
try:
@@ -66,7 +65,7 @@ def check_reranker_available(backend: str) -> tuple[bool, str | None]:
def get_reranker(
backend: str = "fastembed",
backend: str = "onnx",
model_name: str | None = None,
*,
device: str | None = None,
@@ -76,18 +75,18 @@ def get_reranker(
Args:
backend: Reranker backend to use. Options:
- "fastembed": FastEmbed TextCrossEncoder backend (default, recommended)
- "onnx": Redirects to fastembed for backward compatibility
- "onnx": Optimum + ONNX Runtime backend (default)
- "fastembed": FastEmbed TextCrossEncoder backend
- "api": HTTP API backend (remote providers)
- "litellm": LiteLLM backend (LLM-based, for API mode)
- "legacy": sentence-transformers CrossEncoder backend (optional)
model_name: Model identifier for model-based backends. Defaults depend on backend:
- onnx: Xenova/ms-marco-MiniLM-L-6-v2
- fastembed: Xenova/ms-marco-MiniLM-L-6-v2
- onnx: (redirects to fastembed)
- api: BAAI/bge-reranker-v2-m3 (SiliconFlow)
- legacy: cross-encoder/ms-marco-MiniLM-L-6-v2
- litellm: default
device: Optional device string for backends that support it (legacy only).
device: Optional device string for backends that support it (legacy and onnx).
**kwargs: Additional backend-specific arguments.
Returns:
@@ -111,16 +110,17 @@ def get_reranker(
return FastEmbedReranker(model_name=resolved_model_name, **kwargs)
if backend == "onnx":
# Redirect to fastembed for backward compatibility
ok, err = check_reranker_available("fastembed")
ok, err = check_reranker_available("onnx")
if not ok:
raise ImportError(err)
from .fastembed_reranker import FastEmbedReranker
from .onnx_reranker import ONNXReranker
resolved_model_name = (model_name or "").strip() or FastEmbedReranker.DEFAULT_MODEL
_ = device # Device selection is managed via fastembed providers.
return FastEmbedReranker(model_name=resolved_model_name, **kwargs)
resolved_model_name = (model_name or "").strip() or ONNXReranker.DEFAULT_MODEL
effective_kwargs = dict(kwargs)
if "use_gpu" not in effective_kwargs and device is not None:
effective_kwargs["use_gpu"] = str(device).strip().lower() not in {"cpu", "none"}
return ONNXReranker(model_name=resolved_model_name, **effective_kwargs)
if backend == "legacy":
ok, err = check_reranker_available("legacy")

View File

@@ -58,6 +58,38 @@ def _iter_batches(items: Sequence[Any], batch_size: int) -> Iterable[Sequence[An
yield items[i : i + batch_size]
def _normalize_provider_specs(
providers: Sequence[Any] | None,
) -> tuple[list[str], list[dict[str, Any]]]:
"""Split execution-provider specs into Optimum-compatible names and options."""
normalized_providers: list[str] = []
normalized_options: list[dict[str, Any]] = []
for provider in providers or ():
provider_name: str | None = None
provider_options: dict[str, Any] = {}
if isinstance(provider, tuple):
if provider:
provider_name = str(provider[0]).strip()
if len(provider) > 1 and isinstance(provider[1], dict):
provider_options = dict(provider[1])
elif provider is not None:
provider_name = str(provider).strip()
if not provider_name:
continue
normalized_providers.append(provider_name)
normalized_options.append(provider_options)
if not normalized_providers:
normalized_providers.append("CPUExecutionProvider")
normalized_options.append({})
return normalized_providers, normalized_options
class ONNXReranker(BaseReranker):
"""Cross-encoder reranker using Optimum + ONNX Runtime with lazy loading."""
@@ -110,19 +142,21 @@ class ONNXReranker(BaseReranker):
use_gpu=self.use_gpu, with_device_options=True
)
provider_names, provider_options = _normalize_provider_specs(self.providers)
# Some Optimum versions accept `providers`, others accept a single `provider`.
# Prefer passing the full providers list, with a conservative fallback.
model_kwargs: dict[str, Any] = {}
try:
params = signature(ORTModelForSequenceClassification.from_pretrained).parameters
if "providers" in params:
model_kwargs["providers"] = self.providers
model_kwargs["providers"] = provider_names
if "provider_options" in params:
model_kwargs["provider_options"] = provider_options
elif "provider" in params:
provider_name = "CPUExecutionProvider"
if self.providers:
first = self.providers[0]
provider_name = first[0] if isinstance(first, tuple) else str(first)
model_kwargs["provider"] = provider_name
model_kwargs["provider"] = provider_names[0]
if "provider_options" in params and provider_options[0]:
model_kwargs["provider_options"] = provider_options[0]
except Exception:
model_kwargs = {}

View File

@@ -0,0 +1,47 @@
from __future__ import annotations
from pathlib import Path
from typing import Iterable, List, Optional, Set
from codexlens.storage.index_tree import DEFAULT_IGNORE_DIRS
# Directory names ignored during index discovery, beyond the storage-layer defaults.
EXTRA_IGNORED_INDEX_DIRS = frozenset({".workflow"})
# Casefolded union of default + extra ignored dir names for case-insensitive matching.
IGNORED_INDEX_DIRS = frozenset({name.casefold() for name in DEFAULT_IGNORE_DIRS | set(EXTRA_IGNORED_INDEX_DIRS)})
def is_ignored_index_path(
    index_path: Path,
    scan_root: Path,
    *,
    ignored_dir_names: Optional[Set[str]] = None,
) -> bool:
    """Return True when an index lives under an ignored/generated subtree.

    The comparison is case-insensitive and only considers the ancestor
    directories of ``index_path`` relative to ``scan_root``; paths outside
    the scan root are never treated as ignored.
    """
    if ignored_dir_names is None:
        ignored = IGNORED_INDEX_DIRS
    else:
        ignored = {name.casefold() for name in ignored_dir_names}
    try:
        relative = index_path.resolve().relative_to(scan_root.resolve())
    except ValueError:
        # Not under the scan root at all.
        return False
    # Drop the final component (the index file itself) before checking.
    for part in relative.parts[:-1]:
        if part.casefold() in ignored:
            return True
    return False
def filter_index_paths(
    index_paths: Iterable[Path],
    scan_root: Path,
    *,
    ignored_dir_names: Optional[Set[str]] = None,
) -> List[Path]:
    """Filter out discovered indexes that belong to ignored/generated subtrees."""
    kept: List[Path] = []
    for candidate in index_paths:
        if is_ignored_index_path(candidate, scan_root, ignored_dir_names=ignored_dir_names):
            continue
        kept.append(candidate)
    return kept

View File

@@ -252,6 +252,18 @@ class IndexTreeBuilder:
# Collect directories by depth
dirs_by_depth = self._collect_dirs_by_depth(source_root, languages)
if force_full:
pruned_dirs = self._prune_stale_project_dirs(
project_id=project_info.id,
source_root=source_root,
dirs_by_depth=dirs_by_depth,
)
if pruned_dirs:
self.logger.info(
"Pruned %d stale directory mappings before full rebuild",
len(pruned_dirs),
)
if not dirs_by_depth:
self.logger.warning("No indexable directories found in %s", source_root)
if global_index is not None:
@@ -450,6 +462,52 @@ class IndexTreeBuilder:
# === Internal Methods ===
def _prune_stale_project_dirs(
    self,
    *,
    project_id: int,
    source_root: Path,
    dirs_by_depth: Dict[int, List[Path]],
) -> List[Path]:
    """Remove registry mappings for directories no longer included in the index tree.

    Args:
        project_id: Registry project whose directory mappings are scanned.
        source_root: Project source root; only mappings under it are pruned.
        dirs_by_depth: Freshly collected directories that remain valid.

    Returns:
        Resolved paths of mappings that were successfully unregistered.
    """
    source_root = source_root.resolve()
    valid_dirs: Set[Path] = {
        path.resolve()
        for paths in dirs_by_depth.values()
        for path in paths
    }
    valid_dirs.add(source_root)
    stale_mappings = []
    for mapping in self.registry.get_project_dirs(project_id):
        mapping_path = mapping.source_path.resolve()
        if mapping_path in valid_dirs:
            continue
        try:
            # Only prune mappings that actually live under this project's root.
            mapping_path.relative_to(source_root)
        except ValueError:
            continue
        stale_mappings.append(mapping)
    # Unregister deepest paths first so children go before their parents.
    stale_mappings.sort(
        key=lambda mapping: len(mapping.source_path.resolve().relative_to(source_root).parts),
        reverse=True,
    )
    pruned_paths: List[Path] = []
    for mapping in stale_mappings:
        try:
            if self.registry.unregister_dir(mapping.source_path):
                pruned_paths.append(mapping.source_path.resolve())
        except Exception as exc:
            # Best-effort: one failed prune should not abort the rebuild.
            self.logger.warning(
                "Failed to prune stale mapping for %s: %s",
                mapping.source_path,
                exc,
            )
    return pruned_paths
def _collect_dirs_by_depth(
self, source_root: Path, languages: List[str] = None
) -> Dict[int, List[Path]]:
@@ -620,8 +678,9 @@ class IndexTreeBuilder:
"static_graph_enabled": self.config.static_graph_enabled,
"static_graph_relationship_types": self.config.static_graph_relationship_types,
"use_astgrep": getattr(self.config, "use_astgrep", False),
"ignore_patterns": list(getattr(self.config, "ignore_patterns", [])),
"extension_filters": list(getattr(self.config, "extension_filters", [])),
"ignore_patterns": list(self.ignore_patterns),
"extension_filters": list(self.extension_filters),
"incremental": bool(self.incremental),
}
worker_args = [
@@ -693,6 +752,9 @@ class IndexTreeBuilder:
# Ensure index directory exists
index_db_path.parent.mkdir(parents=True, exist_ok=True)
if not self.incremental:
_reset_index_db_files(index_db_path)
# Create directory index
if self.config.global_symbol_index_enabled:
global_index = GlobalSymbolIndex(global_index_db_path, project_id=project_id)
@@ -1100,6 +1162,18 @@ def _matches_extension_filters(path: Path, patterns: List[str], source_root: Opt
return _matches_path_patterns(path, patterns, source_root)
def _reset_index_db_files(index_db_path: Path) -> None:
"""Best-effort removal of a directory index DB and common SQLite sidecars."""
for suffix in ("", "-wal", "-shm", "-journal"):
target = Path(f"{index_db_path}{suffix}") if suffix else index_db_path
try:
target.unlink()
except FileNotFoundError:
continue
except OSError:
continue
def _build_dir_worker(args: tuple) -> DirBuildResult:
"""Worker function for parallel directory building.
@@ -1140,6 +1214,9 @@ def _build_dir_worker(args: tuple) -> DirBuildResult:
global_index = GlobalSymbolIndex(Path(global_index_db_path), project_id=int(project_id))
global_index.initialize()
if not bool(config_dict.get("incremental", True)):
_reset_index_db_files(index_db_path)
store = DirIndexStore(index_db_path, config=config, global_index=global_index)
store.initialize()

View File

@@ -591,6 +591,56 @@ class RegistryStore:
return [self._row_to_dir_mapping(row) for row in rows]
def find_descendant_project_roots(self, source_root: Path) -> List[DirMapping]:
    """Return root directory mappings for nested projects under ``source_root``.

    A "descendant project root" is a dir_mapping row whose source_path equals
    its owning project's source_root and which lies strictly below
    ``source_root``. Results are sorted shallowest-first, ties broken by
    normalized path for determinism.
    """
    with self._lock:
        conn = self._get_connection()
        source_root_resolved = source_root.resolve()
        source_root_str = self._normalize_path_for_comparison(source_root_resolved)
        # LIKE prefix match is only a coarse pre-filter: it can over-match
        # sibling paths (e.g. /repo vs /repo2) and LIKE wildcards ('%', '_')
        # inside the path broaden matches; relative_to() below filters those.
        rows = conn.execute(
            """
            SELECT dm.*
            FROM dir_mapping dm
            INNER JOIN projects p ON p.id = dm.project_id
            WHERE dm.source_path = p.source_root
            AND p.source_root LIKE ?
            ORDER BY p.source_root ASC
            """,
            (f"{source_root_str}%",),
        ).fetchall()
        descendant_roots: List[DirMapping] = []
        normalized_root_path = Path(source_root_str)
        for row in rows:
            mapping = self._row_to_dir_mapping(row)
            # Compare normalized Paths, not raw strings, so separator and
            # case differences do not create false positives/negatives.
            normalized_mapping_path = Path(
                self._normalize_path_for_comparison(mapping.source_path.resolve())
            )
            if normalized_mapping_path == normalized_root_path:
                # The root itself is not its own descendant.
                continue
            try:
                normalized_mapping_path.relative_to(normalized_root_path)
            except ValueError:
                # Prefix matched via LIKE but path is not actually inside root.
                continue
            descendant_roots.append(mapping)
        # Sort key: (depth below root, normalized path) -> shallowest first,
        # deterministic order among equal depths.
        descendant_roots.sort(
            key=lambda mapping: (
                len(
                    mapping.source_path.resolve().relative_to(
                        source_root_resolved
                    ).parts
                ),
                self._normalize_path_for_comparison(mapping.source_path.resolve()),
            )
        )
        return descendant_roots
def update_dir_stats(self, source_path: Path, files_count: int) -> None:
"""Update directory statistics.

View File

@@ -11,12 +11,25 @@ Common Fixtures:
- sample_code_files: Factory for creating sample code files
"""
import pytest
import tempfile
import shutil
from pathlib import Path
from typing import Dict, Any
import sqlite3
import shutil
import tempfile
import warnings
from pathlib import Path
from typing import Any, Dict
import pytest
# Silence Click deprecation warnings raised during test collection; the
# messages reference Click internals ('BaseCommand', '__version__'), which
# presumably surface through the Typer-based CLI — not code under test.
warnings.filterwarnings(
    "ignore",
    message=r"'BaseCommand' is deprecated and will be removed in Click 9\.0\..*",
    category=DeprecationWarning,
)
warnings.filterwarnings(
    "ignore",
    message=r"The '__version__' attribute is deprecated and will be removed in Click 9\.1\..*",
    category=DeprecationWarning,
)
@pytest.fixture

View File

@@ -98,6 +98,23 @@ class TestANNIndex:
assert ids[0] == 1 # ID of first vector
assert distances[0] < 0.01 # Very small distance (almost identical)
@pytest.mark.skipif(
    not _hnswlib_available(),
    reason="hnswlib not installed"
)
def test_search_clamps_top_k_to_available_vectors(self, temp_db, sample_vectors, sample_ids):
    """Search should clamp top_k to the loaded vector count."""
    from codexlens.semantic.ann_index import ANNIndex

    loaded_count = 3
    index = ANNIndex(temp_db, dim=384)
    index.add_vectors(sample_ids[:loaded_count], sample_vectors[:loaded_count])

    # Request far more neighbours than were loaded; the index must clamp.
    ids, distances = index.search(sample_vectors[0], top_k=10)

    assert len(ids) == loaded_count
    assert len(distances) == loaded_count
    assert ids[0] == 1
@pytest.mark.skipif(
not _hnswlib_available(),
reason="hnswlib not installed"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,350 @@
from __future__ import annotations
import importlib.util
import json
import sys
from pathlib import Path
from types import SimpleNamespace
# Load the benchmark script straight from its file path so these tests do not
# require benchmarks/ to be an importable package.
MODULE_PATH = Path(__file__).resolve().parents[1] / "benchmarks" / "compare_ccw_smart_search_stage2.py"
# Distinct module name avoids clashing with any installed module.
MODULE_NAME = "compare_ccw_smart_search_stage2_test_module"
MODULE_SPEC = importlib.util.spec_from_file_location(MODULE_NAME, MODULE_PATH)
assert MODULE_SPEC is not None and MODULE_SPEC.loader is not None
benchmark = importlib.util.module_from_spec(MODULE_SPEC)
# Register in sys.modules before exec_module, per the importlib
# "importing a source file directly" recipe.
sys.modules[MODULE_NAME] = benchmark
MODULE_SPEC.loader.exec_module(benchmark)
class _FakeChainResult:
def __init__(self, paths: list[str]) -> None:
self.results = [SimpleNamespace(path=path) for path in paths]
class _FakeEngine:
    """Records search/cascade invocations and replays canned result paths."""

    def __init__(
        self,
        *,
        search_paths: list[str] | None = None,
        cascade_paths: list[str] | None = None,
    ) -> None:
        self.search_paths = search_paths or []
        self.cascade_paths = cascade_paths or []
        self.search_calls: list[dict[str, object]] = []
        self.cascade_calls: list[dict[str, object]] = []

    def search(self, query: str, source_path: Path, options: object) -> _FakeChainResult:
        recorded = {"query": query, "source_path": source_path, "options": options}
        self.search_calls.append(recorded)
        return _FakeChainResult(self.search_paths)

    def cascade_search(
        self,
        query: str,
        source_path: Path,
        *,
        k: int,
        coarse_k: int,
        options: object,
        strategy: str,
    ) -> _FakeChainResult:
        recorded = {
            "query": query,
            "source_path": source_path,
            "k": k,
            "coarse_k": coarse_k,
            "options": options,
            "strategy": strategy,
        }
        self.cascade_calls.append(recorded)
        return _FakeChainResult(self.cascade_paths)
def test_strategy_specs_include_baselines_before_stage2_modes() -> None:
    """Baseline methods must precede staged stage-2 modes in spec order."""
    specs = benchmark._strategy_specs(
        ["realtime", "static_global_graph"],
        include_dense_baseline=True,
        baseline_methods=["auto", "fts", "hybrid"],
    )
    expected_order = [
        "auto",
        "fts",
        "hybrid",
        "dense_rerank",
        "staged:realtime",
        "staged:static_global_graph",
    ]
    assert [spec.strategy_key for spec in specs] == expected_order
def test_select_effective_method_matches_cli_auto_routing() -> None:
    """Auto routing: keyword-ish queries -> fts, NL questions -> dense/hybrid."""
    cases = [
        ("find_descendant_project_roots", "fts"),
        ("build dist artifact output", "fts"),
        ("embedding backend fastembed local litellm api config", "fts"),
        ("get_reranker factory onnx backend selection", "fts"),
        ("how does the authentication flow work", "dense_rerank"),
        ("how smart_search keyword routing works", "hybrid"),
    ]
    for query, expected_method in cases:
        assert benchmark._select_effective_method(query, "auto") == expected_method
def test_filter_dataset_by_query_match_uses_case_insensitive_substring() -> None:
    """Filtering matches substrings case-insensitively; None disables it."""
    dataset = [
        {"query": "embedding backend fastembed local litellm api config", "relevant_paths": ["a"]},
        {"query": "get_reranker factory onnx backend selection", "relevant_paths": ["b"]},
        {"query": "how does smart search route keyword queries", "relevant_paths": ["c"]},
    ]

    def queries(items):
        return [item["query"] for item in items]

    # Uppercase needle still matches lowercase queries.
    assert queries(benchmark._filter_dataset_by_query_match(dataset, "BACKEND")) == [
        "embedding backend fastembed local litellm api config",
        "get_reranker factory onnx backend selection",
    ]
    assert queries(benchmark._filter_dataset_by_query_match(dataset, "FASTEMBED")) == [
        "embedding backend fastembed local litellm api config",
    ]
    # A None needle leaves the dataset untouched.
    assert queries(benchmark._filter_dataset_by_query_match(dataset, None)) == queries(dataset)
def test_apply_query_limit_runs_after_filtering() -> None:
    """The query limit caps the already-filtered dataset, not the raw one."""
    dataset = [
        {"query": "executeHybridMode dense_rerank semantic smart_search", "relevant_paths": ["a"]},
        {"query": "embedding backend fastembed local litellm api config", "relevant_paths": ["b"]},
        {"query": "reranker backend onnx api legacy configuration", "relevant_paths": ["c"]},
    ]
    # Filter first (two matches), then cap at a single entry.
    filtered = benchmark._filter_dataset_by_query_match(dataset, "backend")
    limited = benchmark._apply_query_limit(filtered, 1)
    remaining = [entry["query"] for entry in limited]
    assert remaining == ["embedding backend fastembed local litellm api config"]
def test_make_progress_payload_reports_partial_completion() -> None:
    """Progress payload must reflect partially completed query/run counts."""
    # Minimal argparse-like namespace with only the fields the payload reads.
    args = SimpleNamespace(
        queries_file=Path("queries.jsonl"),
        k=10,
        coarse_k=100,
    )
    strategy_specs = [
        benchmark.StrategySpec(strategy_key="auto", strategy="auto", stage2_mode=None),
        benchmark.StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None),
    ]
    # One fully evaluated query (out of three) holding a single strategy run.
    evaluations = [
        benchmark.QueryEvaluation(
            query="embedding backend fastembed local litellm api config",
            intent="config",
            notes=None,
            relevant_paths=["codex-lens/src/codexlens/config.py"],
            runs={
                "auto": benchmark.StrategyRun(
                    strategy_key="auto",
                    strategy="auto",
                    stage2_mode=None,
                    effective_method="fts",
                    execution_method="fts",
                    latency_ms=123.0,
                    topk_paths=["config.py"],
                    first_hit_rank=1,
                    hit_at_k=True,
                    recall_at_k=1.0,
                    generated_artifact_count=0,
                    test_file_count=0,
                    error=None,
                )
            },
        )
    ]
    payload = benchmark._make_progress_payload(
        args=args,
        source_root=Path("D:/repo"),
        strategy_specs=strategy_specs,
        evaluations=evaluations,
        query_index=1,
        total_queries=3,
        run_index=2,
        total_runs=6,
        current_query="embedding backend fastembed local litellm api config",
        current_strategy_key="complete",
    )
    # Counters mirror the in-flight indices, not the configured totals.
    assert payload["status"] == "running"
    assert payload["progress"]["completed_queries"] == 1
    assert payload["progress"]["completed_runs"] == 2
    assert payload["progress"]["total_runs"] == 6
    assert payload["strategy_keys"] == ["auto", "dense_rerank"]
    assert payload["evaluations"][0]["runs"]["auto"]["effective_method"] == "fts"
def test_write_final_outputs_updates_progress_snapshot(tmp_path: Path) -> None:
    """Final payload is written identically to both output and progress files."""
    results_file = tmp_path / "results.json"
    progress_file = tmp_path / "progress.json"
    payload = {
        "status": "completed",
        "query_count": 1,
        "strategies": {"auto": {"effective_methods": {"fts": 1}}},
    }

    benchmark._write_final_outputs(
        output_path=results_file,
        progress_output=progress_file,
        payload=payload,
    )

    # Both artifacts must round-trip to the same final payload.
    for written in (results_file, progress_file):
        assert json.loads(written.read_text(encoding="utf-8")) == payload
def test_build_parser_defaults_reranker_gpu_to_disabled() -> None:
    """CLI defaults: both GPU flags off, local ONNX reranker model selected."""
    args = benchmark.build_parser().parse_args([])

    assert args.embedding_use_gpu is False
    assert args.reranker_use_gpu is False
    assert args.reranker_model == benchmark.DEFAULT_LOCAL_ONNX_RERANKER_MODEL
def test_build_strategy_runtime_clones_config(monkeypatch, tmp_path: Path) -> None:
    """_build_strategy_runtime must clone the config and init the registry."""

    class _FakeRegistry:
        # Stands in for RegistryStore; records that initialize() was called.
        def __init__(self) -> None:
            self.initialized = False

        def initialize(self) -> None:
            self.initialized = True

    class _FakeMapper:
        pass

    class _FakeEngine:
        # Captures constructor wiring so the test can assert config identity.
        def __init__(self, *, registry, mapper, config) -> None:
            self.registry = registry
            self.mapper = mapper
            self.config = config

    monkeypatch.setattr(benchmark, "RegistryStore", _FakeRegistry)
    monkeypatch.setattr(benchmark, "PathMapper", _FakeMapper)
    monkeypatch.setattr(benchmark, "ChainSearchEngine", _FakeEngine)

    base_config = benchmark.Config(data_dir=tmp_path, reranker_use_gpu=False)
    strategy_spec = benchmark.StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None)

    runtime = benchmark._build_strategy_runtime(base_config, strategy_spec)

    assert runtime.strategy_spec == strategy_spec
    # The runtime works on a clone, never on the caller's config object.
    assert runtime.config is not base_config
    assert runtime.config.reranker_use_gpu is False
    assert runtime.registry.initialized is True
    assert runtime.engine.config is runtime.config
def test_run_strategy_routes_auto_keyword_queries_to_fts_search() -> None:
    """In auto mode, keyword-like queries go through plain FTS, not cascade."""
    engine = _FakeEngine(
        search_paths=[
            "D:/repo/src/codexlens/storage/registry.py",
            "D:/repo/build/lib/codexlens/storage/registry.py",
        ]
    )
    config = SimpleNamespace(cascade_strategy="staged", staged_stage2_mode="realtime")
    relevant = {benchmark._normalize_path_key("D:/repo/src/codexlens/storage/registry.py")}
    run = benchmark._run_strategy(
        engine,
        config,
        strategy_spec=benchmark.StrategySpec(strategy_key="auto", strategy="auto", stage2_mode=None),
        query="find_descendant_project_roots",
        source_path=Path("D:/repo"),
        k=5,
        coarse_k=20,
        relevant=relevant,
    )
    # Exactly one plain search, no cascade round-trips.
    assert len(engine.search_calls) == 1
    assert len(engine.cascade_calls) == 0
    assert run.effective_method == "fts"
    assert run.execution_method == "fts"
    assert run.hit_at_k is True
    # The build/ copy of registry.py counts as a generated artifact.
    assert run.generated_artifact_count == 1
    assert run.test_file_count == 0
def test_run_strategy_uses_cascade_for_dense_rerank_and_restores_config() -> None:
    """dense_rerank runs via cascade_search and must not leak config mutations."""
    engine = _FakeEngine(cascade_paths=["D:/repo/src/tools/smart-search.ts"])
    config = SimpleNamespace(cascade_strategy="staged", staged_stage2_mode="static_global_graph")
    relevant = {benchmark._normalize_path_key("D:/repo/src/tools/smart-search.ts")}
    run = benchmark._run_strategy(
        engine,
        config,
        strategy_spec=benchmark.StrategySpec(
            strategy_key="dense_rerank",
            strategy="dense_rerank",
            stage2_mode=None,
        ),
        query="how does smart search route keyword queries",
        source_path=Path("D:/repo"),
        k=5,
        coarse_k=20,
        relevant=relevant,
    )
    # Cascade path only; no plain search fallback.
    assert len(engine.search_calls) == 0
    assert len(engine.cascade_calls) == 1
    assert engine.cascade_calls[0]["strategy"] == "dense_rerank"
    assert run.effective_method == "dense_rerank"
    assert run.execution_method == "cascade"
    assert run.hit_at_k is True
    # Any temporary strategy overrides must be rolled back after the run.
    assert config.cascade_strategy == "staged"
    assert config.staged_stage2_mode == "static_global_graph"
def test_summarize_runs_tracks_effective_method_and_artifact_pressure() -> None:
    """Summary aggregates method usage plus generated/test-file counters."""
    summary = benchmark._summarize_runs(
        [
            # Hit via FTS; one generated artifact in the top-k.
            benchmark.StrategyRun(
                strategy_key="auto",
                strategy="auto",
                stage2_mode=None,
                effective_method="fts",
                execution_method="fts",
                latency_ms=10.0,
                topk_paths=["a"],
                first_hit_rank=1,
                hit_at_k=True,
                recall_at_k=1.0,
                generated_artifact_count=1,
                test_file_count=0,
                error=None,
            ),
            # Miss via hybrid; two test files polluted the top-k.
            benchmark.StrategyRun(
                strategy_key="auto",
                strategy="auto",
                stage2_mode=None,
                effective_method="hybrid",
                execution_method="hybrid",
                latency_ms=30.0,
                topk_paths=["b"],
                first_hit_rank=None,
                hit_at_k=False,
                recall_at_k=0.0,
                generated_artifact_count=0,
                test_file_count=2,
                error=None,
            ),
        ]
    )
    assert summary["effective_methods"] == {"fts": 1, "hybrid": 1}
    assert summary["runs_with_generated_artifacts"] == 1
    assert summary["runs_with_test_files"] == 1
    # Averages are over all runs (2), not only runs with non-zero counts.
    assert summary["avg_generated_artifact_count"] == 0.5
    assert summary["avg_test_file_count"] == 1.0

View File

@@ -0,0 +1,83 @@
"""Unit tests for Config .env overrides for final search ranking penalties."""
from __future__ import annotations
import tempfile
from pathlib import Path
import pytest
from codexlens.config import Config
@pytest.fixture
def temp_config_dir() -> Path:
    """Create temporary directory for config data_dir."""
    # ignore_cleanup_errors guards against file-lock failures (common on
    # Windows) during implicit cleanup; explicit cleanup below is likewise
    # best-effort.
    tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
    yield Path(tmpdir.name)
    try:
        tmpdir.cleanup()
    except (PermissionError, OSError):
        # A leaked temp dir is acceptable in tests; never fail teardown.
        pass
def test_search_penalty_env_overrides_apply(temp_config_dir: Path) -> None:
    """Valid .env penalty overrides replace the built-in defaults."""
    config = Config(data_dir=temp_config_dir)
    env_lines = [
        "TEST_FILE_PENALTY=0.25",
        "GENERATED_FILE_PENALTY=0.4",
        "",
    ]
    (temp_config_dir / ".env").write_text("\n".join(env_lines), encoding="utf-8")

    config.load_settings()

    assert config.test_file_penalty == 0.25
    assert config.generated_file_penalty == 0.4
def test_reranker_gpu_env_override_apply(temp_config_dir: Path) -> None:
    """RERANKER_USE_GPU=false in .env must disable GPU reranking."""
    config = Config(data_dir=temp_config_dir)
    env_body = "\n".join(["RERANKER_USE_GPU=false", ""])
    (temp_config_dir / ".env").write_text(env_body, encoding="utf-8")

    config.load_settings()

    assert config.reranker_use_gpu is False
def test_search_penalty_env_overrides_invalid_ignored(temp_config_dir: Path) -> None:
    """Unparseable .env penalty values are ignored; defaults remain in force."""
    config = Config(data_dir=temp_config_dir)
    env_body = "\n".join(
        [
            "TEST_FILE_PENALTY=oops",
            "GENERATED_FILE_PENALTY=nope",
            "",
        ]
    )
    (temp_config_dir / ".env").write_text(env_body, encoding="utf-8")

    config.load_settings()

    # Built-in defaults survive the bad overrides.
    assert config.test_file_penalty == 0.15
    assert config.generated_file_penalty == 0.35
    assert config.reranker_use_gpu is True

View File

@@ -0,0 +1,204 @@
# Fix: `import gc` was listed twice; keep a single occurrence and group
# imports stdlib / third-party / local per PEP 8.
import gc
import shutil
import sqlite3
import tempfile
import time
from pathlib import Path

import pytest

import codexlens.cli.embedding_manager as embedding_manager
from codexlens.cli.embedding_manager import get_embedding_stats_summary, get_embeddings_status
@pytest.fixture
def status_temp_dir() -> Path:
    """Yield a temp directory, retrying removal to tolerate lingering file locks."""
    temp_path = Path(tempfile.mkdtemp())
    try:
        yield temp_path
    finally:
        # Collect garbage first so unreferenced sqlite connections release
        # their file handles before we try to delete the files.
        gc.collect()
        for _ in range(5):
            try:
                if temp_path.exists():
                    shutil.rmtree(temp_path)
                break
            except PermissionError:
                # Something still holds a file open (typical on Windows);
                # back off briefly and retry. Gives up silently after 5 tries.
                time.sleep(0.1)
def _create_index_db(index_path: Path, files: list[str], embedded_files: list[str] | None = None) -> None:
index_path.parent.mkdir(parents=True, exist_ok=True)
with sqlite3.connect(index_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
CREATE TABLE files (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
content TEXT,
language TEXT,
hash TEXT
)
"""
)
cursor.executemany(
"INSERT INTO files (path, content, language, hash) VALUES (?, ?, ?, ?)",
[(file_path, "", "python", f"hash-{idx}") for idx, file_path in enumerate(files)],
)
if embedded_files is not None:
cursor.execute(
"""
CREATE TABLE semantic_chunks (
id INTEGER PRIMARY KEY,
file_path TEXT NOT NULL,
content TEXT,
embedding BLOB,
metadata TEXT,
category TEXT
)
"""
)
cursor.executemany(
"INSERT INTO semantic_chunks (file_path, content, embedding, metadata, category) VALUES (?, ?, ?, ?, ?)",
[(file_path, "chunk", b"vec", "{}", "code") for file_path in embedded_files],
)
conn.commit()
def _create_vectors_meta_db(meta_path: Path, embedded_files: list[str], binary_vector_count: int = 0) -> None:
meta_path.parent.mkdir(parents=True, exist_ok=True)
with sqlite3.connect(meta_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""
CREATE TABLE chunk_metadata (
chunk_id INTEGER PRIMARY KEY,
file_path TEXT NOT NULL,
content TEXT,
start_line INTEGER,
end_line INTEGER,
category TEXT,
metadata TEXT,
source_index_db TEXT
)
"""
)
cursor.execute(
"""
CREATE TABLE binary_vectors (
chunk_id INTEGER PRIMARY KEY,
vector BLOB NOT NULL
)
"""
)
cursor.executemany(
"""
INSERT INTO chunk_metadata (
chunk_id, file_path, content, start_line, end_line, category, metadata, source_index_db
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
[
(idx, file_path, "chunk", 1, 1, "code", "{}", str(meta_path.parent / "_index.db"))
for idx, file_path in enumerate(embedded_files, start=1)
],
)
cursor.executemany(
"INSERT INTO binary_vectors (chunk_id, vector) VALUES (?, ?)",
[(idx, b"\x01") for idx in range(1, binary_vector_count + 1)],
)
conn.commit()
def test_root_status_does_not_inherit_child_embeddings(
    monkeypatch: pytest.MonkeyPatch, status_temp_dir: Path
) -> None:
    """Root-level status must not count embeddings that only exist in a child index."""
    workspace = status_temp_dir / "workspace"
    workspace.mkdir()
    # Root index has files but no embeddings; only the child is embedded.
    _create_index_db(workspace / "_index.db", ["a.py", "b.py"])
    _create_index_db(workspace / "child" / "_index.db", ["child.py"], embedded_files=["child.py"])
    # Model metadata exists for the child index only; the root reports None.
    monkeypatch.setattr(
        embedding_manager,
        "_get_model_info_from_index",
        lambda index_path: {
            "model_profile": "fast",
            "model_name": "unit-test-model",
            "embedding_dim": 384,
            "backend": "fastembed",
            "created_at": "2026-03-13T00:00:00Z",
            "updated_at": "2026-03-13T00:00:00Z",
        } if index_path.parent.name == "child" else None,
    )
    status = get_embeddings_status(workspace)
    assert status["success"] is True
    result = status["result"]
    # Root coverage stays at zero even though the subtree has embeddings.
    assert result["coverage_percent"] == 0.0
    assert result["files_with_embeddings"] == 0
    assert result["root"]["has_embeddings"] is False
    assert result["model_info"] is None
    # The child's embeddings are still visible in the subtree rollup.
    assert result["subtree"]["indexes_with_embeddings"] == 1
    assert result["subtree"]["coverage_percent"] > 0
def test_root_status_uses_validated_centralized_metadata(status_temp_dir: Path) -> None:
    """Centralized metadata with real rows marks the root as embedded."""
    workspace = status_temp_dir / "workspace"
    workspace.mkdir()
    _create_index_db(workspace / "_index.db", ["a.py", "b.py"])
    # One of two files has centralized chunk metadata -> 50% coverage.
    _create_vectors_meta_db(workspace / "_vectors_meta.db", ["a.py"])
    (workspace / "_vectors.hnsw").write_bytes(b"hnsw")

    status = get_embeddings_status(workspace)
    assert status["success"] is True
    result = status["result"]

    expected_counts = {
        "coverage_percent": 50.0,
        "files_with_embeddings": 1,
        "total_chunks": 1,
    }
    for key, value in expected_counts.items():
        assert result[key] == value
    assert result["root"]["has_embeddings"] is True
    assert result["root"]["storage_mode"] == "centralized"
    assert result["centralized"]["dense_ready"] is True
    assert result["centralized"]["usable"] is True
def test_embedding_stats_summary_skips_ignored_artifact_indexes(status_temp_dir: Path) -> None:
    """Indexes under generated/ignored directories are excluded from the summary."""
    workspace = status_temp_dir / "workspace"
    workspace.mkdir()
    _create_index_db(workspace / "_index.db", ["root.py"])
    _create_index_db(workspace / "src" / "_index.db", ["src.py"])
    # dist/ and .workflow/ indexes must not be counted even with embeddings.
    _create_index_db(workspace / "dist" / "_index.db", ["bundle.py"], embedded_files=["bundle.py"])
    _create_index_db(workspace / ".workflow" / "_index.db", ["trace.py"], embedded_files=["trace.py"])

    summary = get_embedding_stats_summary(workspace)
    assert summary["success"] is True
    result = summary["result"]

    assert result["total_indexes"] == 2
    reported = {
        Path(item["path"]).relative_to(workspace).as_posix()
        for item in result["indexes"]
    }
    assert reported == {"_index.db", "src/_index.db"}
def test_root_status_ignores_empty_centralized_artifacts(status_temp_dir: Path) -> None:
    """Zero-row centralized artifacts must not mark the root as embedded."""
    workspace = status_temp_dir / "workspace"
    workspace.mkdir()
    _create_index_db(workspace / "_index.db", ["a.py", "b.py"])
    # Artifacts exist on disk but contain no metadata/vector rows.
    _create_vectors_meta_db(workspace / "_vectors_meta.db", [])
    (workspace / "_vectors.hnsw").write_bytes(b"hnsw")
    (workspace / "_binary_vectors.mmap").write_bytes(b"mmap")

    status = get_embeddings_status(workspace)
    assert status["success"] is True
    result = status["result"]

    centralized = result["centralized"]
    assert centralized["chunk_metadata_rows"] == 0
    assert centralized["binary_vector_rows"] == 0
    assert centralized["usable"] is False
    assert result["coverage_percent"] == 0.0
    assert result["files_with_embeddings"] == 0
    assert result["root"]["has_embeddings"] is False

View File

@@ -833,6 +833,36 @@ class TestHybridSearchAdaptiveWeights:
assert captured["weights"]["vector"] > 0.6
def test_default_engine_weights_keep_lsp_graph_backend_available(self):
    """Legacy public defaults should not discard LSP graph fusion weights internally."""
    from unittest.mock import patch

    engine = HybridSearchEngine()
    # One canned result per backend so fusion sees all four sources.
    results_map = {
        "exact": [SearchResult(path="a.py", score=10.0, excerpt="a")],
        "fuzzy": [SearchResult(path="b.py", score=9.0, excerpt="b")],
        "vector": [SearchResult(path="c.py", score=0.9, excerpt="c")],
        "lsp_graph": [SearchResult(path="d.py", score=0.8, excerpt="d")],
    }
    captured = {}
    from codexlens.search import ranking as ranking_module

    # Wrap RRF so we can observe the weight map actually used for fusion
    # while preserving the real fusion behavior.
    def capture_rrf(map_in, weights_in, k=60):
        captured["weights"] = dict(weights_in)
        return ranking_module.reciprocal_rank_fusion(map_in, weights_in, k=k)

    with patch.object(HybridSearchEngine, "_search_parallel", return_value=results_map), patch(
        "codexlens.search.hybrid_search.reciprocal_rank_fusion",
        side_effect=capture_rrf,
    ):
        engine.search(Path("dummy.db"), "auth flow", enable_vector=True, enable_lsp_graph=True)

    # Public default weights stay untouched...
    assert engine.weights == HybridSearchEngine.DEFAULT_WEIGHTS
    # ...but the internal fusion still received a positive lsp_graph weight.
    assert "lsp_graph" in captured["weights"]
    assert captured["weights"]["lsp_graph"] > 0.0
def test_reranking_enabled(self, tmp_path):
"""Reranking runs only when explicitly enabled via config."""
from unittest.mock import patch

View File

@@ -93,7 +93,8 @@ def test_get_cross_encoder_reranker_uses_factory_backend_onnx_gpu_flag(
enable_reranking=True,
enable_cross_encoder_rerank=True,
reranker_backend="onnx",
embedding_use_gpu=False,
embedding_use_gpu=True,
reranker_use_gpu=False,
)
engine = HybridSearchEngine(config=config)
@@ -109,6 +110,58 @@ def test_get_cross_encoder_reranker_uses_factory_backend_onnx_gpu_flag(
assert get_args["kwargs"]["use_gpu"] is False
def test_get_cross_encoder_reranker_uses_cpu_device_for_legacy_when_reranker_gpu_disabled(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
) -> None:
    """Legacy backend must be forced onto CPU when reranker GPU use is disabled."""
    calls: dict[str, object] = {}

    def fake_check_reranker_available(backend: str):
        calls["check_backend"] = backend
        return True, None

    # Unique object proves the factory return value is passed through as-is.
    sentinel = object()

    def fake_get_reranker(*, backend: str, model_name=None, device=None, **kwargs):
        # Record the exact factory arguments for later assertions.
        calls["get_args"] = {
            "backend": backend,
            "model_name": model_name,
            "device": device,
            "kwargs": kwargs,
        }
        return sentinel

    monkeypatch.setattr(
        "codexlens.semantic.reranker.check_reranker_available",
        fake_check_reranker_available,
    )
    monkeypatch.setattr(
        "codexlens.semantic.reranker.get_reranker",
        fake_get_reranker,
    )

    # Embedding GPU on, reranker GPU off: the reranker flag must win.
    config = Config(
        data_dir=tmp_path / "legacy-cpu",
        enable_reranking=True,
        enable_cross_encoder_rerank=True,
        reranker_backend="legacy",
        reranker_model="dummy-model",
        embedding_use_gpu=True,
        reranker_use_gpu=False,
    )
    engine = HybridSearchEngine(config=config)

    reranker = engine._get_cross_encoder_reranker()

    assert reranker is sentinel
    assert calls["check_backend"] == "legacy"
    get_args = calls["get_args"]
    assert isinstance(get_args, dict)
    assert get_args["backend"] == "legacy"
    assert get_args["model_name"] == "dummy-model"
    assert get_args["device"] == "cpu"
def test_get_cross_encoder_reranker_returns_none_when_backend_unavailable(
monkeypatch: pytest.MonkeyPatch,
tmp_path,

View File

@@ -150,6 +150,30 @@ class TestHybridSearchBackends:
assert "exact" in backends
assert "vector" in backends
def test_search_lexical_priority_query_skips_vector_backend(self, temp_paths, mock_config):
    """Config/env/factory queries should stay lexical-first in hybrid mode."""
    engine = HybridSearchEngine(config=mock_config)
    index_path = temp_paths / "_index.db"
    with patch.object(engine, "_search_parallel") as mock_parallel:
        mock_parallel.return_value = {
            "exact": [SearchResult(path="config.py", score=10.0, excerpt="exact")],
            "fuzzy": [SearchResult(path="env_config.py", score=8.0, excerpt="fuzzy")],
        }
        results = engine.search(
            index_path,
            "embedding backend fastembed local litellm api config",
            enable_fuzzy=True,
            enable_vector=True,
        )
        assert len(results) >= 1
        # Third positional argument to _search_parallel is the backend list.
        backends = mock_parallel.call_args[0][2]
        assert "exact" in backends
        assert "fuzzy" in backends
        # Vector must have been routed away despite enable_vector=True.
        assert "vector" not in backends
def test_search_pure_vector(self, temp_paths, mock_config):
"""Pure vector mode should only use vector backend."""
engine = HybridSearchEngine(config=mock_config)
@@ -257,6 +281,39 @@ class TestHybridSearchFusion:
mock_rerank.assert_called_once()
def test_search_lexical_priority_query_skips_expensive_reranking(self, temp_paths, mock_config):
    """Lexical-priority queries should bypass embedder and cross-encoder reranking."""
    # Reranking is fully enabled in config; query routing must still skip it.
    mock_config.enable_reranking = True
    mock_config.enable_cross_encoder_rerank = True
    mock_config.reranking_top_k = 50
    mock_config.reranker_top_k = 20
    engine = HybridSearchEngine(config=mock_config)
    index_path = temp_paths / "_index.db"
    with patch.object(engine, "_search_parallel") as mock_parallel:
        mock_parallel.return_value = {
            "exact": [SearchResult(path="config.py", score=10.0, excerpt="code")],
            "fuzzy": [SearchResult(path="env_config.py", score=9.0, excerpt="env vars")],
        }
        with patch("codexlens.search.hybrid_search.rerank_results") as mock_rerank, patch(
            "codexlens.search.hybrid_search.cross_encoder_rerank"
        ) as mock_cross_encoder, patch.object(
            engine,
            "_get_cross_encoder_reranker",
        ) as mock_get_reranker:
            results = engine.search(
                index_path,
                "get_reranker factory onnx backend selection",
                enable_fuzzy=True,
                enable_vector=True,
            )
            assert len(results) >= 1
            # None of the reranking machinery may even be constructed.
            mock_rerank.assert_not_called()
            mock_cross_encoder.assert_not_called()
            mock_get_reranker.assert_not_called()
def test_search_category_filtering(self, temp_paths, mock_config):
"""Category filtering should separate code/doc results by intent."""
mock_config.enable_category_filter = True
@@ -316,6 +373,217 @@ class TestSearchParallel:
mock_fuzzy.assert_called_once()
class TestCentralizedMetadataFetch:
    """Tests for centralized metadata retrieval helpers."""

    def test_fetch_from_vector_meta_store_clamps_negative_scores(self, temp_paths, mock_config, monkeypatch):
        """Slightly negative ANN similarity scores must be clamped to 0.0."""
        engine = HybridSearchEngine(config=mock_config)

        class FakeMetaStore:
            # Context-manager stub standing in for VectorMetadataStore.
            def __init__(self, _path):
                pass

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def get_chunks_by_ids(self, _chunk_ids, category=None):
                # The helper must not pass a category filter here.
                assert category is None
                return [
                    {
                        "chunk_id": 7,
                        "file_path": "src/app.py",
                        "content": "def app(): pass",
                        "metadata": {},
                        "start_line": 1,
                        "end_line": 1,
                    }
                ]

        import codexlens.storage.vector_meta_store as vector_meta_store

        monkeypatch.setattr(vector_meta_store, "VectorMetadataStore", FakeMetaStore)

        # Chunk 7 carries a slightly negative similarity score.
        results = engine._fetch_from_vector_meta_store(
            temp_paths / "_vectors_meta.db",
            [7],
            {7: -0.01},
        )

        assert len(results) == 1
        assert results[0].path == "src/app.py"
        assert results[0].score == 0.0
class TestCentralizedVectorCaching:
    """Tests for centralized vector search runtime caches."""

    def test_search_vector_centralized_reuses_cached_resources(
        self,
        temp_paths,
        mock_config,
    ):
        """Model config, embedder, ANN index and query embedding are loaded once."""
        engine = HybridSearchEngine(config=mock_config)
        hnsw_path = temp_paths / "_vectors.hnsw"
        hnsw_path.write_bytes(b"hnsw")

        # Track which paths VectorStore is opened with (expected: exactly once).
        vector_store_opened: List[Path] = []

        class FakeVectorStore:
            def __init__(self, path):
                vector_store_opened.append(Path(path))

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def get_model_config(self):
                return {
                    "backend": "fastembed",
                    "model_name": "BAAI/bge-small-en-v1.5",
                    "model_profile": "fast",
                    "embedding_dim": 384,
                }

        class FakeEmbedder:
            embedding_dim = 384

            def __init__(self):
                self.embed_calls: List[str] = []

            def embed_single(self, query):
                self.embed_calls.append(query)
                return [0.1, 0.2, 0.3]

        class FakeAnnIndex:
            def __init__(self):
                self.load_calls = 0
                self.search_calls = 0

            def load(self):
                self.load_calls += 1
                return True

            def count(self):
                return 3

            def search(self, _query_vec, top_k):
                self.search_calls += 1
                # With limit=5 the engine requests an over-fetched top_k of 10.
                assert top_k == 10
                return [7], [0.2]

        fake_embedder = FakeEmbedder()
        fake_ann_index = FakeAnnIndex()
        with patch("codexlens.semantic.vector_store.VectorStore", FakeVectorStore), patch(
            "codexlens.semantic.factory.get_embedder",
            return_value=fake_embedder,
        ) as mock_get_embedder, patch(
            "codexlens.semantic.ann_index.ANNIndex.create_central",
            return_value=fake_ann_index,
        ) as mock_create_central, patch.object(
            engine,
            "_fetch_chunks_by_ids_centralized",
            return_value=[SearchResult(path="src/app.py", score=0.8, excerpt="hit")],
        ) as mock_fetch:
            # Two searches from different child indexes sharing one HNSW file.
            first = engine._search_vector_centralized(
                temp_paths / "child-a" / "_index.db",
                hnsw_path,
                "smart search routing",
                limit=5,
            )
            second = engine._search_vector_centralized(
                temp_paths / "child-b" / "_index.db",
                hnsw_path,
                "smart search routing",
                limit=5,
            )

        assert [result.path for result in first] == ["src/app.py"]
        assert [result.path for result in second] == ["src/app.py"]
        # Heavy resources were created exactly once (store opened at the
        # centralized root, single embedder, single ANN index load) and the
        # repeated query was embedded only once...
        assert vector_store_opened == [temp_paths / "_index.db"]
        assert mock_get_embedder.call_count == 1
        assert mock_create_central.call_count == 1
        assert fake_ann_index.load_calls == 1
        assert fake_embedder.embed_calls == ["smart search routing"]
        # ...while per-query ANN search and chunk fetch still ran twice.
        assert fake_ann_index.search_calls == 2
        assert mock_fetch.call_count == 2

    def test_search_vector_centralized_respects_embedding_use_gpu(
        self,
        temp_paths,
        mock_config,
    ):
        """The embedder factory receives the stored profile and GPU preference."""
        engine = HybridSearchEngine(config=mock_config)
        hnsw_path = temp_paths / "_vectors.hnsw"
        hnsw_path.write_bytes(b"hnsw")

        class FakeVectorStore:
            def __init__(self, _path):
                pass

            def __enter__(self):
                return self

            def __exit__(self, exc_type, exc, tb):
                return False

            def get_model_config(self):
                return {
                    "backend": "fastembed",
                    "model_name": "BAAI/bge-small-en-v1.5",
                    "model_profile": "code",
                    "embedding_dim": 384,
                }

        class FakeEmbedder:
            embedding_dim = 384

            def embed_single(self, _query):
                return [0.1, 0.2]

        class FakeAnnIndex:
            def load(self):
                return True

            def count(self):
                return 1

            def search(self, _query_vec, top_k):
                # With limit=3 the engine over-fetches to top_k=6.
                assert top_k == 6
                return [9], [0.1]

        with patch("codexlens.semantic.vector_store.VectorStore", FakeVectorStore), patch(
            "codexlens.semantic.factory.get_embedder",
            return_value=FakeEmbedder(),
        ) as mock_get_embedder, patch(
            "codexlens.semantic.ann_index.ANNIndex.create_central",
            return_value=FakeAnnIndex(),
        ), patch.object(
            engine,
            "_fetch_chunks_by_ids_centralized",
            return_value=[SearchResult(path="src/app.py", score=0.9, excerpt="hit")],
        ):
            results = engine._search_vector_centralized(
                temp_paths / "_index.db",
                hnsw_path,
                "semantic query",
                limit=3,
            )

        assert len(results) == 1
        assert mock_get_embedder.call_count == 1
        # Factory must be called with the stored profile and GPU disabled.
        assert mock_get_embedder.call_args.kwargs == {
            "backend": "fastembed",
            "profile": "code",
            "use_gpu": False,
        }
# =============================================================================
# Tests: _search_lsp_graph
# =============================================================================

View File

@@ -0,0 +1,674 @@
import json
from typer.testing import CliRunner
import codexlens.cli.commands as commands
from codexlens.cli.commands import app
import codexlens.cli.embedding_manager as embedding_manager
from codexlens.config import Config
from codexlens.entities import SearchResult
from codexlens.search.chain_search import ChainSearchResult, SearchStats
def test_index_status_json_preserves_legacy_embeddings_contract(
    monkeypatch,
    tmp_path,
) -> None:
    """`index status --json` must expose both the legacy and the new embeddings payloads.

    The legacy summary must appear unchanged under ``embeddings`` (and
    ``embeddings_summary``) while the richer status object appears under
    ``embeddings_status`` — all four keys are pinned below.
    """
    workspace = tmp_path / "workspace"
    workspace.mkdir()
    (workspace / "_index.db").touch()
    # Legacy-shaped summary as produced by get_embedding_stats_summary().
    legacy_summary = {
        "total_indexes": 3,
        "indexes_with_embeddings": 1,
        "total_chunks": 42,
        "indexes": [
            {
                "project": "child",
                "path": str(workspace / "child" / "_index.db"),
                "has_embeddings": True,
                "total_chunks": 42,
                "total_files": 1,
                "coverage_percent": 100.0,
            }
        ],
    }
    # Newer status payload as produced by get_embeddings_status(), including
    # root/subtree/centralized breakdowns.
    root_status = {
        "total_indexes": 3,
        "total_files": 2,
        "files_with_embeddings": 0,
        "files_without_embeddings": 2,
        "total_chunks": 0,
        "coverage_percent": 0.0,
        "indexes_with_embeddings": 1,
        "indexes_without_embeddings": 2,
        "model_info": None,
        "root": {
            "index_path": str(workspace / "_index.db"),
            "exists": False,
            "total_files": 2,
            "files_with_embeddings": 0,
            "files_without_embeddings": 2,
            "total_chunks": 0,
            "coverage_percent": 0.0,
            "has_embeddings": False,
            "storage_mode": "none",
        },
        "subtree": {
            "total_indexes": 3,
            "total_files": 3,
            "files_with_embeddings": 1,
            "files_without_embeddings": 2,
            "total_chunks": 42,
            "coverage_percent": 33.3,
            "indexes_with_embeddings": 1,
            "indexes_without_embeddings": 2,
        },
        "centralized": {
            "dense_index_exists": False,
            "binary_index_exists": False,
            "dense_ready": False,
            "binary_ready": False,
            "usable": False,
            "chunk_metadata_rows": 0,
            "binary_vector_rows": 0,
            "files_with_embeddings": 0,
        },
    }
    # Route both embedding_manager entry points to the canned payloads above.
    monkeypatch.setattr(
        embedding_manager,
        "get_embeddings_status",
        lambda _index_root: {"success": True, "result": root_status},
    )
    monkeypatch.setattr(
        embedding_manager,
        "get_embedding_stats_summary",
        lambda _index_root: {"success": True, "result": legacy_summary},
    )
    # Minimal registry/mapper stand-ins; the command only needs initialize/close
    # and a source->index-db mapping.
    monkeypatch.setattr(
        commands,
        "RegistryStore",
        type(
            "FakeRegistryStore",
            (),
            {
                "initialize": lambda self: None,
                "close": lambda self: None,
            },
        ),
    )
    monkeypatch.setattr(
        commands,
        "PathMapper",
        type(
            "FakePathMapper",
            (),
            {
                "source_to_index_db": lambda self, _target_path: workspace / "_index.db",
            },
        ),
    )
    runner = CliRunner()
    result = runner.invoke(app, ["index", "status", str(workspace), "--json"])
    assert result.exit_code == 0, result.output
    payload = json.loads(result.stdout)
    body = payload["result"]
    # Legacy contract: "embeddings" mirrors the summary exactly.
    assert body["embeddings"] == legacy_summary
    assert body["embeddings_error"] is None
    # New contract: full status object plus explicit summary alias.
    assert body["embeddings_status"] == root_status
    assert body["embeddings_status_error"] is None
    assert body["embeddings_summary"] == legacy_summary
assert body["embeddings_summary"] == legacy_summary
def test_search_json_preserves_dense_rerank_method_label(
    monkeypatch,
    tmp_path,
) -> None:
    """`--method dense_rerank` must dispatch via cascade_search and keep its label in JSON."""
    workspace = tmp_path / "workspace"
    workspace.mkdir()

    canned_result = ChainSearchResult(
        query="greet function",
        results=[
            SearchResult(
                path=str(workspace / "src" / "app.py"),
                score=0.97,
                excerpt="def greet(name):",
                content="def greet(name):\n return f'hello {name}'\n",
            )
        ],
        symbols=[],
        stats=SearchStats(dirs_searched=2, files_matched=1, time_ms=12.5),
    )

    seen: dict[str, object] = {}
    monkeypatch.setattr(
        commands.Config,
        "load",
        staticmethod(lambda: Config(data_dir=tmp_path / "data")),
    )

    class FakeRegistryStore:
        def initialize(self):
            pass

        def close(self):
            pass

    class FakePathMapper:
        pass

    monkeypatch.setattr(commands, "RegistryStore", FakeRegistryStore)
    monkeypatch.setattr(commands, "PathMapper", FakePathMapper)

    class FakeChainSearchEngine:
        def __init__(self, registry, mapper, config=None):
            seen["registry"] = registry
            seen["mapper"] = mapper
            seen["config"] = config

        def search(self, *_args, **_kwargs):
            raise AssertionError("dense_rerank should dispatch via cascade_search")

        def cascade_search(self, query, source_path, k=10, options=None, strategy=None):
            seen["query"] = query
            seen["source_path"] = source_path
            seen["limit"] = k
            seen["options"] = options
            seen["strategy"] = strategy
            return canned_result

    monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine)

    outcome = CliRunner().invoke(
        app,
        ["search", "greet function", "--path", str(workspace), "--method", "dense_rerank", "--json"],
    )

    assert outcome.exit_code == 0, outcome.output
    body = json.loads(outcome.stdout)["result"]
    assert body["method"] == "dense_rerank"
    assert body["count"] == 1
    assert body["results"][0]["path"] == str(workspace / "src" / "app.py")
    assert seen["strategy"] == "dense_rerank"
    assert seen["limit"] == 20
def test_search_json_auto_routes_keyword_queries_to_fts(
    monkeypatch,
    tmp_path,
) -> None:
    """Auto method selection sends identifier-style queries down the FTS path."""
    workspace = tmp_path / "workspace"
    workspace.mkdir()

    canned_result = ChainSearchResult(
        query="windowsHide",
        results=[
            SearchResult(
                path=str(workspace / "src" / "spawn.ts"),
                score=0.91,
                excerpt="windowsHide: true",
                content="spawn('node', [], { windowsHide: true })",
            )
        ],
        symbols=[],
        stats=SearchStats(dirs_searched=2, files_matched=1, time_ms=8.0),
    )

    seen: dict[str, object] = {}
    monkeypatch.setattr(
        commands.Config,
        "load",
        staticmethod(lambda: Config(data_dir=tmp_path / "data")),
    )

    class FakeRegistryStore:
        def initialize(self):
            pass

        def close(self):
            pass

    class FakePathMapper:
        pass

    monkeypatch.setattr(commands, "RegistryStore", FakeRegistryStore)
    monkeypatch.setattr(commands, "PathMapper", FakePathMapper)

    class FakeChainSearchEngine:
        def __init__(self, registry, mapper, config=None):
            seen["config"] = config

        def search(self, query, source_path, options=None):
            seen["query"] = query
            seen["source_path"] = source_path
            seen["options"] = options
            return canned_result

        def cascade_search(self, *_args, **_kwargs):
            raise AssertionError("auto keyword queries should not dispatch to cascade_search")

    monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine)

    outcome = CliRunner().invoke(
        app,
        ["search", "windowsHide", "--path", str(workspace), "--json"],
    )

    assert outcome.exit_code == 0, outcome.output
    body = json.loads(outcome.stdout)["result"]
    assert body["method"] == "fts"
    assert seen["options"].enable_vector is False
    assert seen["options"].hybrid_mode is False
def test_search_json_auto_routes_mixed_queries_to_hybrid(
    monkeypatch,
    tmp_path,
) -> None:
    """Natural-language queries containing identifiers should auto-route to hybrid search."""
    workspace = tmp_path / "workspace"
    workspace.mkdir()

    canned_result = ChainSearchResult(
        query="how does my_function work",
        results=[
            SearchResult(
                path=str(workspace / "src" / "app.py"),
                score=0.81,
                excerpt="def my_function():",
                content="def my_function():\n return 1\n",
            )
        ],
        symbols=[],
        stats=SearchStats(dirs_searched=2, files_matched=1, time_ms=10.0),
    )

    seen: dict[str, object] = {}
    monkeypatch.setattr(
        commands.Config,
        "load",
        staticmethod(lambda: Config(data_dir=tmp_path / "data")),
    )

    class FakeRegistryStore:
        def initialize(self):
            pass

        def close(self):
            pass

    class FakePathMapper:
        pass

    monkeypatch.setattr(commands, "RegistryStore", FakeRegistryStore)
    monkeypatch.setattr(commands, "PathMapper", FakePathMapper)

    class FakeChainSearchEngine:
        def __init__(self, registry, mapper, config=None):
            seen["config"] = config

        def search(self, query, source_path, options=None):
            seen["query"] = query
            seen["source_path"] = source_path
            seen["options"] = options
            return canned_result

        def cascade_search(self, *_args, **_kwargs):
            raise AssertionError("mixed auto queries should not dispatch to cascade_search")

    monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine)

    outcome = CliRunner().invoke(
        app,
        ["search", "how does my_function work", "--path", str(workspace), "--json"],
    )

    assert outcome.exit_code == 0, outcome.output
    body = json.loads(outcome.stdout)["result"]
    assert body["method"] == "hybrid"
    assert seen["options"].enable_vector is True
    assert seen["options"].hybrid_mode is True
    assert seen["options"].enable_cascade is False
def test_search_json_auto_routes_generated_artifact_queries_to_fts(
    monkeypatch,
    tmp_path,
) -> None:
    """Queries that explicitly mention build artifacts should auto-route to FTS."""
    workspace = tmp_path / "workspace"
    workspace.mkdir()

    canned_result = ChainSearchResult(
        query="dist bundle output",
        results=[
            SearchResult(
                path=str(workspace / "dist" / "bundle.js"),
                score=0.77,
                excerpt="bundle output",
                content="console.log('bundle')",
            )
        ],
        symbols=[],
        stats=SearchStats(dirs_searched=2, files_matched=1, time_ms=9.0),
    )

    seen: dict[str, object] = {}
    monkeypatch.setattr(
        commands.Config,
        "load",
        staticmethod(lambda: Config(data_dir=tmp_path / "data")),
    )

    class FakeRegistryStore:
        def initialize(self):
            pass

        def close(self):
            pass

    class FakePathMapper:
        pass

    monkeypatch.setattr(commands, "RegistryStore", FakeRegistryStore)
    monkeypatch.setattr(commands, "PathMapper", FakePathMapper)

    class FakeChainSearchEngine:
        def __init__(self, registry, mapper, config=None):
            seen["config"] = config

        def search(self, query, source_path, options=None):
            seen["query"] = query
            seen["source_path"] = source_path
            seen["options"] = options
            return canned_result

        def cascade_search(self, *_args, **_kwargs):
            raise AssertionError("generated artifact auto queries should not dispatch to cascade_search")

    monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine)

    outcome = CliRunner().invoke(
        app,
        ["search", "dist bundle output", "--path", str(workspace), "--json"],
    )

    assert outcome.exit_code == 0, outcome.output
    body = json.loads(outcome.stdout)["result"]
    assert body["method"] == "fts"
    assert seen["options"].enable_vector is False
    assert seen["options"].hybrid_mode is False
def test_auto_select_search_method_prefers_fts_for_lexical_config_queries() -> None:
    """Config/factory-flavoured queries pick fts; conceptual questions pick dense_rerank."""
    expectations = {
        "embedding backend fastembed local litellm api config": "fts",
        "get_reranker factory onnx backend selection": "fts",
        "how to authenticate users safely?": "dense_rerank",
    }
    for query, expected in expectations.items():
        assert commands._auto_select_search_method(query) == expected
def test_search_json_fts_zero_results_uses_filesystem_fallback(
    monkeypatch,
    tmp_path,
) -> None:
    """When an explicit FTS search finds nothing, the CLI must fall back to ripgrep.

    The fallback's results, its metadata block, and the merged stats must all
    surface in the JSON payload, and the fallback helper must be invoked exactly
    once with the original query/path/limit.
    """
    workspace = tmp_path / "workspace"
    workspace.mkdir()
    # Indexed search result with zero hits — the trigger for the fallback path.
    indexed_result = ChainSearchResult(
        query="find_descendant_project_roots",
        results=[],
        symbols=[],
        stats=SearchStats(dirs_searched=3, files_matched=0, time_ms=7.5),
    )
    # Single hit the fake filesystem fallback will return.
    fallback_result = SearchResult(
        path=str(workspace / "src" / "registry.py"),
        score=1.0,
        excerpt="def find_descendant_project_roots(...):",
        content=None,
        metadata={
            "filesystem_fallback": True,
            "backend": "ripgrep-fallback",
            "stale_index_suspected": True,
        },
        start_line=12,
        end_line=12,
    )
    captured: dict[str, object] = {"fallback_calls": 0}
    monkeypatch.setattr(commands.Config, "load", staticmethod(lambda: Config(data_dir=tmp_path / "data")))
    monkeypatch.setattr(
        commands,
        "RegistryStore",
        type("FakeRegistryStore", (), {"initialize": lambda self: None, "close": lambda self: None}),
    )
    monkeypatch.setattr(commands, "PathMapper", type("FakePathMapper", (), {}))

    class FakeChainSearchEngine:
        def __init__(self, registry, mapper, config=None):
            captured["config"] = config

        def search(self, query, source_path, options=None):
            captured["query"] = query
            captured["source_path"] = source_path
            captured["options"] = options
            return indexed_result

        def cascade_search(self, *_args, **_kwargs):
            raise AssertionError("fts zero-result queries should not dispatch to cascade_search")

    # Keyword-only signature mirrors _filesystem_fallback_search's interface.
    def fake_fallback(query, source_path, *, limit, config, code_only=False, exclude_extensions=None):
        captured["fallback_calls"] = int(captured["fallback_calls"]) + 1
        captured["fallback_query"] = query
        captured["fallback_path"] = source_path
        captured["fallback_limit"] = limit
        captured["fallback_code_only"] = code_only
        captured["fallback_exclude_extensions"] = exclude_extensions
        return {
            "results": [fallback_result],
            "time_ms": 2.5,
            "fallback": {
                "backend": "ripgrep-fallback",
                "stale_index_suspected": True,
                "reason": "Indexed FTS search returned no results; filesystem fallback used.",
            },
        }

    monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine)
    monkeypatch.setattr(commands, "_filesystem_fallback_search", fake_fallback)
    runner = CliRunner()
    result = runner.invoke(
        app,
        ["search", "find_descendant_project_roots", "--method", "fts", "--path", str(workspace), "--json"],
    )
    assert result.exit_code == 0, result.output
    body = json.loads(result.stdout)["result"]
    assert body["method"] == "fts"
    assert body["count"] == 1
    assert body["results"][0]["path"] == str(workspace / "src" / "registry.py")
    assert body["results"][0]["excerpt"] == "def find_descendant_project_roots(...):"
    assert body["stats"]["files_matched"] == 1
    # 10.0 == 7.5 (indexed) + 2.5 (fallback) — presumably the CLI sums them; pinned here.
    assert body["stats"]["time_ms"] == 10.0
    assert body["fallback"] == {
        "backend": "ripgrep-fallback",
        "stale_index_suspected": True,
        "reason": "Indexed FTS search returned no results; filesystem fallback used.",
    }
    assert captured["fallback_calls"] == 1
    assert captured["fallback_query"] == "find_descendant_project_roots"
    assert captured["fallback_path"] == workspace
    assert captured["fallback_limit"] == 20
    assert captured["options"].enable_vector is False
    assert captured["options"].hybrid_mode is False
def test_search_json_hybrid_zero_results_does_not_use_filesystem_fallback(
    monkeypatch,
    tmp_path,
) -> None:
    """Hybrid searches that return nothing must not trigger the filesystem fallback."""
    workspace = tmp_path / "workspace"
    workspace.mkdir()

    empty_result = ChainSearchResult(
        query="how does my_function work",
        results=[],
        symbols=[],
        stats=SearchStats(dirs_searched=4, files_matched=0, time_ms=11.0),
    )

    seen: dict[str, object] = {"fallback_calls": 0}
    monkeypatch.setattr(
        commands.Config,
        "load",
        staticmethod(lambda: Config(data_dir=tmp_path / "data")),
    )

    class FakeRegistryStore:
        def initialize(self):
            pass

        def close(self):
            pass

    class FakePathMapper:
        pass

    monkeypatch.setattr(commands, "RegistryStore", FakeRegistryStore)
    monkeypatch.setattr(commands, "PathMapper", FakePathMapper)

    class FakeChainSearchEngine:
        def __init__(self, registry, mapper, config=None):
            seen["config"] = config

        def search(self, query, source_path, options=None):
            seen["query"] = query
            seen["source_path"] = source_path
            seen["options"] = options
            return empty_result

        def cascade_search(self, *_args, **_kwargs):
            raise AssertionError("hybrid queries should not dispatch to cascade_search")

    def fake_fallback(*_args, **_kwargs):
        seen["fallback_calls"] = int(seen["fallback_calls"]) + 1
        return None

    monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine)
    monkeypatch.setattr(commands, "_filesystem_fallback_search", fake_fallback)

    outcome = CliRunner().invoke(
        app,
        ["search", "how does my_function work", "--path", str(workspace), "--json"],
    )

    assert outcome.exit_code == 0, outcome.output
    body = json.loads(outcome.stdout)["result"]
    assert body["method"] == "hybrid"
    assert body["count"] == 0
    assert "fallback" not in body
    assert body["stats"]["files_matched"] == 0
    assert body["stats"]["time_ms"] == 11.0
    assert seen["fallback_calls"] == 0
    assert seen["options"].enable_vector is True
    assert seen["options"].hybrid_mode is True
def test_filesystem_fallback_search_prefers_source_definitions_for_keyword_queries(
    monkeypatch,
    tmp_path,
) -> None:
    """Ripgrep fallback results must rank source definition > reference > test file.

    Feeds three canned ripgrep JSON match events (test hit first on purpose) and
    asserts the fallback re-orders them with strictly decreasing scores.
    """
    workspace = tmp_path / "workspace"
    workspace.mkdir()
    source_path = workspace / "src" / "registry.py"
    test_path = workspace / "tests" / "test_registry.py"
    ref_path = workspace / "src" / "chain_search.py"
    # Canned `rg --json` match events; ordering is deliberately worst-first.
    match_lines = [
        {
            "type": "match",
            "data": {
                "path": {"text": str(test_path)},
                "lines": {"text": "def test_find_descendant_project_roots_returns_nested_project_roots():\n"},
                "line_number": 12,
            },
        },
        {
            "type": "match",
            "data": {
                "path": {"text": str(source_path)},
                "lines": {"text": "def find_descendant_project_roots(self, source_root: Path) -> List[DirMapping]:\n"},
                "line_number": 48,
            },
        },
        {
            "type": "match",
            "data": {
                "path": {"text": str(ref_path)},
                "lines": {"text": "descendant_roots = self.registry.find_descendant_project_roots(source_root)\n"},
                "line_number": 91,
            },
        },
    ]
    # Pretend ripgrep is installed and make subprocess.run emit the canned events.
    monkeypatch.setattr(commands.shutil, "which", lambda _name: "rg")
    monkeypatch.setattr(
        commands.subprocess,
        "run",
        lambda *_args, **_kwargs: type(
            "FakeCompletedProcess",
            (),
            {
                "returncode": 0,
                "stdout": "\n".join(json.dumps(line) for line in match_lines),
                "stderr": "",
            },
        )(),
    )
    fallback = commands._filesystem_fallback_search(
        "find_descendant_project_roots",
        workspace,
        limit=5,
        config=Config(data_dir=tmp_path / "data"),
    )
    assert fallback is not None
    assert fallback["fallback"]["backend"] == "ripgrep-fallback"
    # Expected ranking: definition site, then call site, then test file.
    assert fallback["results"][0].path == str(source_path)
    assert fallback["results"][1].path == str(ref_path)
    assert fallback["results"][2].path == str(test_path)
    assert fallback["results"][0].score > fallback["results"][1].score > fallback["results"][2].score
def test_clean_json_reports_partial_success_when_locked_files_remain(
    monkeypatch,
    tmp_path,
) -> None:
    """`clean --json` still succeeds but reports partial=True and the locked paths."""
    workspace = tmp_path / "workspace"
    project_index = tmp_path / "indexes" / "workspace"
    project_index.mkdir(parents=True)
    (project_index / "_index.db").write_text("db", encoding="utf-8")
    locked_path = project_index / "nested" / "_index.db"
    locked_path.parent.mkdir(parents=True)
    locked_path.write_text("locked", encoding="utf-8")

    seen: dict[str, object] = {}

    class FakePathMapper:
        def __init__(self):
            self.index_root = tmp_path / "indexes"

        def source_to_index_dir(self, source_path):
            seen["mapped_source"] = source_path
            return project_index

    class FakeRegistryStore:
        def initialize(self):
            seen["registry_initialized"] = True

        def unregister_project(self, source_path):
            seen["unregistered_project"] = source_path
            return True

        def close(self):
            seen["registry_closed"] = True

    def fake_remove_tree(target):
        seen["removed_target"] = target
        return {
            "removed": False,
            "partial": True,
            "locked_paths": [str(locked_path)],
            "remaining_path": str(project_index),
            "errors": [],
        }

    monkeypatch.setattr(commands, "PathMapper", FakePathMapper)
    monkeypatch.setattr(commands, "RegistryStore", FakeRegistryStore)
    monkeypatch.setattr(commands, "_remove_tree_best_effort", fake_remove_tree)

    outcome = CliRunner().invoke(app, ["clean", str(workspace), "--json"])

    assert outcome.exit_code == 0, outcome.output
    payload = json.loads(outcome.stdout)
    body = payload["result"]
    assert payload["success"] is True
    assert body["cleaned"] == str(workspace.resolve())
    assert body["index_path"] == str(project_index)
    assert body["partial"] is True
    assert body["locked_paths"] == [str(locked_path)]
    assert body["remaining_path"] == str(project_index)
    assert seen["registry_initialized"] is True
    assert seen["registry_closed"] is True
    assert seen["unregistered_project"] == workspace.resolve()
    assert seen["removed_target"] == project_index

View File

@@ -5,7 +5,10 @@ from pathlib import Path
from unittest.mock import MagicMock
from codexlens.config import Config
from codexlens.storage.index_tree import IndexTreeBuilder
from codexlens.storage.dir_index import DirIndexStore
from codexlens.storage.index_tree import DirBuildResult, IndexTreeBuilder
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.registry import RegistryStore
def _relative_dirs(source_root: Path, dirs_by_depth: dict[int, list[Path]]) -> set[str]:
@@ -145,3 +148,148 @@ def test_builder_loads_saved_ignore_and_extension_filters_by_default(tmp_path: P
assert [path.name for path in source_files] == ["app.ts"]
assert "frontend/dist" not in discovered_dirs
def test_prune_stale_project_dirs_removes_ignored_artifact_mappings(tmp_path: Path) -> None:
    """Registry mappings pointing at ignored artifact dirs are pruned; live dirs survive."""
    workspace = tmp_path / "workspace"
    src_dir = workspace / "src"
    dist_dir = workspace / "dist"
    for directory in (src_dir, dist_dir):
        directory.mkdir(parents=True)
    (src_dir / "app.py").write_text("print('ok')\n", encoding="utf-8")
    (dist_dir / "bundle.py").write_text("print('artifact')\n", encoding="utf-8")

    mapper = PathMapper(index_root=tmp_path / "indexes")
    registry = RegistryStore(db_path=tmp_path / "registry.db")
    registry.initialize()
    project = registry.register_project(workspace, mapper.source_to_index_dir(workspace))
    # Register root plus both children, including the soon-to-be-stale dist dir.
    for depth, directory in ((0, workspace), (1, src_dir), (1, dist_dir)):
        registry.register_dir(project.id, directory, mapper.source_to_index_db(directory), depth=depth)

    builder = IndexTreeBuilder(
        registry=registry,
        mapper=mapper,
        config=Config(data_dir=tmp_path / "data"),
        incremental=False,
    )
    pruned = builder._prune_stale_project_dirs(
        project_id=project.id,
        source_root=workspace,
        dirs_by_depth=builder._collect_dirs_by_depth(workspace),
    )
    remaining = {mapping.source_path.resolve() for mapping in registry.get_project_dirs(project.id)}
    registry.close()

    assert dist_dir.resolve() in pruned
    assert workspace.resolve() in remaining
    assert src_dir.resolve() in remaining
    assert dist_dir.resolve() not in remaining
def test_force_full_build_prunes_stale_ignored_mappings(tmp_path: Path) -> None:
    """A force-full build must drop registry mappings for ignored dirs (here: dist/).

    The parallel per-level build is stubbed out so only the registry pruning
    behaviour of ``build(force_full=True)`` is exercised.
    """
    workspace = tmp_path / "workspace"
    src_dir = workspace / "src"
    dist_dir = workspace / "dist"
    src_dir.mkdir(parents=True)
    dist_dir.mkdir(parents=True)
    (src_dir / "app.py").write_text("print('ok')\n", encoding="utf-8")
    (dist_dir / "bundle.py").write_text("print('artifact')\n", encoding="utf-8")
    mapper = PathMapper(index_root=tmp_path / "indexes")
    registry = RegistryStore(db_path=tmp_path / "registry.db")
    registry.initialize()
    project = registry.register_project(workspace, mapper.source_to_index_dir(workspace))
    registry.register_dir(project.id, workspace, mapper.source_to_index_db(workspace), depth=0)
    # Pre-register dist/ so the build has a stale ignored mapping to prune.
    registry.register_dir(project.id, dist_dir, mapper.source_to_index_db(dist_dir), depth=1)
    builder = IndexTreeBuilder(
        registry=registry,
        mapper=mapper,
        config=Config(
            data_dir=tmp_path / "data",
            global_symbol_index_enabled=False,
        ),
        incremental=False,
    )

    # Signature mirrors IndexTreeBuilder._build_level_parallel; returns one
    # DirBuildResult per requested dir without doing any real indexing.
    def fake_build_level_parallel(
        dirs: list[Path],
        languages,
        workers,
        *,
        source_root: Path,
        project_id: int,
        global_index_db_path: Path,
    ) -> list[DirBuildResult]:
        return [
            DirBuildResult(
                source_path=dir_path,
                index_path=mapper.source_to_index_db(dir_path),
                files_count=1 if dir_path == src_dir else 0,
                symbols_count=0,
                subdirs=[],
            )
            for dir_path in dirs
        ]

    builder._build_level_parallel = fake_build_level_parallel  # type: ignore[method-assign]
    builder._link_children_to_parent = MagicMock()
    build_result = builder.build(workspace, force_full=True, workers=1)
    remaining = {mapping.source_path.resolve() for mapping in registry.get_project_dirs(project.id)}
    registry.close()
    # Only workspace + src are built; the ignored dist mapping must be gone.
    assert build_result.total_dirs == 2
    assert workspace.resolve() in remaining
    assert src_dir.resolve() in remaining
    assert dist_dir.resolve() not in remaining
def test_force_full_build_rewrites_directory_db_and_drops_stale_ignored_subdirs(
    tmp_path: Path,
) -> None:
    """Force-full builds rewrite the root dir DB so stale ignored subdir links disappear."""
    project_root = tmp_path / "project"
    src_dir = project_root / "src"
    build_dir = project_root / "build"
    for directory in (src_dir, build_dir):
        directory.mkdir(parents=True)
    (src_dir / "app.py").write_text("print('ok')\n", encoding="utf-8")
    (build_dir / "generated.py").write_text("print('artifact')\n", encoding="utf-8")

    mapper = PathMapper(index_root=tmp_path / "indexes")
    registry = RegistryStore(db_path=tmp_path / "registry.db")
    registry.initialize()
    config = Config(
        data_dir=tmp_path / "data",
        global_symbol_index_enabled=False,
    )

    # Seed the root directory DB with a stale link to the ignored build/ dir.
    root_index_db = mapper.source_to_index_db(project_root)
    with DirIndexStore(root_index_db, config=config) as dir_store:
        dir_store.register_subdir(
            name="build",
            index_path=mapper.source_to_index_db(build_dir),
            files_count=1,
        )

    builder = IndexTreeBuilder(
        registry=registry,
        mapper=mapper,
        config=config,
        incremental=False,
    )
    outcome = builder.build(project_root, force_full=True, workers=1)

    with DirIndexStore(root_index_db, config=config) as dir_store:
        linked_subdirs = [link.name for link in dir_store.get_subdirs()]
    registry.close()

    assert outcome.total_dirs == 2
    assert linked_subdirs == ["src"]

View File

@@ -24,13 +24,24 @@ from codexlens.entities import SearchResult
from codexlens.search.ranking import (
DEFAULT_WEIGHTS,
QueryIntent,
apply_path_penalties,
extract_explicit_path_hints,
cross_encoder_rerank,
adjust_weights_by_intent,
apply_symbol_boost,
detect_query_intent,
filter_results_by_category,
get_rrf_weights,
group_similar_results,
is_auxiliary_reference_path,
is_generated_artifact_path,
is_test_file,
normalize_weights,
query_prefers_lexical_search,
query_targets_auxiliary_files,
query_targets_generated_files,
query_targets_test_files,
rebalance_noisy_results,
reciprocal_rank_fusion,
simple_weighted_fusion,
)
@@ -73,6 +84,7 @@ class TestDetectQueryIntent:
def test_detect_keyword_intent(self):
    """CamelCase/underscore queries should be detected as KEYWORD."""
    for query in ("MyClassName", "windowsHide", "my_function_name", "foo::bar"):
        assert detect_query_intent(query) == QueryIntent.KEYWORD
@@ -91,6 +103,25 @@ class TestDetectQueryIntent:
assert detect_query_intent("") == QueryIntent.MIXED
assert detect_query_intent(" ") == QueryIntent.MIXED
def test_query_targets_test_files(self):
    """Queries explicitly mentioning tests should skip test penalties."""
    for query in ("how do tests cover auth flow?", "spec fixtures for parser"):
        assert query_targets_test_files(query)
    assert not query_targets_test_files("windowsHide")
def test_query_targets_generated_files(self):
    """Queries explicitly mentioning build artifacts should skip that penalty."""
    for query in ("inspect dist bundle output", "generated artifacts under build"):
        assert query_targets_generated_files(query)
    assert not query_targets_generated_files("cache invalidation strategy")
def test_query_prefers_lexical_search(self):
    """Config/env/factory queries should prefer lexical-first routing."""
    lexical_queries = (
        "embedding backend fastembed local litellm api config",
        "get_reranker factory onnx backend selection",
        "EMBEDDING_BACKEND and RERANKER_BACKEND environment variables",
    )
    for query in lexical_queries:
        assert query_prefers_lexical_search(query)
    assert not query_prefers_lexical_search("how does smart search route keyword queries")
# =============================================================================
# Tests: adjust_weights_by_intent
@@ -129,6 +160,427 @@ class TestAdjustWeightsByIntent:
assert adjusted["exact"] == pytest.approx(0.3, abs=0.01)
class TestPathPenalties:
"""Tests for lightweight path-based ranking penalties."""
def test_is_test_file(self):
    """Common test-file layouts (tests/, .spec, .test) are detected across platforms."""
    test_paths = (
        "/repo/tests/test_auth.py",
        "D:\\repo\\src\\auth.spec.ts",
        "/repo/frontend/src/pages/discoverypage.test.tsx",
        "/repo/frontend/src/pages/discoverypage.spec.jsx",
    )
    for path in test_paths:
        assert is_test_file(path)
    assert not is_test_file("/repo/src/auth.py")
def test_is_generated_artifact_path(self):
    """dist/, generated/ and .next/ paths count as build artifacts; src/ does not."""
    artifact_paths = (
        "/repo/dist/app.js",
        "/repo/src/generated/client.ts",
        "D:\\repo\\frontend\\.next\\server.js",
    )
    for path in artifact_paths:
        assert is_generated_artifact_path(path)
    assert not is_generated_artifact_path("/repo/src/auth.py")
def test_is_auxiliary_reference_path(self):
    """examples/, benchmarks/ and tools/ are auxiliary; src/ is not."""
    auxiliary_paths = (
        "/repo/examples/auth_demo.py",
        "/repo/benchmarks/search_eval.py",
        "/repo/tools/debug_search.py",
    )
    for path in auxiliary_paths:
        assert is_auxiliary_reference_path(path)
    assert not is_auxiliary_reference_path("/repo/src/auth.py")
def test_query_targets_auxiliary_files(self):
    """Queries mentioning examples/benchmarks target auxiliary files; plain topics do not."""
    for query in ("show smart search examples", "benchmark smart search"):
        assert query_targets_auxiliary_files(query)
    assert not query_targets_auxiliary_files("smart search routing")
def test_apply_path_penalties_demotes_test_files(self):
    """A higher-scored test-file hit is demoted below a source-file hit."""
    test_hit = _make_result(path="/repo/tests/test_auth.py", score=10.0)
    source_hit = _make_result(path="/repo/src/auth.py", score=9.0)
    ranked = apply_path_penalties(
        [test_hit, source_hit],
        "authenticate user",
        test_file_penalty=0.15,
    )
    assert ranked[0].path == "/repo/src/auth.py"
    assert ranked[1].metadata["path_penalty_reasons"] == ["test_file"]
def test_apply_path_penalties_more_aggressively_demotes_tests_for_keyword_queries(self):
    """Identifier queries apply a stronger (0.55x) test-file demotion."""
    ranked = apply_path_penalties(
        [
            _make_result(path="/repo/tests/test_auth.py", score=5.0),
            _make_result(path="/repo/src/auth.py", score=4.0),
        ],
        "find_descendant_project_roots",
        test_file_penalty=0.15,
    )
    assert ranked[0].path == "/repo/src/auth.py"
    demoted = ranked[1].metadata
    assert demoted["path_penalty_reasons"] == ["test_file"]
    assert demoted["path_penalty_multiplier"] == pytest.approx(0.55)
    assert demoted["path_rank_multiplier"] == pytest.approx(0.55)
def test_apply_path_penalties_more_aggressively_demotes_tests_for_semantic_queries(self):
    """Semantic (natural-language) queries apply a 0.75x test-file demotion."""
    ranked = apply_path_penalties(
        [
            _make_result(path="/repo/tests/test_auth.py", score=5.0),
            _make_result(path="/repo/src/auth.py", score=4.1),
        ],
        "how does auth routing work",
        test_file_penalty=0.15,
    )
    assert ranked[0].path == "/repo/src/auth.py"
    demoted = ranked[1].metadata
    assert demoted["path_penalty_reasons"] == ["test_file"]
    assert demoted["path_penalty_multiplier"] == pytest.approx(0.75)
def test_apply_path_penalties_boosts_source_definitions_for_identifier_queries(self):
    """An excerpt defining the queried identifier earns a 2.0x source-definition boost."""
    test_hit = _make_result(
        path="/repo/tests/test_registry.py",
        score=4.2,
        excerpt='query="find_descendant_project_roots"',
    )
    definition_hit = _make_result(
        path="/repo/src/registry.py",
        score=3.0,
        excerpt="def find_descendant_project_roots(self, source_root: Path) -> list[str]:",
    )
    ranked = apply_path_penalties(
        [test_hit, definition_hit],
        "find_descendant_project_roots",
        test_file_penalty=0.15,
    )
    top = ranked[0]
    assert top.path == "/repo/src/registry.py"
    assert top.metadata["path_boost_reasons"] == ["source_definition"]
    assert top.metadata["path_boost_multiplier"] == pytest.approx(2.0)
    assert top.metadata["path_rank_multiplier"] == pytest.approx(2.0)
    assert ranked[1].metadata["path_penalty_reasons"] == ["test_file"]
def test_apply_path_penalties_boosts_source_paths_for_semantic_feature_queries(self):
    """Path/topic token overlap gives source files a 1.35x boost over test files."""
    test_hit = _make_result(
        path="/repo/tests/smart-search-intent.test.js",
        score=0.832,
        excerpt="describes how smart search routes keyword queries",
    )
    source_hit = _make_result(
        path="/repo/src/tools/smart-search.ts",
        score=0.555,
        excerpt="smart search keyword routing logic",
    )
    ranked = apply_path_penalties(
        [test_hit, source_hit],
        "how does smart search route keyword queries",
        test_file_penalty=0.15,
    )
    top = ranked[0]
    assert top.path == "/repo/src/tools/smart-search.ts"
    assert top.metadata["path_boost_reasons"] == ["source_path_topic_overlap"]
    assert top.metadata["path_boost_multiplier"] == pytest.approx(1.35)
    assert top.metadata["path_boost_overlap_tokens"] == ["smart", "search"]
    assert ranked[1].metadata["path_penalty_reasons"] == ["test_file"]
def test_apply_path_penalties_strongly_boosts_keyword_basename_overlap(self):
    """Basename overlap with keyword-query tokens earns a strong 4.5x boost."""
    unrelated_hit = _make_result(
        path="/repo/src/tools/core-memory.ts",
        score=0.04032417772512223,
        excerpt="memory listing helpers",
    )
    overlapping_hit = _make_result(
        path="/repo/src/tools/smart-search.ts",
        score=0.009836065573770493,
        excerpt="smart search keyword routing logic",
    )
    ranked = apply_path_penalties(
        [unrelated_hit, overlapping_hit],
        "executeHybridMode dense_rerank semantic smart_search",
        test_file_penalty=0.15,
    )
    top = ranked[0]
    assert top.path == "/repo/src/tools/smart-search.ts"
    assert top.metadata["path_boost_reasons"] == ["source_path_topic_overlap"]
    assert top.metadata["path_boost_multiplier"] == pytest.approx(4.5)
    assert top.metadata["path_boost_overlap_tokens"] == ["smart", "search"]
def test_extract_explicit_path_hints_ignores_generic_platform_terms(self):
    """Only the explicit feature token survives; generic platform terms are dropped."""
    hints = extract_explicit_path_hints(
        "parse CodexLens JSON output strip ANSI smart_search",
    )
    assert hints == [["smart", "search"]]
def test_apply_path_penalties_prefers_explicit_feature_hint_over_platform_terms(self):
    """The explicit smart_search hint outranks a hit matching only platform terms."""
    platform_hit = _make_result(
        path="/repo/src/tools/codex-lens-lsp.ts",
        score=0.045,
        excerpt="CodexLens LSP bridge",
    )
    feature_hit = _make_result(
        path="/repo/src/tools/smart-search.ts",
        score=0.03,
        excerpt="parse JSON output and strip ANSI for plain-text fallback",
    )
    ranked = apply_path_penalties(
        [platform_hit, feature_hit],
        "parse CodexLens JSON output strip ANSI smart_search",
        test_file_penalty=0.15,
    )
    top = ranked[0]
    assert top.path == "/repo/src/tools/smart-search.ts"
    assert top.metadata["path_boost_reasons"] == ["source_path_topic_overlap"]
    assert top.metadata["path_boost_overlap_tokens"] == ["smart", "search"]
def test_apply_path_penalties_strongly_boosts_lexical_config_modules(self):
    """Config-flavored queries strongly boost actual config modules."""
    candidates = [
        _make_result(
            path="/repo/src/tools/smart-search.ts",
            score=22.07,
            excerpt="embedding backend local api config routing",
        ),
        _make_result(
            path="/repo/src/codexlens/config.py",
            score=4.88,
            excerpt="embedding_backend = 'fastembed'",
        ),
    ]
    adjusted = apply_path_penalties(
        candidates,
        "embedding backend fastembed local litellm api config",
        test_file_penalty=0.15,
    )
    winner = adjusted[0]
    assert winner.path == "/repo/src/codexlens/config.py"
    assert winner.metadata["path_boost_reasons"] == ["source_path_topic_overlap"]
    assert winner.metadata["path_boost_multiplier"] == pytest.approx(5.0)
    assert winner.metadata["path_boost_overlap_tokens"] == ["config"]
def test_apply_path_penalties_more_aggressively_demotes_tests_for_explicit_feature_queries(self):
    """Explicit feature queries demote test files harder than the base penalty."""
    query = "parse CodexLens JSON output strip ANSI smart_search"
    candidates = [
        _make_result(
            path="/repo/tests/smart-search-intent.test.js",
            score=1.0,
            excerpt="smart search intent coverage",
        ),
        _make_result(
            path="/repo/src/tools/smart-search.ts",
            score=0.58,
            excerpt="plain-text JSON fallback for smart search",
        ),
    ]
    adjusted = apply_path_penalties(candidates, query, test_file_penalty=0.15)
    assert adjusted[0].path == "/repo/src/tools/smart-search.ts"
    demoted = adjusted[1]
    assert demoted.metadata["path_penalty_reasons"] == ["test_file"]
    assert demoted.metadata["path_penalty_multiplier"] == pytest.approx(0.55)
def test_apply_path_penalties_demotes_generated_artifacts(self):
    """Build artifacts fall behind the matching source file for normal queries."""
    candidates = [
        _make_result(path="/repo/dist/auth.js", score=10.0),
        _make_result(path="/repo/src/auth.ts", score=9.0),
    ]
    adjusted = apply_path_penalties(
        candidates,
        "authenticate user",
        generated_file_penalty=0.35,
    )
    assert adjusted[0].path == "/repo/src/auth.ts"
    assert adjusted[1].metadata["path_penalty_reasons"] == ["generated_artifact"]
def test_apply_path_penalties_more_aggressively_demotes_generated_artifacts_for_explicit_feature_queries(self):
    """Explicit feature queries demote build artifacts harder than usual."""
    query = "parse CodexLens JSON output strip ANSI smart_search"
    candidates = [
        _make_result(
            path="/repo/dist/tools/smart-search.js",
            score=1.0,
            excerpt="built smart search output",
        ),
        _make_result(
            path="/repo/src/tools/smart-search.ts",
            score=0.45,
            excerpt="plain-text JSON fallback for smart search",
        ),
    ]
    adjusted = apply_path_penalties(candidates, query, generated_file_penalty=0.35)
    assert adjusted[0].path == "/repo/src/tools/smart-search.ts"
    demoted = adjusted[1]
    assert demoted.metadata["path_penalty_reasons"] == ["generated_artifact"]
    assert demoted.metadata["path_penalty_multiplier"] == pytest.approx(0.4)
def test_apply_path_penalties_demotes_auxiliary_reference_files(self):
    """Example/reference files fall behind real source matches."""
    candidates = [
        _make_result(path="/repo/examples/simple_search_comparison.py", score=10.0),
        _make_result(path="/repo/src/search/router.py", score=9.0),
    ]
    adjusted = apply_path_penalties(
        candidates,
        "how does smart search route keyword queries",
        test_file_penalty=0.15,
    )
    assert adjusted[0].path == "/repo/src/search/router.py"
    assert adjusted[1].metadata["path_penalty_reasons"] == ["auxiliary_file"]
def test_apply_path_penalties_more_aggressively_demotes_auxiliary_files_for_explicit_feature_queries(self):
    """Explicit feature queries demote demo/benchmark files harder than usual."""
    query = "parse CodexLens JSON output strip ANSI smart_search"
    candidates = [
        _make_result(
            path="/repo/benchmarks/smart_search_demo.py",
            score=1.0,
            excerpt="demo for smart search fallback",
        ),
        _make_result(
            path="/repo/src/tools/smart-search.ts",
            score=0.52,
            excerpt="plain-text JSON fallback for smart search",
        ),
    ]
    adjusted = apply_path_penalties(candidates, query, test_file_penalty=0.15)
    assert adjusted[0].path == "/repo/src/tools/smart-search.ts"
    demoted = adjusted[1]
    assert demoted.metadata["path_penalty_reasons"] == ["auxiliary_file"]
    assert demoted.metadata["path_penalty_multiplier"] == pytest.approx(0.5)
def test_apply_path_penalties_skips_when_query_targets_tests(self):
    """Test files keep the top spot when the query explicitly asks for tests."""
    candidates = [
        _make_result(path="/repo/tests/test_auth.py", score=10.0),
        _make_result(path="/repo/src/auth.py", score=9.0),
    ]
    adjusted = apply_path_penalties(candidates, "auth tests", test_file_penalty=0.15)
    assert adjusted[0].path == "/repo/tests/test_auth.py"
def test_apply_path_penalties_skips_generated_penalty_when_query_targets_artifacts(self):
    """Artifact-oriented queries keep dist/build files at the top."""
    candidates = [
        _make_result(path="/repo/dist/auth.js", score=10.0),
        _make_result(path="/repo/src/auth.ts", score=9.0),
    ]
    adjusted = apply_path_penalties(
        candidates,
        "dist auth bundle",
        generated_file_penalty=0.35,
    )
    assert adjusted[0].path == "/repo/dist/auth.js"
def test_rebalance_noisy_results_pushes_explicit_feature_query_noise_behind_source_files(self):
    """Test/dist/demo hits drop behind source files for explicit feature queries."""
    query = "parse CodexLens JSON output strip ANSI smart_search"
    candidates = [
        _make_result(path="/repo/src/tools/smart-search.ts", score=0.9),
        _make_result(path="/repo/tests/smart-search-intent.test.tsx", score=0.8),
        _make_result(path="/repo/src/core/cli-routes.ts", score=0.7),
        _make_result(path="/repo/dist/tools/smart-search.js", score=0.6),
        _make_result(path="/repo/benchmarks/smart_search_demo.py", score=0.5),
    ]
    rebalanced = rebalance_noisy_results(candidates, query)
    top_paths = [item.path for item in rebalanced[:2]]
    assert top_paths == [
        "/repo/src/tools/smart-search.ts",
        "/repo/src/core/cli-routes.ts",
    ]
def test_rebalance_noisy_results_preserves_tests_when_query_targets_them(self):
    """Rebalancing leaves test files first when the query asks for tests."""
    candidates = [
        _make_result(path="/repo/tests/smart-search-intent.test.tsx", score=0.9),
        _make_result(path="/repo/src/tools/smart-search.ts", score=0.8),
    ]
    rebalanced = rebalance_noisy_results(candidates, "smart search tests")
    ordered_paths = [item.path for item in rebalanced]
    assert ordered_paths == [
        "/repo/tests/smart-search-intent.test.tsx",
        "/repo/src/tools/smart-search.ts",
    ]
def test_apply_path_penalties_skips_auxiliary_penalty_when_query_targets_examples(self):
    """Example files keep the top spot when the query explicitly asks for examples."""
    candidates = [
        _make_result(path="/repo/examples/simple_search_comparison.py", score=10.0),
        _make_result(path="/repo/src/search/router.py", score=9.0),
    ]
    adjusted = apply_path_penalties(
        candidates,
        "smart search examples",
        test_file_penalty=0.15,
    )
    assert adjusted[0].path == "/repo/examples/simple_search_comparison.py"
class TestCrossEncoderRerank:
    """Tests for cross-encoder reranking edge cases."""

    def test_cross_encoder_rerank_preserves_strong_source_candidates_for_semantic_feature_queries(self):
        """A near-zero cross-encoder score must not bury an on-topic source file."""

        class StubReranker:
            # Deterministic scores: strong for the test file, ~zero for the source file.
            def score_pairs(self, pairs, batch_size=32):
                _ = (pairs, batch_size)
                return [0.8323705792427063, 1.2463066923373844e-05]

        query = "how does smart search route keyword queries"
        candidates = [
            _make_result(
                path="/repo/tests/smart-search-intent.test.js",
                score=0.5989155769348145,
                excerpt="describes how smart search routes keyword queries",
            ),
            _make_result(
                path="/repo/src/tools/smart-search.ts",
                score=0.554444432258606,
                excerpt="smart search keyword routing logic",
            ),
        ]
        reranked = cross_encoder_rerank(query, candidates, StubReranker(), top_k=2)
        reranked = apply_path_penalties(reranked, query, test_file_penalty=0.15)

        winner = reranked[0]
        assert winner.path == "/repo/src/tools/smart-search.ts"
        assert winner.metadata["cross_encoder_floor_reason"] == "semantic_source_path_overlap"
        assert winner.metadata["cross_encoder_floor_overlap_tokens"] == ["smart", "search"]
        assert winner.metadata["path_boost_reasons"] == ["source_path_topic_overlap"]
        assert reranked[1].metadata["path_penalty_reasons"] == ["test_file"]
# =============================================================================
# Tests: get_rrf_weights
# =============================================================================

View File

@@ -67,3 +67,60 @@ def test_find_nearest_index(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) ->
assert found is not None
assert found.id == mapping.id
def test_find_descendant_project_roots_returns_nested_project_roots(tmp_path: Path) -> None:
    """Descendant lookup returns only project roots nested under the given root.

    Registers a workspace project, two child projects inside it, and one
    project outside of it, then checks that find_descendant_project_roots()
    reports exactly the two children (in registration order) and ignores both
    the workspace itself and the unrelated external project.
    """
    db_path = tmp_path / "registry.db"
    workspace_root = tmp_path / "workspace"
    child_a = workspace_root / "packages" / "app-a"
    child_b = workspace_root / "tools" / "app-b"
    # Sibling of the workspace; must NOT appear as a descendant.
    outside_root = tmp_path / "external"
    with RegistryStore(db_path=db_path) as store:
        workspace_project = store.register_project(
            workspace_root,
            tmp_path / "indexes" / "workspace",
        )
        child_a_project = store.register_project(
            child_a,
            tmp_path / "indexes" / "workspace" / "packages" / "app-a",
        )
        child_b_project = store.register_project(
            child_b,
            tmp_path / "indexes" / "workspace" / "tools" / "app-b",
        )
        outside_project = store.register_project(
            outside_root,
            tmp_path / "indexes" / "external",
        )
        # Give every project a depth-0 dir mapping so each root has an index path.
        store.register_dir(
            workspace_project.id,
            workspace_root,
            tmp_path / "indexes" / "workspace" / "_index.db",
            depth=0,
        )
        child_a_mapping = store.register_dir(
            child_a_project.id,
            child_a,
            tmp_path / "indexes" / "workspace" / "packages" / "app-a" / "_index.db",
            depth=0,
        )
        child_b_mapping = store.register_dir(
            child_b_project.id,
            child_b,
            tmp_path / "indexes" / "workspace" / "tools" / "app-b" / "_index.db",
            depth=0,
        )
        store.register_dir(
            outside_project.id,
            outside_root,
            tmp_path / "indexes" / "external" / "_index.db",
            depth=0,
        )
        descendants = store.find_descendant_project_roots(workspace_root)
        # Only the two nested children, in registration order; the workspace
        # root itself and the external project are excluded.
        assert [mapping.index_path for mapping in descendants] == [
            child_a_mapping.index_path,
            child_b_mapping.index_path,
        ]

View File

@@ -313,3 +313,89 @@ def test_onnx_reranker_scores_pairs_with_sigmoid_normalization(
expected = [1.0 / (1.0 + math.exp(-float(i))) for i in range(len(pairs))]
assert scores == pytest.approx(expected, rel=1e-6, abs=1e-6)
def test_onnx_reranker_splits_tuple_providers_into_provider_options(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """(provider, options) tuples are split into parallel ORT keyword args.

    Verifies that get_reranker() forwards providers=["DmlExecutionProvider",
    "CPUExecutionProvider"] and provider_options=[{"device_id": 1}, {}] to
    ORTModelForSequenceClassification.from_pretrained instead of passing the
    mixed tuple/string list through unchanged.
    """
    import numpy as np

    captured: dict[str, object] = {}
    # Stub out onnxruntime/optimum/transformers so no real model is loaded.
    dummy_onnxruntime = types.ModuleType("onnxruntime")
    dummy_optimum = types.ModuleType("optimum")
    dummy_optimum.__path__ = []  # mark as package so "optimum.onnxruntime" resolves
    dummy_optimum_ort = types.ModuleType("optimum.onnxruntime")

    class DummyModelOutput:
        def __init__(self, logits: np.ndarray) -> None:
            self.logits = logits

    class DummyModel:
        input_names = ["input_ids", "attention_mask"]

        def __call__(self, **inputs):
            batch = int(inputs["input_ids"].shape[0])
            # Zero logits -> sigmoid-normalized score of 0.5 per pair (see the
            # sigmoid-normalization test above).
            return DummyModelOutput(logits=np.zeros((batch, 1), dtype=np.float32))

    class DummyORTModelForSequenceClassification:
        @classmethod
        def from_pretrained(
            cls,
            model_name: str,
            providers=None,
            provider_options=None,
            **kwargs,
        ):
            # Record exactly what the reranker factory forwarded.
            captured["model_name"] = model_name
            captured["providers"] = providers
            captured["provider_options"] = provider_options
            captured["kwargs"] = kwargs
            return DummyModel()

    dummy_optimum_ort.ORTModelForSequenceClassification = DummyORTModelForSequenceClassification
    dummy_transformers = types.ModuleType("transformers")

    class DummyAutoTokenizer:
        model_max_length = 512

        @classmethod
        def from_pretrained(cls, model_name: str, **kwargs):
            _ = model_name, kwargs
            return cls()

        def __call__(self, *, text, text_pair, return_tensors, **kwargs):
            _ = text_pair, kwargs
            # The reranker must request numpy tensors for the ONNX path.
            assert return_tensors == "np"
            batch = len(text)
            return {
                "input_ids": np.zeros((batch, 4), dtype=np.int64),
                "attention_mask": np.ones((batch, 4), dtype=np.int64),
            }

    dummy_transformers.AutoTokenizer = DummyAutoTokenizer
    monkeypatch.setitem(sys.modules, "onnxruntime", dummy_onnxruntime)
    monkeypatch.setitem(sys.modules, "optimum", dummy_optimum)
    monkeypatch.setitem(sys.modules, "optimum.onnxruntime", dummy_optimum_ort)
    monkeypatch.setitem(sys.modules, "transformers", dummy_transformers)
    reranker = get_reranker(
        backend="onnx",
        model_name="dummy-model",
        use_gpu=True,
        providers=[
            ("DmlExecutionProvider", {"device_id": 1}),
            "CPUExecutionProvider",
        ],
    )
    assert isinstance(reranker, ONNXReranker)
    scores = reranker.score_pairs([("q", "d")], batch_size=1)
    assert scores == pytest.approx([0.5])
    assert captured["model_name"] == "dummy-model"
    # The tuple entry is split: its name joins the flat provider list and its
    # options dict lands at the same position in provider_options.
    assert captured["providers"] == ["DmlExecutionProvider", "CPUExecutionProvider"]
    assert captured["provider_options"] == [{"device_id": 1}, {}]

View File

@@ -428,6 +428,51 @@ class TestIndexPathCollection:
assert len(paths) == 1
engine.close()
def test_collect_skips_ignored_artifact_indexes(self, mock_registry, mock_mapper, temp_dir):
    """Test collection skips dist/build-style artifact subtrees."""
    root_dir = temp_dir / "project"
    root_dir.mkdir()
    root_db = root_dir / "_index.db"
    root_store = DirIndexStore(root_db)
    root_store.initialize()

    # One indexed child per name; "dist" and ".workflow" should be skipped.
    child_dbs = {}
    child_stores = []
    for name in ("src", "dist", ".workflow"):
        child_dir = root_dir / name
        child_dir.mkdir()
        child_db = child_dir / "_index.db"
        child_store = DirIndexStore(child_db)
        child_store.initialize()
        child_dbs[name] = child_db
        child_stores.append(child_store)

    for name, child_db in child_dbs.items():
        root_store.register_subdir(name=name, index_path=child_db)

    root_store.close()
    for child_store in child_stores:
        child_store.close()

    engine = ChainSearchEngine(mock_registry, mock_mapper)
    paths = engine._collect_index_paths(root_db, depth=-1)
    assert {path.relative_to(root_dir).as_posix() for path in paths} == {
        "_index.db",
        "src/_index.db",
    }
    engine.close()
class TestResultMergeAndRank:
"""Tests for _merge_and_rank method."""
@@ -490,6 +535,36 @@ class TestResultMergeAndRank:
assert merged == []
engine.close()
def test_merge_applies_test_file_penalty_for_non_test_query(self, mock_registry, mock_mapper):
    """Non-test queries should lightly demote test files during merge."""
    engine = ChainSearchEngine(mock_registry, mock_mapper)
    candidates = [
        SearchResult(path="/repo/tests/test_auth.py", score=10.0, excerpt="match 1"),
        SearchResult(path="/repo/src/auth.py", score=9.0, excerpt="match 2"),
    ]
    ranked = engine._merge_and_rank(candidates, limit=10, query="authenticate users")
    assert ranked[0].path == "/repo/src/auth.py"
    assert ranked[1].metadata["path_penalty_reasons"] == ["test_file"]
    engine.close()
def test_merge_applies_generated_file_penalty_for_non_artifact_query(self, mock_registry, mock_mapper):
    """Non-artifact queries should lightly demote generated/build results during merge."""
    engine = ChainSearchEngine(mock_registry, mock_mapper)
    candidates = [
        SearchResult(path="/repo/dist/auth.js", score=10.0, excerpt="match 1"),
        SearchResult(path="/repo/src/auth.ts", score=9.0, excerpt="match 2"),
    ]
    ranked = engine._merge_and_rank(candidates, limit=10, query="authenticate users")
    assert ranked[0].path == "/repo/src/auth.ts"
    assert ranked[1].metadata["path_penalty_reasons"] == ["generated_artifact"]
    engine.close()
# === Hierarchical Chain Search Tests ===

View File

@@ -400,15 +400,20 @@ class TestStage4OptionalRerank:
"""Tests for Stage 4: Optional cross-encoder reranking."""
def test_stage4_reranks_with_reranker(
self, mock_registry, mock_mapper, mock_config
self, mock_registry, mock_mapper, temp_paths
):
"""Test _stage4_optional_rerank uses _cross_encoder_rerank."""
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
"""Test _stage4_optional_rerank overfetches before final trim."""
config = Config(data_dir=temp_paths / "data")
config.reranker_top_k = 4
config.reranking_top_k = 4
engine = ChainSearchEngine(mock_registry, mock_mapper, config=config)
results = [
SearchResult(path="a.py", score=0.9, excerpt="a"),
SearchResult(path="b.py", score=0.8, excerpt="b"),
SearchResult(path="c.py", score=0.7, excerpt="c"),
SearchResult(path="d.py", score=0.6, excerpt="d"),
SearchResult(path="e.py", score=0.5, excerpt="e"),
]
# Mock the _cross_encoder_rerank method that _stage4 calls
@@ -416,12 +421,14 @@ class TestStage4OptionalRerank:
mock_rerank.return_value = [
SearchResult(path="c.py", score=0.95, excerpt="c"),
SearchResult(path="a.py", score=0.85, excerpt="a"),
SearchResult(path="d.py", score=0.83, excerpt="d"),
SearchResult(path="e.py", score=0.81, excerpt="e"),
]
reranked = engine._stage4_optional_rerank("query", results, k=2)
mock_rerank.assert_called_once_with("query", results, 2)
assert len(reranked) <= 2
mock_rerank.assert_called_once_with("query", results, 4)
assert len(reranked) == 4
# First result should be reranked winner
assert reranked[0].path == "c.py"
@@ -633,6 +640,113 @@ class TestStagedCascadeIntegration:
a_result = next(r for r in result.results if r.path == "a.py")
assert a_result.score == 0.9
def test_staged_cascade_expands_stage3_target_for_rerank_budget(
    self, mock_registry, mock_mapper, temp_paths
):
    """Test staged cascade preserves enough Stage 3 reps for rerank budget."""
    config = Config(data_dir=temp_paths / "data")
    config.enable_staged_rerank = True
    config.reranker_top_k = 6
    config.reranking_top_k = 6
    engine = ChainSearchEngine(mock_registry, mock_mapper, config=config)
    expanded_results = [
        SearchResult(path=f"src/file-{index}.ts", score=1.0 - (index * 0.01), excerpt="x")
        for index in range(8)
    ]
    # Patch every pipeline stage so only the Stage 3 call contract is exercised.
    with patch.object(engine, "_find_start_index") as mock_find, \
            patch.object(engine, "_collect_index_paths") as mock_collect, \
            patch.object(engine, "_stage1_binary_search") as mock_stage1, \
            patch.object(engine, "_stage2_lsp_expand") as mock_stage2, \
            patch.object(engine, "_stage3_cluster_prune") as mock_stage3, \
            patch.object(engine, "_stage4_optional_rerank") as mock_stage4:
        mock_find.return_value = temp_paths / "index" / "_index.db"
        mock_collect.return_value = [temp_paths / "index" / "_index.db"]
        mock_stage1.return_value = (
            [SearchResult(path="seed.ts", score=0.9, excerpt="seed")],
            temp_paths / "index",
        )
        mock_stage2.return_value = expanded_results
        mock_stage3.return_value = expanded_results[:6]
        mock_stage4.return_value = expanded_results[:2]
        engine.staged_cascade_search(
            "query",
            temp_paths / "src",
            k=2,
            coarse_k=20,
        )
        # Stage 3 must be asked for the full rerank budget (6), not the final k (2).
        mock_stage3.assert_called_once_with(
            expanded_results,
            6,
            query="query",
        )
def test_staged_cascade_overfetches_rerank_before_final_trim(
    self, mock_registry, mock_mapper, temp_paths
):
    """Test staged rerank keeps enough candidates for path penalties to work."""
    config = Config(data_dir=temp_paths / "data")
    config.enable_staged_rerank = True
    config.reranker_top_k = 4
    config.reranking_top_k = 4
    config.test_file_penalty = 0.15
    config.generated_file_penalty = 0.35
    engine = ChainSearchEngine(mock_registry, mock_mapper, config=config)
    src_primary = str(temp_paths / "src" / "tools" / "smart-search.ts")
    src_secondary = str(temp_paths / "src" / "tools" / "codex-lens.ts")
    test_primary = str(temp_paths / "tests" / "integration" / "cli-routes.test.ts")
    test_secondary = str(
        temp_paths / "frontend" / "tests" / "e2e" / "prompt-memory.spec.ts"
    )
    query = "parse CodexLens JSON output strip ANSI smart_search"
    # Test files score highest, so trimming to k=2 before applying path
    # penalties would keep only them; overfetching must prevent that.
    clustered_results = [
        SearchResult(path=test_primary, score=0.98, excerpt="test"),
        SearchResult(path=test_secondary, score=0.97, excerpt="test"),
        SearchResult(path=src_primary, score=0.96, excerpt="source"),
        SearchResult(path=src_secondary, score=0.95, excerpt="source"),
    ]
    # Patch every stage up to the cross-encoder so only the rerank budget
    # and the final penalty-aware trim are exercised.
    with patch.object(engine, "_find_start_index") as mock_find:
        mock_find.return_value = temp_paths / "index" / "_index.db"
        with patch.object(engine, "_collect_index_paths") as mock_collect:
            mock_collect.return_value = [temp_paths / "index" / "_index.db"]
            with patch.object(engine, "_stage1_binary_search") as mock_stage1:
                mock_stage1.return_value = (
                    [SearchResult(path=src_primary, score=0.9, excerpt="seed")],
                    temp_paths / "index",
                )
                with patch.object(engine, "_stage2_lsp_expand") as mock_stage2:
                    mock_stage2.return_value = clustered_results
                    with patch.object(engine, "_stage3_cluster_prune") as mock_stage3:
                        mock_stage3.return_value = clustered_results
                        with patch.object(engine, "_cross_encoder_rerank") as mock_rerank:
                            mock_rerank.return_value = clustered_results
                            result = engine.staged_cascade_search(
                                query,
                                temp_paths / "src",
                                k=2,
                                coarse_k=20,
                            )
                            # Rerank is asked for the full budget (4) so path
                            # penalties can reorder before trimming to k=2.
                            mock_rerank.assert_called_once_with(query, clustered_results, 4)
                            assert [item.path for item in result.results] == [src_primary, src_secondary]
# =============================================================================
# Graceful Degradation Tests