feat: enhance search, ranking, reranker and CLI tooling across ccw and codex-lens

Major improvements to smart-search, chain-search cascade, ranking pipeline,
reranker factory, CLI history store, codex-lens integration, and uv-manager.
Simplify command-generator skill by inlining phases. Add comprehensive tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
catlog22
2026-03-16 20:35:08 +08:00
parent 1cd96b90e8
commit 5a4b18d9b1
73 changed files with 14684 additions and 2442 deletions

View File

@@ -0,0 +1,16 @@
{"query":"executeHybridMode dense_rerank semantic smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-semantic-routing","notes":"CCW semantic mode delegates to CodexLens dense_rerank."}
{"query":"parse CodexLens JSON output strip ANSI smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-json-fallback","notes":"Covers JSON/plain-text fallback handling for CodexLens output."}
{"query":"smart_search init embed search action schema","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-action-schema","notes":"Find the Zod schema that defines init/embed/search actions."}
{"query":"auto init missing job dedupe smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-auto-init","notes":"Targets background init/embed warmup and dedupe state."}
{"query":"smart_search exact mode fallback to CodexLens fts","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-exact-fallback","notes":"Tracks the exact-mode fallback path into CodexLens FTS."}
{"query":"smart_search settings snapshot embedding backend reranker backend staged stage2 mode","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-config-snapshot","notes":"Reads local config snapshot for embedding/reranker/staged pipeline settings."}
{"query":"embedding backend fastembed local litellm api config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-embedding-config","notes":"Local-only benchmark should resolve to fastembed defaults."}
{"query":"reranker backend onnx api legacy configuration","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-reranker-config","notes":"Covers both config dataclass fields and env overrides."}
{"query":"staged stage2 mode precomputed realtime static_global_graph","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-stage2-config","notes":"Benchmark matrix should exercise the three supported stage2 modes."}
{"query":"enable staged rerank stage 4 config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-stage4-rerank","notes":"Stage 4 rerank flag needs to stay enabled for local benchmarks."}
{"query":"cascade_search dense_rerank staged pipeline ChainSearchEngine","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-cascade","notes":"Baseline query for the central retrieval engine."}
{"query":"realtime LSP expand stage2 search pipeline","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-realtime","notes":"Targets realtime stage2 expansion logic."}
{"query":"static global graph stage2 expansion implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-static","notes":"Targets static_global_graph stage2 expansion logic."}
{"query":"cross encoder rerank stage 4 implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-rerank","notes":"Relevant for dense_rerank and staged rerank latency comparisons."}
{"query":"get_reranker factory onnx backend selection","relevant_paths":["codex-lens/src/codexlens/semantic/reranker/factory.py"],"intent":"reranker-factory","notes":"Keeps the benchmark aligned with local ONNX reranker selection."}
{"query":"EMBEDDING_BACKEND and RERANKER_BACKEND environment variables","relevant_paths":["codex-lens/src/codexlens/env_config.py"],"intent":"env-overrides","notes":"Covers CCW/CodexLens local-only environment overrides."}

View File

@@ -239,6 +239,7 @@ def main() -> None:
config.staged_clustering_strategy = str(args.staged_cluster_strategy or "path").strip().lower()
# Stability: on some Windows setups, DirectML/ONNX can crash under load.
config.embedding_use_gpu = False
config.reranker_use_gpu = False
registry = RegistryStore()
registry.initialize()
@@ -362,4 +363,3 @@ def main() -> None:
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,980 @@
#!/usr/bin/env python
"""Benchmark local-only staged stage2 modes for CCW smart_search queries.
This benchmark reuses the existing CodexLens benchmark style, but focuses on
the real search intents that drive CCW `smart_search`. It evaluates:
1. `dense_rerank` baseline
2. `staged` + `precomputed`
3. `staged` + `realtime`
4. `staged` + `static_global_graph`
Metrics:
- Hit@K
- MRR@K
- Recall@K
- latency (avg/p50/p95)
The runner is intentionally local-only. By default it uses:
- embedding backend: `fastembed`
- reranker backend: `onnx`
Examples:
python benchmarks/compare_ccw_smart_search_stage2.py --dry-run
python benchmarks/compare_ccw_smart_search_stage2.py --self-check
python benchmarks/compare_ccw_smart_search_stage2.py --source .. --k 10
python benchmarks/compare_ccw_smart_search_stage2.py --embedding-model code --reranker-model cross-encoder/ms-marco-MiniLM-L-6-v2
"""
from __future__ import annotations
import argparse
from copy import deepcopy
import gc
import json
import os
import re
import statistics
import sys
import time
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from codexlens.config import Config
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
from codexlens.search.ranking import (
QueryIntent,
detect_query_intent,
is_generated_artifact_path,
is_test_file,
query_prefers_lexical_search,
query_targets_generated_files,
)
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.registry import RegistryStore
# Repository root (two levels above benchmarks/) so CCW and CodexLens paths resolve together.
DEFAULT_SOURCE = Path(__file__).resolve().parents[2]
# Labeled JSONL dataset of CCW smart_search queries shipped next to this script.
DEFAULT_QUERIES_FILE = Path(__file__).parent / "accuracy_queries_ccw_smart_search.jsonl"
# Default location for the final benchmark result payload.
DEFAULT_OUTPUT = Path(__file__).parent / "results" / "ccw_smart_search_stage2.json"
# Supported staged stage-2 expansion modes compared by this benchmark.
VALID_STAGE2_MODES = ("precomputed", "realtime", "static_global_graph")
# Local-only backend allowlists: this runner never calls remote APIs.
VALID_LOCAL_EMBEDDING_BACKENDS = ("fastembed",)
VALID_LOCAL_RERANKER_BACKENDS = ("onnx", "fastembed", "legacy")
# Baseline smart_search methods evaluated before the staged matrix.
VALID_BASELINE_METHODS = ("auto", "fts", "hybrid")
# Default ONNX cross-encoder used when --reranker-model is not given.
DEFAULT_LOCAL_ONNX_RERANKER_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2"
def _now_ms() -> float:
return time.perf_counter() * 1000.0
def _normalize_path_key(path: str) -> str:
try:
candidate = Path(path)
if str(candidate) and (candidate.is_absolute() or re.match(r"^[A-Za-z]:", str(candidate))):
normalized = str(candidate.resolve())
else:
normalized = str(candidate)
except Exception:
normalized = path
normalized = normalized.replace("/", "\\")
if os.name == "nt":
normalized = normalized.lower()
return normalized
def _dedup_topk(paths: Iterable[str], k: int) -> List[str]:
output: List[str] = []
seen: set[str] = set()
for path in paths:
if path in seen:
continue
seen.add(path)
output.append(path)
if len(output) >= k:
break
return output
def _first_hit_rank(topk_paths: Sequence[str], relevant: set[str]) -> Optional[int]:
for index, path in enumerate(topk_paths, start=1):
if path in relevant:
return index
return None
def _mrr(ranks: Sequence[Optional[int]]) -> float:
values = [1.0 / rank for rank in ranks if rank and rank > 0]
return statistics.mean(values) if values else 0.0
def _mean(values: Sequence[float]) -> float:
return statistics.mean(values) if values else 0.0
def _percentile(values: Sequence[float], percentile: float) -> float:
if not values:
return 0.0
ordered = sorted(values)
if len(ordered) == 1:
return ordered[0]
index = (len(ordered) - 1) * percentile
lower = int(index)
upper = min(lower + 1, len(ordered) - 1)
if lower == upper:
return ordered[lower]
fraction = index - lower
return ordered[lower] + (ordered[upper] - ordered[lower]) * fraction
def _load_labeled_queries(path: Path, limit: Optional[int]) -> List[Dict[str, Any]]:
if not path.is_file():
raise SystemExit(f"Queries file does not exist: {path}")
output: List[Dict[str, Any]] = []
for raw_line in path.read_text(encoding="utf-8", errors="ignore").splitlines():
line = raw_line.strip()
if not line or line.startswith("#"):
continue
try:
item = json.loads(line)
except Exception as exc:
raise SystemExit(f"Invalid JSONL line in {path}: {raw_line!r} ({exc})") from exc
if not isinstance(item, dict) or "query" not in item or "relevant_paths" not in item:
raise SystemExit(f"Invalid query item (expected object with query/relevant_paths): {item!r}")
relevant_paths = item.get("relevant_paths")
if not isinstance(relevant_paths, list) or not relevant_paths:
raise SystemExit(f"Query item must include non-empty relevant_paths[]: {item!r}")
output.append(item)
if limit is not None and len(output) >= limit:
break
return output
def _resolve_expected_paths(source_root: Path, paths: Sequence[str]) -> Tuple[List[str], set[str], List[str]]:
    """Resolve dataset-relative paths against *source_root*.

    Returns a tuple of (display paths, normalized comparison-key set, paths
    that do not exist on disk). Missing paths are still included in the first
    two elements so the caller decides how to treat them.
    """
    resolved_display: List[str] = []
    resolved_keys: set[str] = set()
    missing: List[str] = []
    for raw_path in paths:
        candidate = Path(raw_path)
        if not candidate.is_absolute():
            candidate = (source_root / candidate).resolve()
        text = str(candidate)
        if not candidate.exists():
            missing.append(text)
        resolved_display.append(text)
        resolved_keys.add(_normalize_path_key(text))
    return resolved_display, resolved_keys, missing
def _validate_local_only_backends(embedding_backend: str, reranker_backend: str) -> None:
    """Abort via SystemExit unless both backends are in the local-only allowlists."""
    checks = (
        (embedding_backend, "--embedding-backend", VALID_LOCAL_EMBEDDING_BACKENDS),
        (reranker_backend, "--reranker-backend", VALID_LOCAL_RERANKER_BACKENDS),
    )
    for value, flag, allowed in checks:
        if value in allowed:
            continue
        raise SystemExit(
            "This runner is local-only. "
            f"{flag} must be one of {', '.join(allowed)}; got {value!r}"
        )
def _validate_stage2_modes(stage2_modes: Sequence[str]) -> List[str]:
    """Normalize, validate, and order-preservingly dedupe stage-2 modes.

    Raises SystemExit when no mode survives normalization or when the first
    unrecognized mode is encountered.
    """
    normalized: List[str] = []
    for mode in stage2_modes:
        cleaned = str(mode).strip().lower()
        if cleaned:
            normalized.append(cleaned)
    if not normalized:
        raise SystemExit("At least one --stage2-modes entry is required")
    for mode in normalized:
        if mode not in VALID_STAGE2_MODES:
            raise SystemExit(
                f"Invalid --stage2-modes entry: {mode} "
                f"(valid: {', '.join(VALID_STAGE2_MODES)})"
            )
    # dict.fromkeys keeps first-seen order, matching the original dedupe loop.
    return list(dict.fromkeys(normalized))
def _validate_baseline_methods(methods: Sequence[str]) -> List[str]:
    """Normalize, validate, and dedupe baseline methods.

    Unlike stage-2 modes, an empty result is allowed: baselines are optional.
    """
    normalized: List[str] = []
    for method in methods:
        cleaned = str(method).strip().lower()
        if cleaned:
            normalized.append(cleaned)
    for method in normalized:
        if method not in VALID_BASELINE_METHODS:
            raise SystemExit(
                f"Invalid --baseline-methods entry: {method} "
                f"(valid: {', '.join(VALID_BASELINE_METHODS)})"
            )
    # dict.fromkeys preserves first-seen order, same as the original loop.
    return list(dict.fromkeys(normalized))
@dataclass
class StrategyRun:
    """Outcome of one strategy executed against one labeled query."""

    # Unique key, e.g. "hybrid", "dense_rerank", or "staged:<mode>".
    strategy_key: str
    # Requested strategy name ("auto"/"fts"/"hybrid"/"dense_rerank"/"staged").
    strategy: str
    # Stage-2 expansion mode for staged runs; None for baselines.
    stage2_mode: Optional[str]
    # Method after "auto" routing resolved (fts/hybrid/dense_rerank/staged).
    effective_method: str
    # "cascade" for dense_rerank/staged runs, otherwise the effective method.
    execution_method: str
    # Wall-clock latency of the search call in milliseconds.
    latency_ms: float
    # Normalized, deduplicated top-k result path keys.
    topk_paths: List[str]
    # 1-based rank of the first relevant hit, or None when nothing hit.
    first_hit_rank: Optional[int]
    # Whether any relevant path appeared in the top-k.
    hit_at_k: bool
    # Fraction of the relevant paths recovered within the top-k.
    recall_at_k: float
    # Number of generated-artifact paths in the top-k (noise signal).
    generated_artifact_count: int
    # Number of test-file paths in the top-k (noise signal).
    test_file_count: int
    # "<ExceptionType>: <message>" when the run failed; None on success.
    error: Optional[str] = None
@dataclass
class QueryEvaluation:
    """All strategy runs recorded for a single labeled query."""

    # The benchmark query text.
    query: str
    # Optional intent label from the dataset.
    intent: Optional[str]
    # Optional free-form notes from the dataset.
    notes: Optional[str]
    # Resolved display paths of the expected relevant files.
    relevant_paths: List[str]
    # Mapping of strategy_key -> run result.
    runs: Dict[str, StrategyRun]
@dataclass
class PairwiseDelta:
    """Metric differences (mode_a minus mode_b) between two stage-2 modes."""

    mode_a: str
    mode_b: str
    hit_at_k_delta: float
    mrr_at_k_delta: float
    avg_recall_at_k_delta: float
    avg_latency_ms_delta: float
@dataclass
class StrategySpec:
    """Declarative description of one strategy to benchmark."""

    # Unique key used in summaries and progress output (e.g. "staged:realtime").
    strategy_key: str
    # Strategy name fed into method routing.
    strategy: str
    # Stage-2 mode for staged strategies; None otherwise.
    stage2_mode: Optional[str]
@dataclass
class StrategyRuntime:
    """Per-strategy search engine plus the isolated config/registry it owns."""

    strategy_spec: StrategySpec
    # Deep-copied Config so per-run mutations stay isolated between strategies.
    config: Config
    registry: RegistryStore
    engine: ChainSearchEngine
def _strategy_specs(
    stage2_modes: Sequence[str],
    include_dense_baseline: bool,
    *,
    baseline_methods: Sequence[str],
) -> List[StrategySpec]:
    """Build the ordered strategy matrix.

    Order: requested baselines first, then the optional dense_rerank baseline,
    then one staged entry per stage-2 mode.
    """
    specs = [
        StrategySpec(strategy_key=method, strategy=method, stage2_mode=None)
        for method in baseline_methods
    ]
    if include_dense_baseline:
        specs.append(StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None))
    specs.extend(
        StrategySpec(strategy_key=f"staged:{mode}", strategy="staged", stage2_mode=mode)
        for mode in stage2_modes
    )
    return specs
def _build_strategy_runtime(base_config: Config, strategy_spec: StrategySpec) -> StrategyRuntime:
    """Create an isolated engine/registry pair for one strategy."""
    # Deep-copy the config so per-run mutations never leak across strategies.
    config_copy = deepcopy(base_config)
    store = RegistryStore()
    store.initialize()
    return StrategyRuntime(
        strategy_spec=strategy_spec,
        config=config_copy,
        registry=store,
        engine=ChainSearchEngine(registry=store, mapper=PathMapper(), config=config_copy),
    )
def _select_effective_method(query: str, requested_method: str) -> str:
requested = str(requested_method).strip().lower()
if requested != "auto":
return requested
if query_targets_generated_files(query) or query_prefers_lexical_search(query):
return "fts"
intent = detect_query_intent(query)
if intent == QueryIntent.KEYWORD:
return "fts"
if intent == QueryIntent.SEMANTIC:
return "dense_rerank"
return "hybrid"
def _filter_dataset_by_query_match(
dataset: Sequence[Dict[str, Any]],
query_match: Optional[str],
) -> List[Dict[str, Any]]:
"""Filter labeled queries by case-insensitive substring match."""
needle = str(query_match or "").strip().casefold()
if not needle:
return list(dataset)
return [
dict(item)
for item in dataset
if needle in str(item.get("query", "")).casefold()
]
def _apply_query_limit(
dataset: Sequence[Dict[str, Any]],
query_limit: Optional[int],
) -> List[Dict[str, Any]]:
"""Apply the optional query limit after any dataset-level filtering."""
if query_limit is None:
return list(dataset)
return [dict(item) for item in list(dataset)[: max(0, int(query_limit))]]
def _write_json_payload(path: Path, payload: Dict[str, Any]) -> None:
"""Persist a benchmark payload as UTF-8 JSON."""
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
def _write_final_outputs(
    *,
    output_path: Path,
    progress_output: Optional[Path],
    payload: Dict[str, Any],
) -> None:
    """Write the completed payload to the result file and, when a progress
    file is configured, mirror it there so both end in the same final state."""
    targets = [output_path]
    if progress_output is not None:
        targets.append(progress_output)
    for target in targets:
        _write_json_payload(target, payload)
def _make_progress_payload(
*,
args: argparse.Namespace,
source_root: Path,
strategy_specs: Sequence[StrategySpec],
evaluations: Sequence[QueryEvaluation],
query_index: int,
total_queries: int,
run_index: int,
total_runs: int,
current_query: str,
current_strategy_key: str,
) -> Dict[str, Any]:
"""Create a partial progress snapshot for long benchmark runs."""
return {
"status": "running",
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
"source": str(source_root),
"queries_file": str(args.queries_file),
"query_count": len(evaluations),
"planned_query_count": total_queries,
"k": int(args.k),
"coarse_k": int(args.coarse_k),
"strategy_keys": [spec.strategy_key for spec in strategy_specs],
"progress": {
"completed_queries": query_index,
"total_queries": total_queries,
"completed_runs": run_index,
"total_runs": total_runs,
"current_query": current_query,
"current_strategy_key": current_strategy_key,
},
"evaluations": [
{
"query": evaluation.query,
"intent": evaluation.intent,
"notes": evaluation.notes,
"relevant_paths": evaluation.relevant_paths,
"runs": {key: asdict(run) for key, run in evaluation.runs.items()},
}
for evaluation in evaluations
],
}
def _make_search_options(method: str, *, k: int) -> SearchOptions:
normalized = str(method).strip().lower()
if normalized == "fts":
return SearchOptions(
total_limit=k,
hybrid_mode=False,
enable_fuzzy=False,
enable_vector=False,
pure_vector=False,
enable_cascade=False,
)
if normalized == "hybrid":
return SearchOptions(
total_limit=k,
hybrid_mode=True,
enable_fuzzy=False,
enable_vector=True,
pure_vector=False,
enable_cascade=False,
)
if normalized in {"dense_rerank", "staged"}:
return SearchOptions(
total_limit=k,
hybrid_mode=True,
enable_fuzzy=False,
enable_vector=True,
pure_vector=False,
enable_cascade=True,
)
raise ValueError(f"Unsupported benchmark method: {method}")
def _run_strategy(
    engine: ChainSearchEngine,
    config: Config,
    *,
    strategy_spec: StrategySpec,
    query: str,
    source_path: Path,
    k: int,
    coarse_k: int,
    relevant: set[str],
) -> StrategyRun:
    """Execute one strategy for one query and score the result.

    Temporarily mutates ``config.cascade_strategy`` / ``config.staged_stage2_mode``
    for the run and restores both in ``finally``, so the shared runtime config
    is never left dirty. Exceptions are captured into ``StrategyRun.error``
    rather than propagated, so one failing run cannot abort the benchmark.
    """
    # Reduce allocator noise between timed runs.
    gc.collect()
    effective_method = _select_effective_method(query, strategy_spec.strategy)
    # dense_rerank/staged both execute via the cascade entrypoint.
    execution_method = "cascade" if effective_method in {"dense_rerank", "staged"} else effective_method
    # Snapshot config fields we may mutate so the finally-block can restore them.
    previous_cascade_strategy = getattr(config, "cascade_strategy", None)
    previous_stage2_mode = getattr(config, "staged_stage2_mode", None)
    start_ms = _now_ms()
    try:
        options = _make_search_options(
            "staged" if strategy_spec.strategy == "staged" else effective_method,
            k=k,
        )
        if strategy_spec.strategy == "staged":
            config.cascade_strategy = "staged"
            if strategy_spec.stage2_mode:
                config.staged_stage2_mode = strategy_spec.stage2_mode
            result = engine.cascade_search(
                query=query,
                source_path=source_path,
                k=k,
                coarse_k=coarse_k,
                options=options,
                strategy="staged",
            )
        elif effective_method == "dense_rerank":
            config.cascade_strategy = "dense_rerank"
            result = engine.cascade_search(
                query=query,
                source_path=source_path,
                k=k,
                coarse_k=coarse_k,
                options=options,
                strategy="dense_rerank",
            )
        else:
            # fts / hybrid go through the plain search entrypoint.
            result = engine.search(
                query=query,
                source_path=source_path,
                options=options,
            )
        latency_ms = _now_ms() - start_ms
        # Keep only items that expose a non-empty path, then normalize + dedupe.
        paths_raw = [item.path for item in (result.results or []) if getattr(item, "path", None)]
        topk = _dedup_topk((_normalize_path_key(path) for path in paths_raw), k=k)
        rank = _first_hit_rank(topk, relevant)
        recall = 0.0
        if relevant:
            recall = len(set(topk) & relevant) / float(len(relevant))
        return StrategyRun(
            strategy_key=strategy_spec.strategy_key,
            strategy=strategy_spec.strategy,
            stage2_mode=strategy_spec.stage2_mode,
            effective_method=effective_method,
            execution_method=execution_method,
            latency_ms=latency_ms,
            topk_paths=topk,
            first_hit_rank=rank,
            hit_at_k=rank is not None,
            recall_at_k=recall,
            generated_artifact_count=sum(1 for path in topk if is_generated_artifact_path(path)),
            test_file_count=sum(1 for path in topk if is_test_file(path)),
            error=None,
        )
    except Exception as exc:
        # Record the failure (with elapsed time) instead of aborting the benchmark.
        latency_ms = _now_ms() - start_ms
        return StrategyRun(
            strategy_key=strategy_spec.strategy_key,
            strategy=strategy_spec.strategy,
            stage2_mode=strategy_spec.stage2_mode,
            effective_method=effective_method,
            execution_method=execution_method,
            latency_ms=latency_ms,
            topk_paths=[],
            first_hit_rank=None,
            hit_at_k=False,
            recall_at_k=0.0,
            generated_artifact_count=0,
            test_file_count=0,
            error=f"{type(exc).__name__}: {exc}",
        )
    finally:
        # Always restore the shared config fields mutated above.
        config.cascade_strategy = previous_cascade_strategy
        config.staged_stage2_mode = previous_stage2_mode
def _summarize_runs(runs: Sequence[StrategyRun]) -> Dict[str, Any]:
    """Aggregate per-query runs into one strategy-level metric summary.

    Latency stats cover only successful runs; accuracy and noise stats cover
    every run (errored runs count as misses).
    """
    latencies = [run.latency_ms for run in runs if not run.error]
    method_counts: Dict[str, int] = {}
    for run in runs:
        method_counts[run.effective_method] = 1 + method_counts.get(run.effective_method, 0)
    hit_flags = [1.0 if run.hit_at_k else 0.0 for run in runs]
    summary: Dict[str, Any] = {
        "query_count": len(runs),
        "hit_at_k": _mean(hit_flags),
        "mrr_at_k": _mrr([run.first_hit_rank for run in runs]),
        "avg_recall_at_k": _mean([run.recall_at_k for run in runs]),
        "avg_latency_ms": _mean(latencies),
        "p50_latency_ms": _percentile(latencies, 0.50),
        "p95_latency_ms": _percentile(latencies, 0.95),
        "avg_generated_artifact_count": _mean([float(run.generated_artifact_count) for run in runs]),
        "avg_test_file_count": _mean([float(run.test_file_count) for run in runs]),
        "runs_with_generated_artifacts": sum(1 for run in runs if run.generated_artifact_count > 0),
        "runs_with_test_files": sum(1 for run in runs if run.test_file_count > 0),
        "effective_methods": method_counts,
        "errors": sum(1 for run in runs if run.error),
    }
    return summary
def _build_pairwise_deltas(stage2_summaries: Dict[str, Dict[str, Any]]) -> List[PairwiseDelta]:
    """Compute A-minus-B metric deltas for every unordered pair of stage-2 modes."""
    modes = list(stage2_summaries)
    deltas: List[PairwiseDelta] = []
    for offset, mode_a in enumerate(modes):
        summary_a = stage2_summaries[mode_a]
        for mode_b in modes[offset + 1:]:
            summary_b = stage2_summaries[mode_b]
            deltas.append(
                PairwiseDelta(
                    mode_a=mode_a,
                    mode_b=mode_b,
                    hit_at_k_delta=summary_a["hit_at_k"] - summary_b["hit_at_k"],
                    mrr_at_k_delta=summary_a["mrr_at_k"] - summary_b["mrr_at_k"],
                    avg_recall_at_k_delta=summary_a["avg_recall_at_k"] - summary_b["avg_recall_at_k"],
                    avg_latency_ms_delta=summary_a["avg_latency_ms"] - summary_b["avg_latency_ms"],
                )
            )
    return deltas
def _make_plan_payload(
*,
args: argparse.Namespace,
source_root: Path,
dataset: Sequence[Dict[str, Any]],
baseline_methods: Sequence[str],
stage2_modes: Sequence[str],
strategy_specs: Sequence[StrategySpec],
) -> Dict[str, Any]:
return {
"mode": "dry-run" if args.dry_run else "self-check",
"local_only": True,
"source": str(source_root),
"queries_file": str(args.queries_file),
"query_count": len(dataset),
"query_match": args.query_match,
"k": int(args.k),
"coarse_k": int(args.coarse_k),
"baseline_methods": list(baseline_methods),
"stage2_modes": list(stage2_modes),
"strategy_keys": [spec.strategy_key for spec in strategy_specs],
"local_backends": {
"embedding_backend": args.embedding_backend,
"embedding_model": args.embedding_model,
"reranker_backend": args.reranker_backend,
"reranker_model": args.reranker_model,
"embedding_use_gpu": bool(args.embedding_use_gpu),
"reranker_use_gpu": bool(args.reranker_use_gpu),
},
"output": str(args.output),
"progress_output": str(args.progress_output) if args.progress_output else None,
"dataset_preview": [
{
"query": item.get("query"),
"intent": item.get("intent"),
"relevant_paths": item.get("relevant_paths"),
}
for item in list(dataset)[: min(3, len(dataset))]
],
}
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser for the stage-2 comparison benchmark."""
    # The module docstring doubles as the --help description text.
    parser = argparse.ArgumentParser(description=__doc__)
    # --- Dataset / source selection ---------------------------------------
    parser.add_argument(
        "--source",
        type=Path,
        default=DEFAULT_SOURCE,
        help="Source root to benchmark. Defaults to the repository root so CCW and CodexLens paths resolve together.",
    )
    parser.add_argument(
        "--queries-file",
        type=Path,
        default=DEFAULT_QUERIES_FILE,
        help="Labeled JSONL dataset of CCW smart_search queries",
    )
    parser.add_argument("--query-limit", type=int, default=None, help="Optional query limit")
    parser.add_argument(
        "--query-match",
        type=str,
        default=None,
        help="Optional case-insensitive substring filter for selecting specific benchmark queries.",
    )
    # --- Retrieval sizes and strategy matrix ------------------------------
    parser.add_argument("--k", type=int, default=10, help="Top-k to evaluate")
    parser.add_argument("--coarse-k", type=int, default=100, help="Stage-1 coarse_k")
    parser.add_argument(
        "--baseline-methods",
        nargs="*",
        default=list(VALID_BASELINE_METHODS),
        help="Requested smart_search baselines to compare before staged modes (valid: auto, fts, hybrid).",
    )
    parser.add_argument(
        "--stage2-modes",
        nargs="*",
        default=list(VALID_STAGE2_MODES),
        help="Stage-2 modes to compare",
    )
    parser.add_argument("--warmup", type=int, default=0, help="Warmup iterations per strategy")
    # --- Local-only backend configuration ---------------------------------
    parser.add_argument(
        "--embedding-backend",
        default="fastembed",
        help="Local embedding backend. This runner only accepts fastembed.",
    )
    parser.add_argument(
        "--embedding-model",
        default="code",
        help="Embedding model/profile for the local embedding backend",
    )
    parser.add_argument(
        "--embedding-use-gpu",
        action="store_true",
        help="Enable GPU acceleration for local embeddings. Off by default for stability.",
    )
    parser.add_argument(
        "--reranker-backend",
        default="onnx",
        help="Local reranker backend. Supported local values: onnx, fastembed, legacy.",
    )
    parser.add_argument(
        "--reranker-model",
        default=DEFAULT_LOCAL_ONNX_RERANKER_MODEL,
        help="Reranker model name for the local reranker backend",
    )
    parser.add_argument(
        "--reranker-use-gpu",
        action="store_true",
        help="Enable GPU acceleration for the local reranker. Off by default for stability.",
    )
    # --- Run-mode toggles and outputs -------------------------------------
    parser.add_argument(
        "--skip-dense-baseline",
        action="store_true",
        help="Only compare staged stage2 modes and skip the dense_rerank baseline.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Validate dataset/config and print the benchmark plan without running retrieval.",
    )
    parser.add_argument(
        "--self-check",
        action="store_true",
        help="Smoke-check the entrypoint by validating dataset, source paths, and stage matrix wiring.",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=DEFAULT_OUTPUT,
        help="Output JSON path",
    )
    parser.add_argument(
        "--progress-output",
        type=Path,
        default=None,
        help="Optional JSON path updated after each query with partial progress and completed runs.",
    )
    return parser
def main() -> None:
    """CLI entrypoint: validate inputs, run the strategy matrix, write results."""
    parser = build_parser()
    args = parser.parse_args()
    # --- Input validation --------------------------------------------------
    source_root = args.source.expanduser().resolve()
    if not source_root.exists():
        raise SystemExit(f"Source path does not exist: {source_root}")
    if int(args.k) <= 0:
        raise SystemExit("--k must be > 0")
    if int(args.coarse_k) <= 0:
        raise SystemExit("--coarse-k must be > 0")
    if int(args.coarse_k) < int(args.k):
        raise SystemExit("--coarse-k must be >= --k")
    if int(args.warmup) < 0:
        raise SystemExit("--warmup must be >= 0")
    embedding_backend = str(args.embedding_backend).strip().lower()
    reranker_backend = str(args.reranker_backend).strip().lower()
    _validate_local_only_backends(embedding_backend, reranker_backend)
    baseline_methods = _validate_baseline_methods(args.baseline_methods)
    stage2_modes = _validate_stage2_modes(args.stage2_modes)
    # --- Dataset loading and filtering --------------------------------------
    dataset = _load_labeled_queries(args.queries_file, None)
    dataset = _filter_dataset_by_query_match(dataset, args.query_match)
    dataset = _apply_query_limit(dataset, args.query_limit)
    if not dataset:
        raise SystemExit("No queries to run")
    # Fail fast when any labeled relevant path does not exist on disk.
    missing_paths: List[str] = []
    for item in dataset:
        _, _, item_missing = _resolve_expected_paths(source_root, [str(path) for path in item["relevant_paths"]])
        missing_paths.extend(item_missing)
    if missing_paths:
        preview = ", ".join(missing_paths[:3])
        raise SystemExit(
            "Dataset relevant_paths do not resolve under the selected source root. "
            f"Examples: {preview}"
        )
    strategy_specs = _strategy_specs(
        stage2_modes,
        include_dense_baseline=not args.skip_dense_baseline,
        baseline_methods=baseline_methods,
    )
    # --- Dry-run / self-check: print the plan and exit without searching ----
    if args.dry_run or args.self_check:
        payload = _make_plan_payload(
            args=args,
            source_root=source_root,
            dataset=dataset,
            baseline_methods=baseline_methods,
            stage2_modes=stage2_modes,
            strategy_specs=strategy_specs,
        )
        if args.self_check:
            payload["status"] = "ok"
            payload["checks"] = {
                "dataset_loaded": True,
                "stage2_matrix_size": len(stage2_modes),
                "local_only_validation": True,
                "source_path_exists": True,
            }
        print(json.dumps(payload, ensure_ascii=False, indent=2))
        return
    # --- Base configuration shared by every strategy runtime ----------------
    config = Config.load()
    config.cascade_strategy = "staged"
    config.enable_staged_rerank = True
    config.enable_cross_encoder_rerank = True
    config.embedding_backend = embedding_backend
    config.embedding_model = str(args.embedding_model).strip()
    config.embedding_use_gpu = bool(args.embedding_use_gpu)
    config.embedding_auto_embed_missing = False
    config.reranker_backend = reranker_backend
    config.reranker_model = str(args.reranker_model).strip()
    config.reranker_use_gpu = bool(args.reranker_use_gpu)
    # Each strategy gets its own engine/registry over a deep-copied config.
    strategy_runtimes = {
        spec.strategy_key: _build_strategy_runtime(config, spec)
        for spec in strategy_specs
    }
    evaluations: List[QueryEvaluation] = []
    total_queries = len(dataset)
    total_runs = total_queries * len(strategy_specs)
    completed_runs = 0
    try:
        # Optional warmup: re-run the first query with reduced k to prime caches.
        if int(args.warmup) > 0:
            warm_query = str(dataset[0]["query"]).strip()
            warm_relevant_paths = [str(path) for path in dataset[0]["relevant_paths"]]
            _, warm_relevant, _ = _resolve_expected_paths(source_root, warm_relevant_paths)
            for spec in strategy_specs:
                runtime = strategy_runtimes[spec.strategy_key]
                for _ in range(int(args.warmup)):
                    _run_strategy(
                        runtime.engine,
                        runtime.config,
                        strategy_spec=spec,
                        query=warm_query,
                        source_path=source_root,
                        k=min(int(args.k), 5),
                        coarse_k=min(int(args.coarse_k), 50),
                        relevant=warm_relevant,
                    )
        # --- Main evaluation loop: every query against every strategy -------
        for index, item in enumerate(dataset, start=1):
            query = str(item.get("query", "")).strip()
            if not query:
                continue
            print(f"[query {index}/{total_queries}] {query}", flush=True)
            relevant_paths, relevant, _ = _resolve_expected_paths(
                source_root,
                [str(path) for path in item["relevant_paths"]],
            )
            runs: Dict[str, StrategyRun] = {}
            for spec in strategy_specs:
                # Persist a progress snapshot before each run so long benchmark
                # sessions can be monitored (and salvaged) mid-flight.
                if args.progress_output is not None:
                    _write_json_payload(
                        args.progress_output,
                        _make_progress_payload(
                            args=args,
                            source_root=source_root,
                            strategy_specs=strategy_specs,
                            evaluations=evaluations,
                            query_index=index - 1,
                            total_queries=total_queries,
                            run_index=completed_runs,
                            total_runs=total_runs,
                            current_query=query,
                            current_strategy_key=spec.strategy_key,
                        ),
                    )
                print(
                    f"[run {completed_runs + 1}/{total_runs}] "
                    f"strategy={spec.strategy_key} query={query}",
                    flush=True,
                )
                runtime = strategy_runtimes[spec.strategy_key]
                runs[spec.strategy_key] = _run_strategy(
                    runtime.engine,
                    runtime.config,
                    strategy_spec=spec,
                    query=query,
                    source_path=source_root,
                    k=int(args.k),
                    coarse_k=int(args.coarse_k),
                    relevant=relevant,
                )
                completed_runs += 1
                run = runs[spec.strategy_key]
                outcome = "error" if run.error else "ok"
                print(
                    f"[done {completed_runs}/{total_runs}] "
                    f"strategy={spec.strategy_key} outcome={outcome} "
                    f"latency_ms={run.latency_ms:.2f} "
                    f"first_hit_rank={run.first_hit_rank}",
                    flush=True,
                )
            evaluations.append(
                QueryEvaluation(
                    query=query,
                    intent=str(item.get("intent")) if item.get("intent") is not None else None,
                    notes=str(item.get("notes")) if item.get("notes") is not None else None,
                    relevant_paths=relevant_paths,
                    runs=runs,
                )
            )
            # Snapshot again after each fully-completed query.
            if args.progress_output is not None:
                _write_json_payload(
                    args.progress_output,
                    _make_progress_payload(
                        args=args,
                        source_root=source_root,
                        strategy_specs=strategy_specs,
                        evaluations=evaluations,
                        query_index=index,
                        total_queries=total_queries,
                        run_index=completed_runs,
                        total_runs=total_runs,
                        current_query=query,
                        current_strategy_key="complete",
                    ),
                )
    finally:
        # Always release engines and registries, even on error or interrupt.
        for runtime in strategy_runtimes.values():
            try:
                runtime.engine.close()
            except Exception:
                pass
        for runtime in strategy_runtimes.values():
            try:
                runtime.registry.close()
            except Exception:
                pass
    # --- Aggregation and reporting ------------------------------------------
    strategy_summaries: Dict[str, Dict[str, Any]] = {}
    for spec in strategy_specs:
        spec_runs = [evaluation.runs[spec.strategy_key] for evaluation in evaluations if spec.strategy_key in evaluation.runs]
        summary = _summarize_runs(spec_runs)
        summary["strategy"] = spec.strategy
        summary["stage2_mode"] = spec.stage2_mode
        strategy_summaries[spec.strategy_key] = summary
    # Stage-2 matrix keeps only staged strategies, keyed by bare mode name.
    stage2_mode_matrix = {
        mode: strategy_summaries[f"staged:{mode}"]
        for mode in stage2_modes
        if f"staged:{mode}" in strategy_summaries
    }
    pairwise_deltas = [asdict(item) for item in _build_pairwise_deltas(stage2_mode_matrix)]
    payload = {
        "status": "completed",
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "source": str(source_root),
        "queries_file": str(args.queries_file),
        "query_count": len(evaluations),
        "query_match": args.query_match,
        "k": int(args.k),
        "coarse_k": int(args.coarse_k),
        "local_only": True,
        "strategies": strategy_summaries,
        "stage2_mode_matrix": stage2_mode_matrix,
        "pairwise_stage2_deltas": pairwise_deltas,
        "config": {
            "embedding_backend": config.embedding_backend,
            "embedding_model": config.embedding_model,
            "embedding_use_gpu": bool(config.embedding_use_gpu),
            "reranker_backend": config.reranker_backend,
            "reranker_model": config.reranker_model,
            "reranker_use_gpu": bool(config.reranker_use_gpu),
            "enable_staged_rerank": bool(config.enable_staged_rerank),
            "enable_cross_encoder_rerank": bool(config.enable_cross_encoder_rerank),
        },
        "progress_output": str(args.progress_output) if args.progress_output else None,
        "evaluations": [
            {
                "query": evaluation.query,
                "intent": evaluation.intent,
                "notes": evaluation.notes,
                "relevant_paths": evaluation.relevant_paths,
                "runs": {key: asdict(run) for key, run in evaluation.runs.items()},
            }
            for evaluation in evaluations
        ],
    }
    _write_final_outputs(
        output_path=args.output,
        progress_output=args.progress_output,
        payload=payload,
    )
    print(json.dumps(payload, ensure_ascii=False, indent=2))
# Script entrypoint: run the benchmark CLI only when executed directly.
if __name__ == "__main__":
    main()

View File

@@ -280,8 +280,9 @@ def main() -> None:
if args.staged_cluster_strategy:
config.staged_clustering_strategy = str(args.staged_cluster_strategy)
# Stability: on some Windows setups, fastembed + DirectML can crash under load.
# Dense_rerank uses the embedding backend that matches the index; force CPU here.
# Force local embeddings and reranking onto CPU for reproducible benchmark runs.
config.embedding_use_gpu = False
config.reranker_use_gpu = False
registry = RegistryStore()
registry.initialize()
mapper = PathMapper()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,526 @@
{
"timestamp": "2026-03-14 23:16:55",
"source": "D:\\Claude_dms3",
"queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl",
"query_count": 4,
"k": 10,
"coarse_k": 100,
"local_only": true,
"strategies": {
"dense_rerank": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 20171.940174996853,
"p50_latency_ms": 14222.247749984264,
"p95_latency_ms": 35222.31535999476,
"errors": 0,
"strategy": "dense_rerank",
"stage2_mode": null
},
"staged:precomputed": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13679.793299987912,
"p50_latency_ms": 12918.63379997015,
"p95_latency_ms": 16434.964765003322,
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"staged:realtime": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13885.101849973202,
"p50_latency_ms": 13826.323699980974,
"p95_latency_ms": 14867.712269958853,
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"staged:static_global_graph": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13336.124025002122,
"p50_latency_ms": 13415.476950019598,
"p95_latency_ms": 13514.329230004549,
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"stage2_mode_matrix": {
"precomputed": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13679.793299987912,
"p50_latency_ms": 12918.63379997015,
"p95_latency_ms": 16434.964765003322,
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"realtime": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13885.101849973202,
"p50_latency_ms": 13826.323699980974,
"p95_latency_ms": 14867.712269958853,
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"static_global_graph": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13336.124025002122,
"p50_latency_ms": 13415.476950019598,
"p95_latency_ms": 13514.329230004549,
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"pairwise_stage2_deltas": [
{
"mode_a": "precomputed",
"mode_b": "realtime",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": -205.30854998528957
},
{
"mode_a": "precomputed",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 343.66927498579025
},
{
"mode_a": "realtime",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 548.9778249710798
}
],
"config": {
"embedding_backend": "fastembed",
"embedding_model": "code",
"embedding_use_gpu": false,
"reranker_backend": "onnx",
"reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"enable_staged_rerank": true,
"enable_cross_encoder_rerank": true
},
"evaluations": [
{
"query": "executeHybridMode dense_rerank semantic smart_search",
"intent": "ccw-semantic-routing",
"notes": "CCW semantic mode delegates to CodexLens dense_rerank.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 38829.27079999447,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts",
"d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 16915.833400011063,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 13961.2567999959,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 12986.330999970436,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "parse CodexLens JSON output strip ANSI smart_search",
"intent": "ccw-json-fallback",
"notes": "Covers JSON/plain-text fallback handling for CodexLens output.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 14782.901199996471,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\codex-lens-lsp.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\queue\\queueexecuteinsession.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-dashboard\\queuepanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usewebsocket.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useflows.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-error-monitoring.spec.ts",
"d:\\claude_dms3\\ccw\\tests\\native-session-discovery.test.ts",
"d:\\claude_dms3\\ccw\\src\\core\\services\\checkpoint-service.ts",
"d:\\claude_dms3\\ccw\\tests\\integration\\system-routes.test.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 13710.042499959469,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 15027.674999952316,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13389.622500002384,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "smart_search init embed search action schema",
"intent": "ccw-action-schema",
"notes": "Find the Zod schema that defines init/embed/search actions.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 13661.594299972057,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\discovery.spec.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\__tests__\\ask-question.test.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\a2ui\\a2uiwebsockethandler.js",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\dashboard.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 12127.225099980831,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 12860.084999978542,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13441.331400036812,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "auto init missing job dedupe smart_search",
"intent": "ccw-auto-init",
"notes": "Targets background init/embed warmup and dedupe state.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 13413.994400024414,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\memory-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usememory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\batchoperationtoolbar.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\memory.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useprompthistory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\flowstore.ts",
"d:\\claude_dms3\\ccw\\src\\services\\deepwiki-service.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\claude-routes.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 11966.072200000286,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 13691.39059996605,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13527.211199998856,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
}
]
}

View File

@@ -0,0 +1,415 @@
{
"timestamp": "2026-03-15 00:19:16",
"source": "D:\\Claude_dms3",
"queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl",
"query_count": 1,
"k": 10,
"coarse_k": 100,
"local_only": true,
"strategies": {
"auto": {
"query_count": 1,
"hit_at_k": 1.0,
"mrr_at_k": 1.0,
"avg_recall_at_k": 1.0,
"avg_latency_ms": 1377.3565999865532,
"p50_latency_ms": 1377.3565999865532,
"p95_latency_ms": 1377.3565999865532,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"fts": 1
},
"errors": 0,
"strategy": "auto",
"stage2_mode": null
},
"fts": {
"query_count": 1,
"hit_at_k": 1.0,
"mrr_at_k": 1.0,
"avg_recall_at_k": 1.0,
"avg_latency_ms": 1460.0819000601768,
"p50_latency_ms": 1460.0819000601768,
"p95_latency_ms": 1460.0819000601768,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"fts": 1
},
"errors": 0,
"strategy": "fts",
"stage2_mode": null
},
"hybrid": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 45991.74140000343,
"p50_latency_ms": 45991.74140000343,
"p95_latency_ms": 45991.74140000343,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"hybrid": 1
},
"errors": 0,
"strategy": "hybrid",
"stage2_mode": null
},
"dense_rerank": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 22739.62610000372,
"p50_latency_ms": 22739.62610000372,
"p95_latency_ms": 22739.62610000372,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 2.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 1,
"effective_methods": {
"dense_rerank": 1
},
"errors": 0,
"strategy": "dense_rerank",
"stage2_mode": null
},
"staged:precomputed": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14900.017599999905,
"p50_latency_ms": 14900.017599999905,
"p95_latency_ms": 14900.017599999905,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"staged:realtime": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14104.314599990845,
"p50_latency_ms": 14104.314599990845,
"p95_latency_ms": 14104.314599990845,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"staged:static_global_graph": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 11906.852500021458,
"p50_latency_ms": 11906.852500021458,
"p95_latency_ms": 11906.852500021458,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"stage2_mode_matrix": {
"precomputed": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14900.017599999905,
"p50_latency_ms": 14900.017599999905,
"p95_latency_ms": 14900.017599999905,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"realtime": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14104.314599990845,
"p50_latency_ms": 14104.314599990845,
"p95_latency_ms": 14104.314599990845,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"static_global_graph": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 11906.852500021458,
"p50_latency_ms": 11906.852500021458,
"p95_latency_ms": 11906.852500021458,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"pairwise_stage2_deltas": [
{
"mode_a": "precomputed",
"mode_b": "realtime",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 795.7030000090599
},
{
"mode_a": "precomputed",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 2993.165099978447
},
{
"mode_a": "realtime",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 2197.462099969387
}
],
"config": {
"embedding_backend": "fastembed",
"embedding_model": "code",
"embedding_use_gpu": false,
"reranker_backend": "onnx",
"reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"reranker_use_gpu": false,
"enable_staged_rerank": true,
"enable_cross_encoder_rerank": true
},
"evaluations": [
{
"query": "executeHybridMode dense_rerank semantic smart_search",
"intent": "ccw-semantic-routing",
"notes": "CCW semantic mode delegates to CodexLens dense_rerank.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"auto": {
"strategy_key": "auto",
"strategy": "auto",
"stage2_mode": null,
"effective_method": "fts",
"execution_method": "fts",
"latency_ms": 1377.3565999865532,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"first_hit_rank": 1,
"hit_at_k": true,
"recall_at_k": 1.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"fts": {
"strategy_key": "fts",
"strategy": "fts",
"stage2_mode": null,
"effective_method": "fts",
"execution_method": "fts",
"latency_ms": 1460.0819000601768,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"first_hit_rank": 1,
"hit_at_k": true,
"recall_at_k": 1.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"hybrid": {
"strategy_key": "hybrid",
"strategy": "hybrid",
"stage2_mode": null,
"effective_method": "hybrid",
"execution_method": "hybrid",
"latency_ms": 45991.74140000343,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\config\\litellm-api-config-manager.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py",
"d:\\claude_dms3\\ccw\\src\\commands\\core-memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\scripts\\generate_embeddings.py",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\notification-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\team-msg.ts",
"d:\\claude_dms3\\ccw\\src\\types\\remote-notification.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-store.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"effective_method": "dense_rerank",
"execution_method": "cascade",
"latency_ms": 22739.62610000372,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts",
"d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 2,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 14900.017599999905,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 14104.314599990845,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 11906.852500021458,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
}
}
}
]
}