feat: enhance search, ranking, reranker and CLI tooling across ccw and codex-lens

Major improvements to smart-search, chain-search cascade, ranking pipeline,
reranker factory, CLI history store, codex-lens integration, and uv-manager.
Simplify command-generator skill by inlining phases. Add comprehensive tests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
catlog22
2026-03-16 20:35:08 +08:00
parent 1cd96b90e8
commit 5a4b18d9b1
73 changed files with 14684 additions and 2442 deletions

View File

@@ -0,0 +1,16 @@
{"query":"executeHybridMode dense_rerank semantic smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-semantic-routing","notes":"CCW semantic mode delegates to CodexLens dense_rerank."}
{"query":"parse CodexLens JSON output strip ANSI smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-json-fallback","notes":"Covers JSON/plain-text fallback handling for CodexLens output."}
{"query":"smart_search init embed search action schema","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-action-schema","notes":"Find the Zod schema that defines init/embed/search actions."}
{"query":"auto init missing job dedupe smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-auto-init","notes":"Targets background init/embed warmup and dedupe state."}
{"query":"smart_search exact mode fallback to CodexLens fts","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-exact-fallback","notes":"Tracks the exact-mode fallback path into CodexLens FTS."}
{"query":"smart_search settings snapshot embedding backend reranker backend staged stage2 mode","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-config-snapshot","notes":"Reads local config snapshot for embedding/reranker/staged pipeline settings."}
{"query":"embedding backend fastembed local litellm api config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-embedding-config","notes":"Local-only benchmark should resolve to fastembed defaults."}
{"query":"reranker backend onnx api legacy configuration","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-reranker-config","notes":"Covers both config dataclass fields and env overrides."}
{"query":"staged stage2 mode precomputed realtime static_global_graph","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-stage2-config","notes":"Benchmark matrix should exercise the three supported stage2 modes."}
{"query":"enable staged rerank stage 4 config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-stage4-rerank","notes":"Stage 4 rerank flag needs to stay enabled for local benchmarks."}
{"query":"cascade_search dense_rerank staged pipeline ChainSearchEngine","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-cascade","notes":"Baseline query for the central retrieval engine."}
{"query":"realtime LSP expand stage2 search pipeline","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-realtime","notes":"Targets realtime stage2 expansion logic."}
{"query":"static global graph stage2 expansion implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-static","notes":"Targets static_global_graph stage2 expansion logic."}
{"query":"cross encoder rerank stage 4 implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-rerank","notes":"Relevant for dense_rerank and staged rerank latency comparisons."}
{"query":"get_reranker factory onnx backend selection","relevant_paths":["codex-lens/src/codexlens/semantic/reranker/factory.py"],"intent":"reranker-factory","notes":"Keeps the benchmark aligned with local ONNX reranker selection."}
{"query":"EMBEDDING_BACKEND and RERANKER_BACKEND environment variables","relevant_paths":["codex-lens/src/codexlens/env_config.py"],"intent":"env-overrides","notes":"Covers CCW/CodexLens local-only environment overrides."}

View File

@@ -239,6 +239,7 @@ def main() -> None:
config.staged_clustering_strategy = str(args.staged_cluster_strategy or "path").strip().lower()
# Stability: on some Windows setups, DirectML/ONNX can crash under load.
config.embedding_use_gpu = False
config.reranker_use_gpu = False
registry = RegistryStore()
registry.initialize()
@@ -362,4 +363,3 @@ def main() -> None:
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,980 @@
#!/usr/bin/env python
"""Benchmark local-only staged stage2 modes for CCW smart_search queries.
This benchmark reuses the existing CodexLens benchmark style, but focuses on
the real search intents that drive CCW `smart_search`. It evaluates:
1. `dense_rerank` baseline
2. `staged` + `precomputed`
3. `staged` + `realtime`
4. `staged` + `static_global_graph`
Metrics:
- Hit@K
- MRR@K
- Recall@K
- latency (avg/p50/p95)
The runner is intentionally local-only. By default it uses:
- embedding backend: `fastembed`
- reranker backend: `onnx`
Examples:
python benchmarks/compare_ccw_smart_search_stage2.py --dry-run
python benchmarks/compare_ccw_smart_search_stage2.py --self-check
python benchmarks/compare_ccw_smart_search_stage2.py --source .. --k 10
python benchmarks/compare_ccw_smart_search_stage2.py --embedding-model code --reranker-model cross-encoder/ms-marco-MiniLM-L-6-v2
"""
from __future__ import annotations
import argparse
from copy import deepcopy
import gc
import json
import os
import re
import statistics
import sys
import time
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from codexlens.config import Config
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
from codexlens.search.ranking import (
QueryIntent,
detect_query_intent,
is_generated_artifact_path,
is_test_file,
query_prefers_lexical_search,
query_targets_generated_files,
)
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.registry import RegistryStore
# Repository root (two levels above benchmarks/) so CCW and CodexLens paths resolve together.
DEFAULT_SOURCE = Path(__file__).resolve().parents[2]
# Labeled JSONL dataset of CCW smart_search queries shipped next to this script.
DEFAULT_QUERIES_FILE = Path(__file__).parent / "accuracy_queries_ccw_smart_search.jsonl"
# Default location for the final benchmark result payload.
DEFAULT_OUTPUT = Path(__file__).parent / "results" / "ccw_smart_search_stage2.json"
# Supported staged stage-2 expansion modes compared by this benchmark.
VALID_STAGE2_MODES = ("precomputed", "realtime", "static_global_graph")
# Local-only backend allowlists: this runner never calls remote APIs.
VALID_LOCAL_EMBEDDING_BACKENDS = ("fastembed",)
VALID_LOCAL_RERANKER_BACKENDS = ("onnx", "fastembed", "legacy")
# Baseline smart_search methods evaluated before the staged matrix.
VALID_BASELINE_METHODS = ("auto", "fts", "hybrid")
# Default ONNX cross-encoder used when --reranker-model is not given.
DEFAULT_LOCAL_ONNX_RERANKER_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2"
def _now_ms() -> float:
return time.perf_counter() * 1000.0
def _normalize_path_key(path: str) -> str:
try:
candidate = Path(path)
if str(candidate) and (candidate.is_absolute() or re.match(r"^[A-Za-z]:", str(candidate))):
normalized = str(candidate.resolve())
else:
normalized = str(candidate)
except Exception:
normalized = path
normalized = normalized.replace("/", "\\")
if os.name == "nt":
normalized = normalized.lower()
return normalized
def _dedup_topk(paths: Iterable[str], k: int) -> List[str]:
output: List[str] = []
seen: set[str] = set()
for path in paths:
if path in seen:
continue
seen.add(path)
output.append(path)
if len(output) >= k:
break
return output
def _first_hit_rank(topk_paths: Sequence[str], relevant: set[str]) -> Optional[int]:
for index, path in enumerate(topk_paths, start=1):
if path in relevant:
return index
return None
def _mrr(ranks: Sequence[Optional[int]]) -> float:
values = [1.0 / rank for rank in ranks if rank and rank > 0]
return statistics.mean(values) if values else 0.0
def _mean(values: Sequence[float]) -> float:
return statistics.mean(values) if values else 0.0
def _percentile(values: Sequence[float], percentile: float) -> float:
if not values:
return 0.0
ordered = sorted(values)
if len(ordered) == 1:
return ordered[0]
index = (len(ordered) - 1) * percentile
lower = int(index)
upper = min(lower + 1, len(ordered) - 1)
if lower == upper:
return ordered[lower]
fraction = index - lower
return ordered[lower] + (ordered[upper] - ordered[lower]) * fraction
def _load_labeled_queries(path: Path, limit: Optional[int]) -> List[Dict[str, Any]]:
if not path.is_file():
raise SystemExit(f"Queries file does not exist: {path}")
output: List[Dict[str, Any]] = []
for raw_line in path.read_text(encoding="utf-8", errors="ignore").splitlines():
line = raw_line.strip()
if not line or line.startswith("#"):
continue
try:
item = json.loads(line)
except Exception as exc:
raise SystemExit(f"Invalid JSONL line in {path}: {raw_line!r} ({exc})") from exc
if not isinstance(item, dict) or "query" not in item or "relevant_paths" not in item:
raise SystemExit(f"Invalid query item (expected object with query/relevant_paths): {item!r}")
relevant_paths = item.get("relevant_paths")
if not isinstance(relevant_paths, list) or not relevant_paths:
raise SystemExit(f"Query item must include non-empty relevant_paths[]: {item!r}")
output.append(item)
if limit is not None and len(output) >= limit:
break
return output
def _resolve_expected_paths(source_root: Path, paths: Sequence[str]) -> Tuple[List[str], set[str], List[str]]:
    """Resolve dataset-relative paths against *source_root*.

    Returns a tuple of (display paths, normalized comparison-key set, paths
    that do not exist on disk). Missing paths are still included in the first
    two elements so the caller decides how to treat them.
    """
    resolved_display: List[str] = []
    resolved_keys: set[str] = set()
    missing: List[str] = []
    for raw_path in paths:
        candidate = Path(raw_path)
        if not candidate.is_absolute():
            candidate = (source_root / candidate).resolve()
        text = str(candidate)
        if not candidate.exists():
            missing.append(text)
        resolved_display.append(text)
        resolved_keys.add(_normalize_path_key(text))
    return resolved_display, resolved_keys, missing
def _validate_local_only_backends(embedding_backend: str, reranker_backend: str) -> None:
    """Abort via SystemExit unless both backends are in the local-only allowlists."""
    checks = (
        (embedding_backend, "--embedding-backend", VALID_LOCAL_EMBEDDING_BACKENDS),
        (reranker_backend, "--reranker-backend", VALID_LOCAL_RERANKER_BACKENDS),
    )
    for value, flag, allowed in checks:
        if value in allowed:
            continue
        raise SystemExit(
            "This runner is local-only. "
            f"{flag} must be one of {', '.join(allowed)}; got {value!r}"
        )
def _validate_stage2_modes(stage2_modes: Sequence[str]) -> List[str]:
    """Normalize, validate, and order-preservingly dedupe stage-2 modes.

    Raises SystemExit when no mode survives normalization or when the first
    unrecognized mode is encountered.
    """
    normalized: List[str] = []
    for mode in stage2_modes:
        cleaned = str(mode).strip().lower()
        if cleaned:
            normalized.append(cleaned)
    if not normalized:
        raise SystemExit("At least one --stage2-modes entry is required")
    for mode in normalized:
        if mode not in VALID_STAGE2_MODES:
            raise SystemExit(
                f"Invalid --stage2-modes entry: {mode} "
                f"(valid: {', '.join(VALID_STAGE2_MODES)})"
            )
    # dict.fromkeys keeps first-seen order, matching the original dedupe loop.
    return list(dict.fromkeys(normalized))
def _validate_baseline_methods(methods: Sequence[str]) -> List[str]:
    """Normalize, validate, and dedupe baseline methods.

    Unlike stage-2 modes, an empty result is allowed: baselines are optional.
    """
    normalized: List[str] = []
    for method in methods:
        cleaned = str(method).strip().lower()
        if cleaned:
            normalized.append(cleaned)
    for method in normalized:
        if method not in VALID_BASELINE_METHODS:
            raise SystemExit(
                f"Invalid --baseline-methods entry: {method} "
                f"(valid: {', '.join(VALID_BASELINE_METHODS)})"
            )
    # dict.fromkeys preserves first-seen order, same as the original loop.
    return list(dict.fromkeys(normalized))
@dataclass
class StrategyRun:
    """Outcome of one strategy executed against one labeled query."""

    # Unique key, e.g. "hybrid", "dense_rerank", or "staged:<mode>".
    strategy_key: str
    # Requested strategy name ("auto"/"fts"/"hybrid"/"dense_rerank"/"staged").
    strategy: str
    # Stage-2 expansion mode for staged runs; None for baselines.
    stage2_mode: Optional[str]
    # Method after "auto" routing resolved (fts/hybrid/dense_rerank/staged).
    effective_method: str
    # "cascade" for dense_rerank/staged runs, otherwise the effective method.
    execution_method: str
    # Wall-clock latency of the search call in milliseconds.
    latency_ms: float
    # Normalized, deduplicated top-k result path keys.
    topk_paths: List[str]
    # 1-based rank of the first relevant hit, or None when nothing hit.
    first_hit_rank: Optional[int]
    # Whether any relevant path appeared in the top-k.
    hit_at_k: bool
    # Fraction of the relevant paths recovered within the top-k.
    recall_at_k: float
    # Number of generated-artifact paths in the top-k (noise signal).
    generated_artifact_count: int
    # Number of test-file paths in the top-k (noise signal).
    test_file_count: int
    # "<ExceptionType>: <message>" when the run failed; None on success.
    error: Optional[str] = None
@dataclass
class QueryEvaluation:
    """All strategy runs recorded for a single labeled query."""

    # The benchmark query text.
    query: str
    # Optional intent label from the dataset.
    intent: Optional[str]
    # Optional free-form notes from the dataset.
    notes: Optional[str]
    # Resolved display paths of the expected relevant files.
    relevant_paths: List[str]
    # Mapping of strategy_key -> run result.
    runs: Dict[str, StrategyRun]
@dataclass
class PairwiseDelta:
    """Metric differences (mode_a minus mode_b) between two stage-2 modes."""

    mode_a: str
    mode_b: str
    hit_at_k_delta: float
    mrr_at_k_delta: float
    avg_recall_at_k_delta: float
    avg_latency_ms_delta: float
@dataclass
class StrategySpec:
    """Declarative description of one strategy to benchmark."""

    # Unique key used in summaries and progress output (e.g. "staged:realtime").
    strategy_key: str
    # Strategy name fed into method routing.
    strategy: str
    # Stage-2 mode for staged strategies; None otherwise.
    stage2_mode: Optional[str]
@dataclass
class StrategyRuntime:
    """Per-strategy search engine plus the isolated config/registry it owns."""

    strategy_spec: StrategySpec
    # Deep-copied Config so per-run mutations stay isolated between strategies.
    config: Config
    registry: RegistryStore
    engine: ChainSearchEngine
def _strategy_specs(
    stage2_modes: Sequence[str],
    include_dense_baseline: bool,
    *,
    baseline_methods: Sequence[str],
) -> List[StrategySpec]:
    """Build the ordered strategy matrix.

    Order: requested baselines first, then the optional dense_rerank baseline,
    then one staged entry per stage-2 mode.
    """
    specs = [
        StrategySpec(strategy_key=method, strategy=method, stage2_mode=None)
        for method in baseline_methods
    ]
    if include_dense_baseline:
        specs.append(StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None))
    specs.extend(
        StrategySpec(strategy_key=f"staged:{mode}", strategy="staged", stage2_mode=mode)
        for mode in stage2_modes
    )
    return specs
def _build_strategy_runtime(base_config: Config, strategy_spec: StrategySpec) -> StrategyRuntime:
    """Create an isolated engine/registry pair for one strategy."""
    # Deep-copy the config so per-run mutations never leak across strategies.
    config_copy = deepcopy(base_config)
    store = RegistryStore()
    store.initialize()
    return StrategyRuntime(
        strategy_spec=strategy_spec,
        config=config_copy,
        registry=store,
        engine=ChainSearchEngine(registry=store, mapper=PathMapper(), config=config_copy),
    )
def _select_effective_method(query: str, requested_method: str) -> str:
requested = str(requested_method).strip().lower()
if requested != "auto":
return requested
if query_targets_generated_files(query) or query_prefers_lexical_search(query):
return "fts"
intent = detect_query_intent(query)
if intent == QueryIntent.KEYWORD:
return "fts"
if intent == QueryIntent.SEMANTIC:
return "dense_rerank"
return "hybrid"
def _filter_dataset_by_query_match(
dataset: Sequence[Dict[str, Any]],
query_match: Optional[str],
) -> List[Dict[str, Any]]:
"""Filter labeled queries by case-insensitive substring match."""
needle = str(query_match or "").strip().casefold()
if not needle:
return list(dataset)
return [
dict(item)
for item in dataset
if needle in str(item.get("query", "")).casefold()
]
def _apply_query_limit(
dataset: Sequence[Dict[str, Any]],
query_limit: Optional[int],
) -> List[Dict[str, Any]]:
"""Apply the optional query limit after any dataset-level filtering."""
if query_limit is None:
return list(dataset)
return [dict(item) for item in list(dataset)[: max(0, int(query_limit))]]
def _write_json_payload(path: Path, payload: Dict[str, Any]) -> None:
"""Persist a benchmark payload as UTF-8 JSON."""
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
def _write_final_outputs(
    *,
    output_path: Path,
    progress_output: Optional[Path],
    payload: Dict[str, Any],
) -> None:
    """Write the completed payload to the result file and, when a progress
    file is configured, mirror it there so both end in the same final state."""
    targets = [output_path]
    if progress_output is not None:
        targets.append(progress_output)
    for target in targets:
        _write_json_payload(target, payload)
def _make_progress_payload(
*,
args: argparse.Namespace,
source_root: Path,
strategy_specs: Sequence[StrategySpec],
evaluations: Sequence[QueryEvaluation],
query_index: int,
total_queries: int,
run_index: int,
total_runs: int,
current_query: str,
current_strategy_key: str,
) -> Dict[str, Any]:
"""Create a partial progress snapshot for long benchmark runs."""
return {
"status": "running",
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
"source": str(source_root),
"queries_file": str(args.queries_file),
"query_count": len(evaluations),
"planned_query_count": total_queries,
"k": int(args.k),
"coarse_k": int(args.coarse_k),
"strategy_keys": [spec.strategy_key for spec in strategy_specs],
"progress": {
"completed_queries": query_index,
"total_queries": total_queries,
"completed_runs": run_index,
"total_runs": total_runs,
"current_query": current_query,
"current_strategy_key": current_strategy_key,
},
"evaluations": [
{
"query": evaluation.query,
"intent": evaluation.intent,
"notes": evaluation.notes,
"relevant_paths": evaluation.relevant_paths,
"runs": {key: asdict(run) for key, run in evaluation.runs.items()},
}
for evaluation in evaluations
],
}
def _make_search_options(method: str, *, k: int) -> SearchOptions:
normalized = str(method).strip().lower()
if normalized == "fts":
return SearchOptions(
total_limit=k,
hybrid_mode=False,
enable_fuzzy=False,
enable_vector=False,
pure_vector=False,
enable_cascade=False,
)
if normalized == "hybrid":
return SearchOptions(
total_limit=k,
hybrid_mode=True,
enable_fuzzy=False,
enable_vector=True,
pure_vector=False,
enable_cascade=False,
)
if normalized in {"dense_rerank", "staged"}:
return SearchOptions(
total_limit=k,
hybrid_mode=True,
enable_fuzzy=False,
enable_vector=True,
pure_vector=False,
enable_cascade=True,
)
raise ValueError(f"Unsupported benchmark method: {method}")
def _run_strategy(
    engine: ChainSearchEngine,
    config: Config,
    *,
    strategy_spec: StrategySpec,
    query: str,
    source_path: Path,
    k: int,
    coarse_k: int,
    relevant: set[str],
) -> StrategyRun:
    """Execute one strategy for one query and score the result.

    Temporarily mutates ``config.cascade_strategy`` / ``config.staged_stage2_mode``
    for the run and restores both in ``finally``, so the shared runtime config
    is never left dirty. Exceptions are captured into ``StrategyRun.error``
    rather than propagated, so one failing run cannot abort the benchmark.
    """
    # Reduce allocator noise between timed runs.
    gc.collect()
    effective_method = _select_effective_method(query, strategy_spec.strategy)
    # dense_rerank/staged both execute via the cascade entrypoint.
    execution_method = "cascade" if effective_method in {"dense_rerank", "staged"} else effective_method
    # Snapshot config fields we may mutate so the finally-block can restore them.
    previous_cascade_strategy = getattr(config, "cascade_strategy", None)
    previous_stage2_mode = getattr(config, "staged_stage2_mode", None)
    start_ms = _now_ms()
    try:
        options = _make_search_options(
            "staged" if strategy_spec.strategy == "staged" else effective_method,
            k=k,
        )
        if strategy_spec.strategy == "staged":
            config.cascade_strategy = "staged"
            if strategy_spec.stage2_mode:
                config.staged_stage2_mode = strategy_spec.stage2_mode
            result = engine.cascade_search(
                query=query,
                source_path=source_path,
                k=k,
                coarse_k=coarse_k,
                options=options,
                strategy="staged",
            )
        elif effective_method == "dense_rerank":
            config.cascade_strategy = "dense_rerank"
            result = engine.cascade_search(
                query=query,
                source_path=source_path,
                k=k,
                coarse_k=coarse_k,
                options=options,
                strategy="dense_rerank",
            )
        else:
            # fts / hybrid go through the plain search entrypoint.
            result = engine.search(
                query=query,
                source_path=source_path,
                options=options,
            )
        latency_ms = _now_ms() - start_ms
        # Keep only items that expose a non-empty path, then normalize + dedupe.
        paths_raw = [item.path for item in (result.results or []) if getattr(item, "path", None)]
        topk = _dedup_topk((_normalize_path_key(path) for path in paths_raw), k=k)
        rank = _first_hit_rank(topk, relevant)
        recall = 0.0
        if relevant:
            recall = len(set(topk) & relevant) / float(len(relevant))
        return StrategyRun(
            strategy_key=strategy_spec.strategy_key,
            strategy=strategy_spec.strategy,
            stage2_mode=strategy_spec.stage2_mode,
            effective_method=effective_method,
            execution_method=execution_method,
            latency_ms=latency_ms,
            topk_paths=topk,
            first_hit_rank=rank,
            hit_at_k=rank is not None,
            recall_at_k=recall,
            generated_artifact_count=sum(1 for path in topk if is_generated_artifact_path(path)),
            test_file_count=sum(1 for path in topk if is_test_file(path)),
            error=None,
        )
    except Exception as exc:
        # Record the failure (with elapsed time) instead of aborting the benchmark.
        latency_ms = _now_ms() - start_ms
        return StrategyRun(
            strategy_key=strategy_spec.strategy_key,
            strategy=strategy_spec.strategy,
            stage2_mode=strategy_spec.stage2_mode,
            effective_method=effective_method,
            execution_method=execution_method,
            latency_ms=latency_ms,
            topk_paths=[],
            first_hit_rank=None,
            hit_at_k=False,
            recall_at_k=0.0,
            generated_artifact_count=0,
            test_file_count=0,
            error=f"{type(exc).__name__}: {exc}",
        )
    finally:
        # Always restore the shared config fields mutated above.
        config.cascade_strategy = previous_cascade_strategy
        config.staged_stage2_mode = previous_stage2_mode
def _summarize_runs(runs: Sequence[StrategyRun]) -> Dict[str, Any]:
    """Aggregate per-query runs into one strategy-level metric summary.

    Latency stats cover only successful runs; accuracy and noise stats cover
    every run (errored runs count as misses).
    """
    latencies = [run.latency_ms for run in runs if not run.error]
    method_counts: Dict[str, int] = {}
    for run in runs:
        method_counts[run.effective_method] = 1 + method_counts.get(run.effective_method, 0)
    hit_flags = [1.0 if run.hit_at_k else 0.0 for run in runs]
    summary: Dict[str, Any] = {
        "query_count": len(runs),
        "hit_at_k": _mean(hit_flags),
        "mrr_at_k": _mrr([run.first_hit_rank for run in runs]),
        "avg_recall_at_k": _mean([run.recall_at_k for run in runs]),
        "avg_latency_ms": _mean(latencies),
        "p50_latency_ms": _percentile(latencies, 0.50),
        "p95_latency_ms": _percentile(latencies, 0.95),
        "avg_generated_artifact_count": _mean([float(run.generated_artifact_count) for run in runs]),
        "avg_test_file_count": _mean([float(run.test_file_count) for run in runs]),
        "runs_with_generated_artifacts": sum(1 for run in runs if run.generated_artifact_count > 0),
        "runs_with_test_files": sum(1 for run in runs if run.test_file_count > 0),
        "effective_methods": method_counts,
        "errors": sum(1 for run in runs if run.error),
    }
    return summary
def _build_pairwise_deltas(stage2_summaries: Dict[str, Dict[str, Any]]) -> List[PairwiseDelta]:
    """Compute A-minus-B metric deltas for every unordered pair of stage-2 modes."""
    modes = list(stage2_summaries)
    deltas: List[PairwiseDelta] = []
    for offset, mode_a in enumerate(modes):
        summary_a = stage2_summaries[mode_a]
        for mode_b in modes[offset + 1:]:
            summary_b = stage2_summaries[mode_b]
            deltas.append(
                PairwiseDelta(
                    mode_a=mode_a,
                    mode_b=mode_b,
                    hit_at_k_delta=summary_a["hit_at_k"] - summary_b["hit_at_k"],
                    mrr_at_k_delta=summary_a["mrr_at_k"] - summary_b["mrr_at_k"],
                    avg_recall_at_k_delta=summary_a["avg_recall_at_k"] - summary_b["avg_recall_at_k"],
                    avg_latency_ms_delta=summary_a["avg_latency_ms"] - summary_b["avg_latency_ms"],
                )
            )
    return deltas
def _make_plan_payload(
*,
args: argparse.Namespace,
source_root: Path,
dataset: Sequence[Dict[str, Any]],
baseline_methods: Sequence[str],
stage2_modes: Sequence[str],
strategy_specs: Sequence[StrategySpec],
) -> Dict[str, Any]:
return {
"mode": "dry-run" if args.dry_run else "self-check",
"local_only": True,
"source": str(source_root),
"queries_file": str(args.queries_file),
"query_count": len(dataset),
"query_match": args.query_match,
"k": int(args.k),
"coarse_k": int(args.coarse_k),
"baseline_methods": list(baseline_methods),
"stage2_modes": list(stage2_modes),
"strategy_keys": [spec.strategy_key for spec in strategy_specs],
"local_backends": {
"embedding_backend": args.embedding_backend,
"embedding_model": args.embedding_model,
"reranker_backend": args.reranker_backend,
"reranker_model": args.reranker_model,
"embedding_use_gpu": bool(args.embedding_use_gpu),
"reranker_use_gpu": bool(args.reranker_use_gpu),
},
"output": str(args.output),
"progress_output": str(args.progress_output) if args.progress_output else None,
"dataset_preview": [
{
"query": item.get("query"),
"intent": item.get("intent"),
"relevant_paths": item.get("relevant_paths"),
}
for item in list(dataset)[: min(3, len(dataset))]
],
}
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser for the stage-2 comparison benchmark."""
    # The module docstring doubles as the --help description text.
    parser = argparse.ArgumentParser(description=__doc__)
    # --- Dataset / source selection ---------------------------------------
    parser.add_argument(
        "--source",
        type=Path,
        default=DEFAULT_SOURCE,
        help="Source root to benchmark. Defaults to the repository root so CCW and CodexLens paths resolve together.",
    )
    parser.add_argument(
        "--queries-file",
        type=Path,
        default=DEFAULT_QUERIES_FILE,
        help="Labeled JSONL dataset of CCW smart_search queries",
    )
    parser.add_argument("--query-limit", type=int, default=None, help="Optional query limit")
    parser.add_argument(
        "--query-match",
        type=str,
        default=None,
        help="Optional case-insensitive substring filter for selecting specific benchmark queries.",
    )
    # --- Retrieval sizes and strategy matrix ------------------------------
    parser.add_argument("--k", type=int, default=10, help="Top-k to evaluate")
    parser.add_argument("--coarse-k", type=int, default=100, help="Stage-1 coarse_k")
    parser.add_argument(
        "--baseline-methods",
        nargs="*",
        default=list(VALID_BASELINE_METHODS),
        help="Requested smart_search baselines to compare before staged modes (valid: auto, fts, hybrid).",
    )
    parser.add_argument(
        "--stage2-modes",
        nargs="*",
        default=list(VALID_STAGE2_MODES),
        help="Stage-2 modes to compare",
    )
    parser.add_argument("--warmup", type=int, default=0, help="Warmup iterations per strategy")
    # --- Local-only backend configuration ---------------------------------
    parser.add_argument(
        "--embedding-backend",
        default="fastembed",
        help="Local embedding backend. This runner only accepts fastembed.",
    )
    parser.add_argument(
        "--embedding-model",
        default="code",
        help="Embedding model/profile for the local embedding backend",
    )
    parser.add_argument(
        "--embedding-use-gpu",
        action="store_true",
        help="Enable GPU acceleration for local embeddings. Off by default for stability.",
    )
    parser.add_argument(
        "--reranker-backend",
        default="onnx",
        help="Local reranker backend. Supported local values: onnx, fastembed, legacy.",
    )
    parser.add_argument(
        "--reranker-model",
        default=DEFAULT_LOCAL_ONNX_RERANKER_MODEL,
        help="Reranker model name for the local reranker backend",
    )
    parser.add_argument(
        "--reranker-use-gpu",
        action="store_true",
        help="Enable GPU acceleration for the local reranker. Off by default for stability.",
    )
    # --- Run-mode toggles and outputs -------------------------------------
    parser.add_argument(
        "--skip-dense-baseline",
        action="store_true",
        help="Only compare staged stage2 modes and skip the dense_rerank baseline.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Validate dataset/config and print the benchmark plan without running retrieval.",
    )
    parser.add_argument(
        "--self-check",
        action="store_true",
        help="Smoke-check the entrypoint by validating dataset, source paths, and stage matrix wiring.",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=DEFAULT_OUTPUT,
        help="Output JSON path",
    )
    parser.add_argument(
        "--progress-output",
        type=Path,
        default=None,
        help="Optional JSON path updated after each query with partial progress and completed runs.",
    )
    return parser
def main() -> None:
    """CLI entrypoint: validate inputs, run the strategy matrix, write results."""
    parser = build_parser()
    args = parser.parse_args()
    # --- Input validation --------------------------------------------------
    source_root = args.source.expanduser().resolve()
    if not source_root.exists():
        raise SystemExit(f"Source path does not exist: {source_root}")
    if int(args.k) <= 0:
        raise SystemExit("--k must be > 0")
    if int(args.coarse_k) <= 0:
        raise SystemExit("--coarse-k must be > 0")
    if int(args.coarse_k) < int(args.k):
        raise SystemExit("--coarse-k must be >= --k")
    if int(args.warmup) < 0:
        raise SystemExit("--warmup must be >= 0")
    embedding_backend = str(args.embedding_backend).strip().lower()
    reranker_backend = str(args.reranker_backend).strip().lower()
    _validate_local_only_backends(embedding_backend, reranker_backend)
    baseline_methods = _validate_baseline_methods(args.baseline_methods)
    stage2_modes = _validate_stage2_modes(args.stage2_modes)
    # --- Dataset loading and filtering --------------------------------------
    dataset = _load_labeled_queries(args.queries_file, None)
    dataset = _filter_dataset_by_query_match(dataset, args.query_match)
    dataset = _apply_query_limit(dataset, args.query_limit)
    if not dataset:
        raise SystemExit("No queries to run")
    # Fail fast when any labeled relevant path does not exist on disk.
    missing_paths: List[str] = []
    for item in dataset:
        _, _, item_missing = _resolve_expected_paths(source_root, [str(path) for path in item["relevant_paths"]])
        missing_paths.extend(item_missing)
    if missing_paths:
        preview = ", ".join(missing_paths[:3])
        raise SystemExit(
            "Dataset relevant_paths do not resolve under the selected source root. "
            f"Examples: {preview}"
        )
    strategy_specs = _strategy_specs(
        stage2_modes,
        include_dense_baseline=not args.skip_dense_baseline,
        baseline_methods=baseline_methods,
    )
    # --- Dry-run / self-check: print the plan and exit without searching ----
    if args.dry_run or args.self_check:
        payload = _make_plan_payload(
            args=args,
            source_root=source_root,
            dataset=dataset,
            baseline_methods=baseline_methods,
            stage2_modes=stage2_modes,
            strategy_specs=strategy_specs,
        )
        if args.self_check:
            payload["status"] = "ok"
            payload["checks"] = {
                "dataset_loaded": True,
                "stage2_matrix_size": len(stage2_modes),
                "local_only_validation": True,
                "source_path_exists": True,
            }
        print(json.dumps(payload, ensure_ascii=False, indent=2))
        return
    # --- Base configuration shared by every strategy runtime ----------------
    config = Config.load()
    config.cascade_strategy = "staged"
    config.enable_staged_rerank = True
    config.enable_cross_encoder_rerank = True
    config.embedding_backend = embedding_backend
    config.embedding_model = str(args.embedding_model).strip()
    config.embedding_use_gpu = bool(args.embedding_use_gpu)
    config.embedding_auto_embed_missing = False
    config.reranker_backend = reranker_backend
    config.reranker_model = str(args.reranker_model).strip()
    config.reranker_use_gpu = bool(args.reranker_use_gpu)
    # Each strategy gets its own engine/registry over a deep-copied config.
    strategy_runtimes = {
        spec.strategy_key: _build_strategy_runtime(config, spec)
        for spec in strategy_specs
    }
    evaluations: List[QueryEvaluation] = []
    total_queries = len(dataset)
    total_runs = total_queries * len(strategy_specs)
    completed_runs = 0
    try:
        # Optional warmup: re-run the first query with reduced k to prime caches.
        if int(args.warmup) > 0:
            warm_query = str(dataset[0]["query"]).strip()
            warm_relevant_paths = [str(path) for path in dataset[0]["relevant_paths"]]
            _, warm_relevant, _ = _resolve_expected_paths(source_root, warm_relevant_paths)
            for spec in strategy_specs:
                runtime = strategy_runtimes[spec.strategy_key]
                for _ in range(int(args.warmup)):
                    _run_strategy(
                        runtime.engine,
                        runtime.config,
                        strategy_spec=spec,
                        query=warm_query,
                        source_path=source_root,
                        k=min(int(args.k), 5),
                        coarse_k=min(int(args.coarse_k), 50),
                        relevant=warm_relevant,
                    )
        # --- Main evaluation loop: every query against every strategy -------
        for index, item in enumerate(dataset, start=1):
            query = str(item.get("query", "")).strip()
            if not query:
                continue
            print(f"[query {index}/{total_queries}] {query}", flush=True)
            relevant_paths, relevant, _ = _resolve_expected_paths(
                source_root,
                [str(path) for path in item["relevant_paths"]],
            )
            runs: Dict[str, StrategyRun] = {}
            for spec in strategy_specs:
                # Persist a progress snapshot before each run so long benchmark
                # sessions can be monitored (and salvaged) mid-flight.
                if args.progress_output is not None:
                    _write_json_payload(
                        args.progress_output,
                        _make_progress_payload(
                            args=args,
                            source_root=source_root,
                            strategy_specs=strategy_specs,
                            evaluations=evaluations,
                            query_index=index - 1,
                            total_queries=total_queries,
                            run_index=completed_runs,
                            total_runs=total_runs,
                            current_query=query,
                            current_strategy_key=spec.strategy_key,
                        ),
                    )
                print(
                    f"[run {completed_runs + 1}/{total_runs}] "
                    f"strategy={spec.strategy_key} query={query}",
                    flush=True,
                )
                runtime = strategy_runtimes[spec.strategy_key]
                runs[spec.strategy_key] = _run_strategy(
                    runtime.engine,
                    runtime.config,
                    strategy_spec=spec,
                    query=query,
                    source_path=source_root,
                    k=int(args.k),
                    coarse_k=int(args.coarse_k),
                    relevant=relevant,
                )
                completed_runs += 1
                run = runs[spec.strategy_key]
                outcome = "error" if run.error else "ok"
                print(
                    f"[done {completed_runs}/{total_runs}] "
                    f"strategy={spec.strategy_key} outcome={outcome} "
                    f"latency_ms={run.latency_ms:.2f} "
                    f"first_hit_rank={run.first_hit_rank}",
                    flush=True,
                )
            evaluations.append(
                QueryEvaluation(
                    query=query,
                    intent=str(item.get("intent")) if item.get("intent") is not None else None,
                    notes=str(item.get("notes")) if item.get("notes") is not None else None,
                    relevant_paths=relevant_paths,
                    runs=runs,
                )
            )
            # Snapshot again after each fully-completed query.
            if args.progress_output is not None:
                _write_json_payload(
                    args.progress_output,
                    _make_progress_payload(
                        args=args,
                        source_root=source_root,
                        strategy_specs=strategy_specs,
                        evaluations=evaluations,
                        query_index=index,
                        total_queries=total_queries,
                        run_index=completed_runs,
                        total_runs=total_runs,
                        current_query=query,
                        current_strategy_key="complete",
                    ),
                )
    finally:
        # Always release engines and registries, even on error or interrupt.
        for runtime in strategy_runtimes.values():
            try:
                runtime.engine.close()
            except Exception:
                pass
        for runtime in strategy_runtimes.values():
            try:
                runtime.registry.close()
            except Exception:
                pass
    # --- Aggregation and reporting ------------------------------------------
    strategy_summaries: Dict[str, Dict[str, Any]] = {}
    for spec in strategy_specs:
        spec_runs = [evaluation.runs[spec.strategy_key] for evaluation in evaluations if spec.strategy_key in evaluation.runs]
        summary = _summarize_runs(spec_runs)
        summary["strategy"] = spec.strategy
        summary["stage2_mode"] = spec.stage2_mode
        strategy_summaries[spec.strategy_key] = summary
    # Stage-2 matrix keeps only staged strategies, keyed by bare mode name.
    stage2_mode_matrix = {
        mode: strategy_summaries[f"staged:{mode}"]
        for mode in stage2_modes
        if f"staged:{mode}" in strategy_summaries
    }
    pairwise_deltas = [asdict(item) for item in _build_pairwise_deltas(stage2_mode_matrix)]
    payload = {
        "status": "completed",
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "source": str(source_root),
        "queries_file": str(args.queries_file),
        "query_count": len(evaluations),
        "query_match": args.query_match,
        "k": int(args.k),
        "coarse_k": int(args.coarse_k),
        "local_only": True,
        "strategies": strategy_summaries,
        "stage2_mode_matrix": stage2_mode_matrix,
        "pairwise_stage2_deltas": pairwise_deltas,
        "config": {
            "embedding_backend": config.embedding_backend,
            "embedding_model": config.embedding_model,
            "embedding_use_gpu": bool(config.embedding_use_gpu),
            "reranker_backend": config.reranker_backend,
            "reranker_model": config.reranker_model,
            "reranker_use_gpu": bool(config.reranker_use_gpu),
            "enable_staged_rerank": bool(config.enable_staged_rerank),
            "enable_cross_encoder_rerank": bool(config.enable_cross_encoder_rerank),
        },
        "progress_output": str(args.progress_output) if args.progress_output else None,
        "evaluations": [
            {
                "query": evaluation.query,
                "intent": evaluation.intent,
                "notes": evaluation.notes,
                "relevant_paths": evaluation.relevant_paths,
                "runs": {key: asdict(run) for key, run in evaluation.runs.items()},
            }
            for evaluation in evaluations
        ],
    }
    _write_final_outputs(
        output_path=args.output,
        progress_output=args.progress_output,
        payload=payload,
    )
    print(json.dumps(payload, ensure_ascii=False, indent=2))
# Script entrypoint: run the benchmark CLI only when executed directly.
if __name__ == "__main__":
    main()

View File

@@ -280,8 +280,9 @@ def main() -> None:
if args.staged_cluster_strategy:
config.staged_clustering_strategy = str(args.staged_cluster_strategy)
# Stability: on some Windows setups, fastembed + DirectML can crash under load.
# Dense_rerank uses the embedding backend that matches the index; force CPU here.
# Force local embeddings and reranking onto CPU for reproducible benchmark runs.
config.embedding_use_gpu = False
config.reranker_use_gpu = False
registry = RegistryStore()
registry.initialize()
mapper = PathMapper()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,526 @@
{
"timestamp": "2026-03-14 23:16:55",
"source": "D:\\Claude_dms3",
"queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl",
"query_count": 4,
"k": 10,
"coarse_k": 100,
"local_only": true,
"strategies": {
"dense_rerank": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 20171.940174996853,
"p50_latency_ms": 14222.247749984264,
"p95_latency_ms": 35222.31535999476,
"errors": 0,
"strategy": "dense_rerank",
"stage2_mode": null
},
"staged:precomputed": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13679.793299987912,
"p50_latency_ms": 12918.63379997015,
"p95_latency_ms": 16434.964765003322,
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"staged:realtime": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13885.101849973202,
"p50_latency_ms": 13826.323699980974,
"p95_latency_ms": 14867.712269958853,
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"staged:static_global_graph": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13336.124025002122,
"p50_latency_ms": 13415.476950019598,
"p95_latency_ms": 13514.329230004549,
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"stage2_mode_matrix": {
"precomputed": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13679.793299987912,
"p50_latency_ms": 12918.63379997015,
"p95_latency_ms": 16434.964765003322,
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"realtime": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13885.101849973202,
"p50_latency_ms": 13826.323699980974,
"p95_latency_ms": 14867.712269958853,
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"static_global_graph": {
"query_count": 4,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 13336.124025002122,
"p50_latency_ms": 13415.476950019598,
"p95_latency_ms": 13514.329230004549,
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"pairwise_stage2_deltas": [
{
"mode_a": "precomputed",
"mode_b": "realtime",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": -205.30854998528957
},
{
"mode_a": "precomputed",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 343.66927498579025
},
{
"mode_a": "realtime",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 548.9778249710798
}
],
"config": {
"embedding_backend": "fastembed",
"embedding_model": "code",
"embedding_use_gpu": false,
"reranker_backend": "onnx",
"reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"enable_staged_rerank": true,
"enable_cross_encoder_rerank": true
},
"evaluations": [
{
"query": "executeHybridMode dense_rerank semantic smart_search",
"intent": "ccw-semantic-routing",
"notes": "CCW semantic mode delegates to CodexLens dense_rerank.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 38829.27079999447,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts",
"d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 16915.833400011063,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 13961.2567999959,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 12986.330999970436,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "parse CodexLens JSON output strip ANSI smart_search",
"intent": "ccw-json-fallback",
"notes": "Covers JSON/plain-text fallback handling for CodexLens output.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 14782.901199996471,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\codex-lens-lsp.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\queue\\queueexecuteinsession.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-dashboard\\queuepanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usewebsocket.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useflows.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-error-monitoring.spec.ts",
"d:\\claude_dms3\\ccw\\tests\\native-session-discovery.test.ts",
"d:\\claude_dms3\\ccw\\src\\core\\services\\checkpoint-service.ts",
"d:\\claude_dms3\\ccw\\tests\\integration\\system-routes.test.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 13710.042499959469,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 15027.674999952316,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13389.622500002384,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "smart_search init embed search action schema",
"intent": "ccw-action-schema",
"notes": "Find the Zod schema that defines init/embed/search actions.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 13661.594299972057,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\discovery.spec.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\__tests__\\ask-question.test.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\a2ui\\a2uiwebsockethandler.js",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\dashboard.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 12127.225099980831,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 12860.084999978542,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13441.331400036812,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
},
{
"query": "auto init missing job dedupe smart_search",
"intent": "ccw-auto-init",
"notes": "Targets background init/embed warmup and dedupe state.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"latency_ms": 13413.994400024414,
"topk_paths": [
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\memory-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usememory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\batchoperationtoolbar.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\memory.spec.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useprompthistory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\flowstore.ts",
"d:\\claude_dms3\\ccw\\src\\services\\deepwiki-service.ts",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\claude-routes.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"latency_ms": 11966.072200000286,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"latency_ms": 13691.39059996605,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"latency_ms": 13527.211199998856,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"error": null
}
}
}
]
}

View File

@@ -0,0 +1,415 @@
{
"timestamp": "2026-03-15 00:19:16",
"source": "D:\\Claude_dms3",
"queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl",
"query_count": 1,
"k": 10,
"coarse_k": 100,
"local_only": true,
"strategies": {
"auto": {
"query_count": 1,
"hit_at_k": 1.0,
"mrr_at_k": 1.0,
"avg_recall_at_k": 1.0,
"avg_latency_ms": 1377.3565999865532,
"p50_latency_ms": 1377.3565999865532,
"p95_latency_ms": 1377.3565999865532,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"fts": 1
},
"errors": 0,
"strategy": "auto",
"stage2_mode": null
},
"fts": {
"query_count": 1,
"hit_at_k": 1.0,
"mrr_at_k": 1.0,
"avg_recall_at_k": 1.0,
"avg_latency_ms": 1460.0819000601768,
"p50_latency_ms": 1460.0819000601768,
"p95_latency_ms": 1460.0819000601768,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"fts": 1
},
"errors": 0,
"strategy": "fts",
"stage2_mode": null
},
"hybrid": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 45991.74140000343,
"p50_latency_ms": 45991.74140000343,
"p95_latency_ms": 45991.74140000343,
"avg_generated_artifact_count": 0.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 0,
"runs_with_test_files": 0,
"effective_methods": {
"hybrid": 1
},
"errors": 0,
"strategy": "hybrid",
"stage2_mode": null
},
"dense_rerank": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 22739.62610000372,
"p50_latency_ms": 22739.62610000372,
"p95_latency_ms": 22739.62610000372,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 2.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 1,
"effective_methods": {
"dense_rerank": 1
},
"errors": 0,
"strategy": "dense_rerank",
"stage2_mode": null
},
"staged:precomputed": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14900.017599999905,
"p50_latency_ms": 14900.017599999905,
"p95_latency_ms": 14900.017599999905,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"staged:realtime": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14104.314599990845,
"p50_latency_ms": 14104.314599990845,
"p95_latency_ms": 14104.314599990845,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"staged:static_global_graph": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 11906.852500021458,
"p50_latency_ms": 11906.852500021458,
"p95_latency_ms": 11906.852500021458,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"stage2_mode_matrix": {
"precomputed": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14900.017599999905,
"p50_latency_ms": 14900.017599999905,
"p95_latency_ms": 14900.017599999905,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "precomputed"
},
"realtime": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 14104.314599990845,
"p50_latency_ms": 14104.314599990845,
"p95_latency_ms": 14104.314599990845,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "realtime"
},
"static_global_graph": {
"query_count": 1,
"hit_at_k": 0.0,
"mrr_at_k": 0.0,
"avg_recall_at_k": 0.0,
"avg_latency_ms": 11906.852500021458,
"p50_latency_ms": 11906.852500021458,
"p95_latency_ms": 11906.852500021458,
"avg_generated_artifact_count": 1.0,
"avg_test_file_count": 0.0,
"runs_with_generated_artifacts": 1,
"runs_with_test_files": 0,
"effective_methods": {
"staged": 1
},
"errors": 0,
"strategy": "staged",
"stage2_mode": "static_global_graph"
}
},
"pairwise_stage2_deltas": [
{
"mode_a": "precomputed",
"mode_b": "realtime",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 795.7030000090599
},
{
"mode_a": "precomputed",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 2993.165099978447
},
{
"mode_a": "realtime",
"mode_b": "static_global_graph",
"hit_at_k_delta": 0.0,
"mrr_at_k_delta": 0.0,
"avg_recall_at_k_delta": 0.0,
"avg_latency_ms_delta": 2197.462099969387
}
],
"config": {
"embedding_backend": "fastembed",
"embedding_model": "code",
"embedding_use_gpu": false,
"reranker_backend": "onnx",
"reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"reranker_use_gpu": false,
"enable_staged_rerank": true,
"enable_cross_encoder_rerank": true
},
"evaluations": [
{
"query": "executeHybridMode dense_rerank semantic smart_search",
"intent": "ccw-semantic-routing",
"notes": "CCW semantic mode delegates to CodexLens dense_rerank.",
"relevant_paths": [
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"runs": {
"auto": {
"strategy_key": "auto",
"strategy": "auto",
"stage2_mode": null,
"effective_method": "fts",
"execution_method": "fts",
"latency_ms": 1377.3565999865532,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"first_hit_rank": 1,
"hit_at_k": true,
"recall_at_k": 1.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"fts": {
"strategy_key": "fts",
"strategy": "fts",
"stage2_mode": null,
"effective_method": "fts",
"execution_method": "fts",
"latency_ms": 1460.0819000601768,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts"
],
"first_hit_rank": 1,
"hit_at_k": true,
"recall_at_k": 1.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"hybrid": {
"strategy_key": "hybrid",
"strategy": "hybrid",
"stage2_mode": null,
"effective_method": "hybrid",
"execution_method": "hybrid",
"latency_ms": 45991.74140000343,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\config\\litellm-api-config-manager.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py",
"d:\\claude_dms3\\ccw\\src\\commands\\core-memory.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\scripts\\generate_embeddings.py",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\notification-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\team-msg.ts",
"d:\\claude_dms3\\ccw\\src\\types\\remote-notification.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-store.ts",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 0,
"test_file_count": 0,
"error": null
},
"dense_rerank": {
"strategy_key": "dense_rerank",
"strategy": "dense_rerank",
"stage2_mode": null,
"effective_method": "dense_rerank",
"execution_method": "cascade",
"latency_ms": 22739.62610000372,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts",
"d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts",
"d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts",
"d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts",
"d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 2,
"error": null
},
"staged:precomputed": {
"strategy_key": "staged:precomputed",
"strategy": "staged",
"stage2_mode": "precomputed",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 14900.017599999905,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
},
"staged:realtime": {
"strategy_key": "staged:realtime",
"strategy": "staged",
"stage2_mode": "realtime",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 14104.314599990845,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
},
"staged:static_global_graph": {
"strategy_key": "staged:static_global_graph",
"strategy": "staged",
"stage2_mode": "static_global_graph",
"effective_method": "staged",
"execution_method": "cascade",
"latency_ms": 11906.852500021458,
"topk_paths": [
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
],
"first_hit_rank": null,
"hit_at_k": false,
"recall_at_k": 0.0,
"generated_artifact_count": 1,
"test_file_count": 0,
"error": null
}
}
}
]
}