mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-03-21 19:08:17 +08:00
feat: enhance search, ranking, reranker and CLI tooling across ccw and codex-lens
Major improvements to smart-search, chain-search cascade, ranking pipeline, reranker factory, CLI history store, codex-lens integration, and uv-manager. Simplify command-generator skill by inlining phases. Add comprehensive tests. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
{"query":"executeHybridMode dense_rerank semantic smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-semantic-routing","notes":"CCW semantic mode delegates to CodexLens dense_rerank."}
|
||||
{"query":"parse CodexLens JSON output strip ANSI smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-json-fallback","notes":"Covers JSON/plain-text fallback handling for CodexLens output."}
|
||||
{"query":"smart_search init embed search action schema","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-action-schema","notes":"Find the Zod schema that defines init/embed/search actions."}
|
||||
{"query":"auto init missing job dedupe smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-auto-init","notes":"Targets background init/embed warmup and dedupe state."}
|
||||
{"query":"smart_search exact mode fallback to CodexLens fts","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-exact-fallback","notes":"Tracks the exact-mode fallback path into CodexLens FTS."}
|
||||
{"query":"smart_search settings snapshot embedding backend reranker backend staged stage2 mode","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-config-snapshot","notes":"Reads local config snapshot for embedding/reranker/staged pipeline settings."}
|
||||
{"query":"embedding backend fastembed local litellm api config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-embedding-config","notes":"Local-only benchmark should resolve to fastembed defaults."}
|
||||
{"query":"reranker backend onnx api legacy configuration","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-reranker-config","notes":"Covers both config dataclass fields and env overrides."}
|
||||
{"query":"staged stage2 mode precomputed realtime static_global_graph","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-stage2-config","notes":"Benchmark matrix should exercise the three supported stage2 modes."}
|
||||
{"query":"enable staged rerank stage 4 config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-stage4-rerank","notes":"Stage 4 rerank flag needs to stay enabled for local benchmarks."}
|
||||
{"query":"cascade_search dense_rerank staged pipeline ChainSearchEngine","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-cascade","notes":"Baseline query for the central retrieval engine."}
|
||||
{"query":"realtime LSP expand stage2 search pipeline","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-realtime","notes":"Targets realtime stage2 expansion logic."}
|
||||
{"query":"static global graph stage2 expansion implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-static","notes":"Targets static_global_graph stage2 expansion logic."}
|
||||
{"query":"cross encoder rerank stage 4 implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-rerank","notes":"Relevant for dense_rerank and staged rerank latency comparisons."}
|
||||
{"query":"get_reranker factory onnx backend selection","relevant_paths":["codex-lens/src/codexlens/semantic/reranker/factory.py"],"intent":"reranker-factory","notes":"Keeps the benchmark aligned with local ONNX reranker selection."}
|
||||
{"query":"EMBEDDING_BACKEND and RERANKER_BACKEND environment variables","relevant_paths":["codex-lens/src/codexlens/env_config.py"],"intent":"env-overrides","notes":"Covers CCW/CodexLens local-only environment overrides."}
|
||||
@@ -239,6 +239,7 @@ def main() -> None:
|
||||
config.staged_clustering_strategy = str(args.staged_cluster_strategy or "path").strip().lower()
|
||||
# Stability: on some Windows setups, DirectML/ONNX can crash under load.
|
||||
config.embedding_use_gpu = False
|
||||
config.reranker_use_gpu = False
|
||||
|
||||
registry = RegistryStore()
|
||||
registry.initialize()
|
||||
@@ -362,4 +363,3 @@ def main() -> None:
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
||||
980
codex-lens/benchmarks/compare_ccw_smart_search_stage2.py
Normal file
980
codex-lens/benchmarks/compare_ccw_smart_search_stage2.py
Normal file
@@ -0,0 +1,980 @@
|
||||
#!/usr/bin/env python
|
||||
"""Benchmark local-only staged stage2 modes for CCW smart_search queries.
|
||||
|
||||
This benchmark reuses the existing CodexLens benchmark style, but focuses on
|
||||
the real search intents that drive CCW `smart_search`. It evaluates:
|
||||
|
||||
1. `dense_rerank` baseline
|
||||
2. `staged` + `precomputed`
|
||||
3. `staged` + `realtime`
|
||||
4. `staged` + `static_global_graph`
|
||||
|
||||
Metrics:
|
||||
- Hit@K
|
||||
- MRR@K
|
||||
- Recall@K
|
||||
- latency (avg/p50/p95)
|
||||
|
||||
The runner is intentionally local-only. By default it uses:
|
||||
- embedding backend: `fastembed`
|
||||
- reranker backend: `onnx`
|
||||
|
||||
Examples:
|
||||
python benchmarks/compare_ccw_smart_search_stage2.py --dry-run
|
||||
python benchmarks/compare_ccw_smart_search_stage2.py --self-check
|
||||
python benchmarks/compare_ccw_smart_search_stage2.py --source .. --k 10
|
||||
python benchmarks/compare_ccw_smart_search_stage2.py --embedding-model code --reranker-model cross-encoder/ms-marco-MiniLM-L-6-v2
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from copy import deepcopy
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import statistics
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
|
||||
from codexlens.search.ranking import (
|
||||
QueryIntent,
|
||||
detect_query_intent,
|
||||
is_generated_artifact_path,
|
||||
is_test_file,
|
||||
query_prefers_lexical_search,
|
||||
query_targets_generated_files,
|
||||
)
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.storage.registry import RegistryStore
|
||||
|
||||
|
||||
# Repository root two levels above this file, so CCW and CodexLens paths resolve together.
DEFAULT_SOURCE = Path(__file__).resolve().parents[2]
# Labeled JSONL dataset of CCW smart_search queries shipped next to this script.
DEFAULT_QUERIES_FILE = Path(__file__).parent / "accuracy_queries_ccw_smart_search.jsonl"
# Default location for the final benchmark JSON report.
DEFAULT_OUTPUT = Path(__file__).parent / "results" / "ccw_smart_search_stage2.json"

# Allowed values enforced by the CLI validators below; this runner is local-only.
VALID_STAGE2_MODES = ("precomputed", "realtime", "static_global_graph")
VALID_LOCAL_EMBEDDING_BACKENDS = ("fastembed",)
VALID_LOCAL_RERANKER_BACKENDS = ("onnx", "fastembed", "legacy")
VALID_BASELINE_METHODS = ("auto", "fts", "hybrid")
# Default ONNX cross-encoder used when --reranker-model is not supplied.
DEFAULT_LOCAL_ONNX_RERANKER_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2"
|
||||
|
||||
|
||||
def _now_ms() -> float:
|
||||
return time.perf_counter() * 1000.0
|
||||
|
||||
|
||||
def _normalize_path_key(path: str) -> str:
|
||||
try:
|
||||
candidate = Path(path)
|
||||
if str(candidate) and (candidate.is_absolute() or re.match(r"^[A-Za-z]:", str(candidate))):
|
||||
normalized = str(candidate.resolve())
|
||||
else:
|
||||
normalized = str(candidate)
|
||||
except Exception:
|
||||
normalized = path
|
||||
normalized = normalized.replace("/", "\\")
|
||||
if os.name == "nt":
|
||||
normalized = normalized.lower()
|
||||
return normalized
|
||||
|
||||
|
||||
def _dedup_topk(paths: Iterable[str], k: int) -> List[str]:
|
||||
output: List[str] = []
|
||||
seen: set[str] = set()
|
||||
for path in paths:
|
||||
if path in seen:
|
||||
continue
|
||||
seen.add(path)
|
||||
output.append(path)
|
||||
if len(output) >= k:
|
||||
break
|
||||
return output
|
||||
|
||||
|
||||
def _first_hit_rank(topk_paths: Sequence[str], relevant: set[str]) -> Optional[int]:
|
||||
for index, path in enumerate(topk_paths, start=1):
|
||||
if path in relevant:
|
||||
return index
|
||||
return None
|
||||
|
||||
|
||||
def _mrr(ranks: Sequence[Optional[int]]) -> float:
|
||||
values = [1.0 / rank for rank in ranks if rank and rank > 0]
|
||||
return statistics.mean(values) if values else 0.0
|
||||
|
||||
|
||||
def _mean(values: Sequence[float]) -> float:
|
||||
return statistics.mean(values) if values else 0.0
|
||||
|
||||
|
||||
def _percentile(values: Sequence[float], percentile: float) -> float:
|
||||
if not values:
|
||||
return 0.0
|
||||
ordered = sorted(values)
|
||||
if len(ordered) == 1:
|
||||
return ordered[0]
|
||||
index = (len(ordered) - 1) * percentile
|
||||
lower = int(index)
|
||||
upper = min(lower + 1, len(ordered) - 1)
|
||||
if lower == upper:
|
||||
return ordered[lower]
|
||||
fraction = index - lower
|
||||
return ordered[lower] + (ordered[upper] - ordered[lower]) * fraction
|
||||
|
||||
|
||||
def _load_labeled_queries(path: Path, limit: Optional[int]) -> List[Dict[str, Any]]:
|
||||
if not path.is_file():
|
||||
raise SystemExit(f"Queries file does not exist: {path}")
|
||||
|
||||
output: List[Dict[str, Any]] = []
|
||||
for raw_line in path.read_text(encoding="utf-8", errors="ignore").splitlines():
|
||||
line = raw_line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
try:
|
||||
item = json.loads(line)
|
||||
except Exception as exc:
|
||||
raise SystemExit(f"Invalid JSONL line in {path}: {raw_line!r} ({exc})") from exc
|
||||
if not isinstance(item, dict) or "query" not in item or "relevant_paths" not in item:
|
||||
raise SystemExit(f"Invalid query item (expected object with query/relevant_paths): {item!r}")
|
||||
relevant_paths = item.get("relevant_paths")
|
||||
if not isinstance(relevant_paths, list) or not relevant_paths:
|
||||
raise SystemExit(f"Query item must include non-empty relevant_paths[]: {item!r}")
|
||||
output.append(item)
|
||||
if limit is not None and len(output) >= limit:
|
||||
break
|
||||
return output
|
||||
|
||||
|
||||
def _resolve_expected_paths(source_root: Path, paths: Sequence[str]) -> Tuple[List[str], set[str], List[str]]:
    """Resolve dataset paths against *source_root*.

    Returns ``(display_paths, normalized_keys, missing_paths)``. Paths that
    do not exist on disk are still included in the first two collections so
    callers can report them alongside the missing list.
    """
    display: List[str] = []
    keys: set[str] = set()
    missing: List[str] = []

    for raw in paths:
        resolved = Path(raw)
        if not resolved.is_absolute():
            resolved = (source_root / resolved).resolve()
        text = str(resolved)
        if not resolved.exists():
            missing.append(text)
        display.append(text)
        keys.add(_normalize_path_key(text))
    return display, keys, missing
|
||||
|
||||
|
||||
def _validate_local_only_backends(embedding_backend: str, reranker_backend: str) -> None:
    """Abort with SystemExit unless both backends are local-only choices."""
    checks = (
        ("--embedding-backend", embedding_backend, VALID_LOCAL_EMBEDDING_BACKENDS),
        ("--reranker-backend", reranker_backend, VALID_LOCAL_RERANKER_BACKENDS),
    )
    for flag, value, valid in checks:
        if value not in valid:
            raise SystemExit(
                "This runner is local-only. "
                f"{flag} must be one of {', '.join(valid)}; got {value!r}"
            )
|
||||
|
||||
|
||||
def _validate_stage2_modes(stage2_modes: Sequence[str]) -> List[str]:
    """Normalize, validate and order-preservingly de-duplicate stage2 modes."""
    cleaned = [str(mode).strip().lower() for mode in stage2_modes if str(mode).strip()]
    if not cleaned:
        raise SystemExit("At least one --stage2-modes entry is required")
    unknown = [mode for mode in cleaned if mode not in VALID_STAGE2_MODES]
    if unknown:
        raise SystemExit(
            f"Invalid --stage2-modes entry: {unknown[0]} "
            f"(valid: {', '.join(VALID_STAGE2_MODES)})"
        )
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(cleaned))
|
||||
|
||||
|
||||
def _validate_baseline_methods(methods: Sequence[str]) -> List[str]:
    """Normalize, validate and order-preservingly de-duplicate baselines.

    Unlike the stage2 validator, an empty result is allowed here.
    """
    cleaned = [str(method).strip().lower() for method in methods if str(method).strip()]
    unknown = [method for method in cleaned if method not in VALID_BASELINE_METHODS]
    if unknown:
        raise SystemExit(
            f"Invalid --baseline-methods entry: {unknown[0]} "
            f"(valid: {', '.join(VALID_BASELINE_METHODS)})"
        )
    return list(dict.fromkeys(cleaned))
|
||||
|
||||
|
||||
@dataclass
class StrategyRun:
    """Outcome of running one strategy for a single benchmark query."""

    strategy_key: str  # unique matrix key, e.g. "hybrid" or "staged:realtime"
    strategy: str  # requested strategy name (auto/fts/hybrid/dense_rerank/staged)
    stage2_mode: Optional[str]  # staged stage2 mode; None for non-staged strategies
    effective_method: str  # method after "auto" resolution
    execution_method: str  # "cascade" for dense_rerank/staged, else the effective method
    latency_ms: float  # wall-clock latency of the search call
    topk_paths: List[str]  # normalized, de-duplicated top-k result paths
    first_hit_rank: Optional[int]  # 1-based rank of first relevant hit; None on miss
    hit_at_k: bool  # True when any relevant path appears in the top-k
    recall_at_k: float  # fraction of relevant paths present in the top-k
    generated_artifact_count: int  # top-k entries flagged as generated artifacts
    test_file_count: int  # top-k entries flagged as test files
    error: Optional[str] = None  # "ExcType: message" when the run failed, else None
|
||||
|
||||
|
||||
@dataclass
class QueryEvaluation:
    """All strategy runs recorded for one labeled query."""

    query: str  # the benchmark query text
    intent: Optional[str]  # dataset-provided intent label (not a computed QueryIntent)
    notes: Optional[str]  # free-form dataset notes
    relevant_paths: List[str]  # resolved ground-truth paths
    runs: Dict[str, StrategyRun]  # keyed by StrategyRun.strategy_key
|
||||
|
||||
|
||||
@dataclass
class PairwiseDelta:
    """Metric differences (mode_a minus mode_b) between two stage2 modes."""

    mode_a: str
    mode_b: str
    hit_at_k_delta: float
    mrr_at_k_delta: float
    avg_recall_at_k_delta: float
    avg_latency_ms_delta: float
|
||||
|
||||
|
||||
@dataclass
class StrategySpec:
    """One entry in the benchmark strategy matrix."""

    strategy_key: str  # e.g. "fts", "dense_rerank", "staged:precomputed"
    strategy: str  # base strategy name
    stage2_mode: Optional[str]  # only set for staged strategies
|
||||
|
||||
|
||||
@dataclass
class StrategyRuntime:
    """Isolated config/registry/engine bundle built for one strategy."""

    strategy_spec: StrategySpec
    config: Config  # deep copy, so per-strategy mutations stay isolated
    registry: RegistryStore
    engine: ChainSearchEngine
|
||||
|
||||
|
||||
def _strategy_specs(
    stage2_modes: Sequence[str],
    include_dense_baseline: bool,
    *,
    baseline_methods: Sequence[str],
) -> List[StrategySpec]:
    """Build the ordered strategy matrix.

    Baselines come first, then (optionally) the dense_rerank baseline, and
    finally one staged spec per requested stage2 mode.
    """
    specs = [
        StrategySpec(strategy_key=method, strategy=method, stage2_mode=None)
        for method in baseline_methods
    ]
    if include_dense_baseline:
        specs.append(StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None))
    specs.extend(
        StrategySpec(strategy_key=f"staged:{mode}", strategy="staged", stage2_mode=mode)
        for mode in stage2_modes
    )
    return specs
|
||||
|
||||
|
||||
def _build_strategy_runtime(base_config: Config, strategy_spec: StrategySpec) -> StrategyRuntime:
    """Create an isolated runtime (config copy, registry, engine) for one strategy."""
    config_copy = deepcopy(base_config)
    store = RegistryStore()
    store.initialize()
    search_engine = ChainSearchEngine(registry=store, mapper=PathMapper(), config=config_copy)
    return StrategyRuntime(
        strategy_spec=strategy_spec,
        config=config_copy,
        registry=store,
        engine=search_engine,
    )
|
||||
|
||||
|
||||
def _select_effective_method(query: str, requested_method: str) -> str:
    """Resolve the requested method, mapping "auto" via query-intent heuristics."""
    requested = str(requested_method).strip().lower()
    if requested != "auto":
        return requested
    # Lexical-looking queries go straight to FTS.
    if query_targets_generated_files(query) or query_prefers_lexical_search(query):
        return "fts"
    intent_map = {
        QueryIntent.KEYWORD: "fts",
        QueryIntent.SEMANTIC: "dense_rerank",
    }
    return intent_map.get(detect_query_intent(query), "hybrid")
|
||||
|
||||
|
||||
def _filter_dataset_by_query_match(
|
||||
dataset: Sequence[Dict[str, Any]],
|
||||
query_match: Optional[str],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Filter labeled queries by case-insensitive substring match."""
|
||||
needle = str(query_match or "").strip().casefold()
|
||||
if not needle:
|
||||
return list(dataset)
|
||||
return [
|
||||
dict(item)
|
||||
for item in dataset
|
||||
if needle in str(item.get("query", "")).casefold()
|
||||
]
|
||||
|
||||
|
||||
def _apply_query_limit(
|
||||
dataset: Sequence[Dict[str, Any]],
|
||||
query_limit: Optional[int],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Apply the optional query limit after any dataset-level filtering."""
|
||||
if query_limit is None:
|
||||
return list(dataset)
|
||||
return [dict(item) for item in list(dataset)[: max(0, int(query_limit))]]
|
||||
|
||||
|
||||
def _write_json_payload(path: Path, payload: Dict[str, Any]) -> None:
|
||||
"""Persist a benchmark payload as UTF-8 JSON."""
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
|
||||
|
||||
def _write_final_outputs(
    *,
    output_path: Path,
    progress_output: Optional[Path],
    payload: Dict[str, Any],
) -> None:
    """Persist the completed payload to the result file and, if configured, the progress file."""
    targets = [output_path]
    if progress_output is not None:
        targets.append(progress_output)
    for target in targets:
        _write_json_payload(target, payload)
|
||||
|
||||
|
||||
def _make_progress_payload(
    *,
    args: argparse.Namespace,
    source_root: Path,
    strategy_specs: Sequence[StrategySpec],
    evaluations: Sequence[QueryEvaluation],
    query_index: int,
    total_queries: int,
    run_index: int,
    total_runs: int,
    current_query: str,
    current_strategy_key: str,
) -> Dict[str, Any]:
    """Create a partial progress snapshot for long benchmark runs.

    The snapshot carries status "running", counters for completed vs.
    planned queries/runs, the query and strategy currently executing, and
    all fully-recorded evaluations so far.
    """
    return {
        "status": "running",
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "source": str(source_root),
        "queries_file": str(args.queries_file),
        # Evaluations recorded so far vs. the planned total.
        "query_count": len(evaluations),
        "planned_query_count": total_queries,
        "k": int(args.k),
        "coarse_k": int(args.coarse_k),
        "strategy_keys": [spec.strategy_key for spec in strategy_specs],
        "progress": {
            "completed_queries": query_index,
            "total_queries": total_queries,
            "completed_runs": run_index,
            "total_runs": total_runs,
            "current_query": current_query,
            "current_strategy_key": current_strategy_key,
        },
        # Completed evaluations, with dataclass runs flattened to plain dicts.
        "evaluations": [
            {
                "query": evaluation.query,
                "intent": evaluation.intent,
                "notes": evaluation.notes,
                "relevant_paths": evaluation.relevant_paths,
                "runs": {key: asdict(run) for key, run in evaluation.runs.items()},
            }
            for evaluation in evaluations
        ],
    }
|
||||
|
||||
|
||||
def _make_search_options(method: str, *, k: int) -> SearchOptions:
    """Map a benchmark method name onto engine SearchOptions.

    ``fts`` is lexical-only; ``hybrid`` adds vector retrieval;
    ``dense_rerank``/``staged`` additionally enable the cascade pipeline.
    Raises ValueError for any other method.
    """
    normalized = str(method).strip().lower()
    if normalized not in {"fts", "hybrid", "dense_rerank", "staged"}:
        raise ValueError(f"Unsupported benchmark method: {method}")
    use_vector = normalized != "fts"
    use_cascade = normalized in {"dense_rerank", "staged"}
    return SearchOptions(
        total_limit=k,
        hybrid_mode=use_vector,
        enable_fuzzy=False,
        enable_vector=use_vector,
        pure_vector=False,
        enable_cascade=use_cascade,
    )
|
||||
|
||||
|
||||
def _run_strategy(
    engine: ChainSearchEngine,
    config: Config,
    *,
    strategy_spec: StrategySpec,
    query: str,
    source_path: Path,
    k: int,
    coarse_k: int,
    relevant: set[str],
) -> StrategyRun:
    """Execute one strategy for one query and score the result.

    Temporarily mutates ``config.cascade_strategy`` / ``config.staged_stage2_mode``
    for staged and dense_rerank runs; the previous values are restored in the
    ``finally`` block regardless of success. Any exception is captured into the
    returned StrategyRun's ``error`` field instead of propagating.
    """
    # Reduce cross-run interference from lingering garbage before timing.
    gc.collect()
    effective_method = _select_effective_method(query, strategy_spec.strategy)
    execution_method = "cascade" if effective_method in {"dense_rerank", "staged"} else effective_method
    # Snapshot config fields we may mutate, so the finally-block can restore them.
    previous_cascade_strategy = getattr(config, "cascade_strategy", None)
    previous_stage2_mode = getattr(config, "staged_stage2_mode", None)

    start_ms = _now_ms()
    try:
        options = _make_search_options(
            "staged" if strategy_spec.strategy == "staged" else effective_method,
            k=k,
        )
        if strategy_spec.strategy == "staged":
            config.cascade_strategy = "staged"
            if strategy_spec.stage2_mode:
                config.staged_stage2_mode = strategy_spec.stage2_mode
            result = engine.cascade_search(
                query=query,
                source_path=source_path,
                k=k,
                coarse_k=coarse_k,
                options=options,
                strategy="staged",
            )
        elif effective_method == "dense_rerank":
            config.cascade_strategy = "dense_rerank"
            result = engine.cascade_search(
                query=query,
                source_path=source_path,
                k=k,
                coarse_k=coarse_k,
                options=options,
                strategy="dense_rerank",
            )
        else:
            # fts/hybrid baselines go through the plain search entry point.
            result = engine.search(
                query=query,
                source_path=source_path,
                options=options,
            )
        latency_ms = _now_ms() - start_ms
        # Normalize result paths to the same key space as the ground truth.
        paths_raw = [item.path for item in (result.results or []) if getattr(item, "path", None)]
        topk = _dedup_topk((_normalize_path_key(path) for path in paths_raw), k=k)
        rank = _first_hit_rank(topk, relevant)
        recall = 0.0
        if relevant:
            recall = len(set(topk) & relevant) / float(len(relevant))
        return StrategyRun(
            strategy_key=strategy_spec.strategy_key,
            strategy=strategy_spec.strategy,
            stage2_mode=strategy_spec.stage2_mode,
            effective_method=effective_method,
            execution_method=execution_method,
            latency_ms=latency_ms,
            topk_paths=topk,
            first_hit_rank=rank,
            hit_at_k=rank is not None,
            recall_at_k=recall,
            generated_artifact_count=sum(1 for path in topk if is_generated_artifact_path(path)),
            test_file_count=sum(1 for path in topk if is_test_file(path)),
            error=None,
        )
    except Exception as exc:
        # Record the failure as a zero-score run so the benchmark keeps going.
        latency_ms = _now_ms() - start_ms
        return StrategyRun(
            strategy_key=strategy_spec.strategy_key,
            strategy=strategy_spec.strategy,
            stage2_mode=strategy_spec.stage2_mode,
            effective_method=effective_method,
            execution_method=execution_method,
            latency_ms=latency_ms,
            topk_paths=[],
            first_hit_rank=None,
            hit_at_k=False,
            recall_at_k=0.0,
            generated_artifact_count=0,
            test_file_count=0,
            error=f"{type(exc).__name__}: {exc}",
        )
    finally:
        config.cascade_strategy = previous_cascade_strategy
        config.staged_stage2_mode = previous_stage2_mode
|
||||
|
||||
|
||||
def _summarize_runs(runs: Sequence[StrategyRun]) -> Dict[str, Any]:
    """Aggregate per-query runs into strategy-level quality/latency metrics.

    Latency statistics exclude failed runs; accuracy metrics include them
    (a failed run simply counts as a miss).
    """
    ok_latencies = [run.latency_ms for run in runs if not run.error]
    method_counts: Dict[str, int] = {}
    for run in runs:
        method_counts[run.effective_method] = method_counts.get(run.effective_method, 0) + 1
    return {
        "query_count": len(runs),
        "hit_at_k": _mean([1.0 if run.hit_at_k else 0.0 for run in runs]),
        "mrr_at_k": _mrr([run.first_hit_rank for run in runs]),
        "avg_recall_at_k": _mean([run.recall_at_k for run in runs]),
        "avg_latency_ms": _mean(ok_latencies),
        "p50_latency_ms": _percentile(ok_latencies, 0.50),
        "p95_latency_ms": _percentile(ok_latencies, 0.95),
        "avg_generated_artifact_count": _mean([float(run.generated_artifact_count) for run in runs]),
        "avg_test_file_count": _mean([float(run.test_file_count) for run in runs]),
        "runs_with_generated_artifacts": sum(1 for run in runs if run.generated_artifact_count > 0),
        "runs_with_test_files": sum(1 for run in runs if run.test_file_count > 0),
        "effective_methods": method_counts,
        "errors": sum(1 for run in runs if run.error),
    }
|
||||
|
||||
|
||||
def _build_pairwise_deltas(stage2_summaries: Dict[str, Dict[str, Any]]) -> List[PairwiseDelta]:
    """Compute metric deltas (a minus b) for every unordered pair of stage2 modes."""
    modes = list(stage2_summaries)
    deltas: List[PairwiseDelta] = []
    for offset, mode_a in enumerate(modes):
        summary_a = stage2_summaries[mode_a]
        for mode_b in modes[offset + 1:]:
            summary_b = stage2_summaries[mode_b]
            deltas.append(
                PairwiseDelta(
                    mode_a=mode_a,
                    mode_b=mode_b,
                    hit_at_k_delta=summary_a["hit_at_k"] - summary_b["hit_at_k"],
                    mrr_at_k_delta=summary_a["mrr_at_k"] - summary_b["mrr_at_k"],
                    avg_recall_at_k_delta=summary_a["avg_recall_at_k"] - summary_b["avg_recall_at_k"],
                    avg_latency_ms_delta=summary_a["avg_latency_ms"] - summary_b["avg_latency_ms"],
                )
            )
    return deltas
|
||||
|
||||
|
||||
def _make_plan_payload(
    *,
    args: argparse.Namespace,
    source_root: Path,
    dataset: Sequence[Dict[str, Any]],
    baseline_methods: Sequence[str],
    stage2_modes: Sequence[str],
    strategy_specs: Sequence[StrategySpec],
) -> Dict[str, Any]:
    """Build the plan payload emitted for --dry-run / --self-check.

    Describes the validated configuration and strategy matrix without
    executing any retrieval; includes a small dataset preview.
    """
    return {
        "mode": "dry-run" if args.dry_run else "self-check",
        "local_only": True,
        "source": str(source_root),
        "queries_file": str(args.queries_file),
        "query_count": len(dataset),
        "query_match": args.query_match,
        "k": int(args.k),
        "coarse_k": int(args.coarse_k),
        "baseline_methods": list(baseline_methods),
        "stage2_modes": list(stage2_modes),
        "strategy_keys": [spec.strategy_key for spec in strategy_specs],
        # Local embedding/reranker configuration as resolved from the CLI.
        "local_backends": {
            "embedding_backend": args.embedding_backend,
            "embedding_model": args.embedding_model,
            "reranker_backend": args.reranker_backend,
            "reranker_model": args.reranker_model,
            "embedding_use_gpu": bool(args.embedding_use_gpu),
            "reranker_use_gpu": bool(args.reranker_use_gpu),
        },
        "output": str(args.output),
        "progress_output": str(args.progress_output) if args.progress_output else None,
        # First few labeled queries, for quick eyeballing of the dataset.
        "dataset_preview": [
            {
                "query": item.get("query"),
                "intent": item.get("intent"),
                "relevant_paths": item.get("relevant_paths"),
            }
            for item in list(dataset)[: min(3, len(dataset))]
        ],
    }
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser for the benchmark runner.

    Defaults keep the run local-only (fastembed embeddings, ONNX reranker,
    GPU off) and compare all baselines plus all staged stage2 modes.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # Dataset / source selection.
    parser.add_argument(
        "--source",
        type=Path,
        default=DEFAULT_SOURCE,
        help="Source root to benchmark. Defaults to the repository root so CCW and CodexLens paths resolve together.",
    )
    parser.add_argument(
        "--queries-file",
        type=Path,
        default=DEFAULT_QUERIES_FILE,
        help="Labeled JSONL dataset of CCW smart_search queries",
    )
    parser.add_argument("--query-limit", type=int, default=None, help="Optional query limit")
    parser.add_argument(
        "--query-match",
        type=str,
        default=None,
        help="Optional case-insensitive substring filter for selecting specific benchmark queries.",
    )
    # Evaluation depth.
    parser.add_argument("--k", type=int, default=10, help="Top-k to evaluate")
    parser.add_argument("--coarse-k", type=int, default=100, help="Stage-1 coarse_k")
    # Strategy matrix.
    parser.add_argument(
        "--baseline-methods",
        nargs="*",
        default=list(VALID_BASELINE_METHODS),
        help="Requested smart_search baselines to compare before staged modes (valid: auto, fts, hybrid).",
    )
    parser.add_argument(
        "--stage2-modes",
        nargs="*",
        default=list(VALID_STAGE2_MODES),
        help="Stage-2 modes to compare",
    )
    parser.add_argument("--warmup", type=int, default=0, help="Warmup iterations per strategy")
    # Local backend configuration.
    parser.add_argument(
        "--embedding-backend",
        default="fastembed",
        help="Local embedding backend. This runner only accepts fastembed.",
    )
    parser.add_argument(
        "--embedding-model",
        default="code",
        help="Embedding model/profile for the local embedding backend",
    )
    parser.add_argument(
        "--embedding-use-gpu",
        action="store_true",
        help="Enable GPU acceleration for local embeddings. Off by default for stability.",
    )
    parser.add_argument(
        "--reranker-backend",
        default="onnx",
        help="Local reranker backend. Supported local values: onnx, fastembed, legacy.",
    )
    parser.add_argument(
        "--reranker-model",
        default=DEFAULT_LOCAL_ONNX_RERANKER_MODEL,
        help="Reranker model name for the local reranker backend",
    )
    parser.add_argument(
        "--reranker-use-gpu",
        action="store_true",
        help="Enable GPU acceleration for the local reranker. Off by default for stability.",
    )
    # Run-mode switches.
    parser.add_argument(
        "--skip-dense-baseline",
        action="store_true",
        help="Only compare staged stage2 modes and skip the dense_rerank baseline.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Validate dataset/config and print the benchmark plan without running retrieval.",
    )
    parser.add_argument(
        "--self-check",
        action="store_true",
        help="Smoke-check the entrypoint by validating dataset, source paths, and stage matrix wiring.",
    )
    # Output locations.
    parser.add_argument(
        "--output",
        type=Path,
        default=DEFAULT_OUTPUT,
        help="Output JSON path",
    )
    parser.add_argument(
        "--progress-output",
        type=Path,
        default=None,
        help="Optional JSON path updated after each query with partial progress and completed runs.",
    )
    return parser
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = build_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
source_root = args.source.expanduser().resolve()
|
||||
if not source_root.exists():
|
||||
raise SystemExit(f"Source path does not exist: {source_root}")
|
||||
if int(args.k) <= 0:
|
||||
raise SystemExit("--k must be > 0")
|
||||
if int(args.coarse_k) <= 0:
|
||||
raise SystemExit("--coarse-k must be > 0")
|
||||
if int(args.coarse_k) < int(args.k):
|
||||
raise SystemExit("--coarse-k must be >= --k")
|
||||
if int(args.warmup) < 0:
|
||||
raise SystemExit("--warmup must be >= 0")
|
||||
|
||||
embedding_backend = str(args.embedding_backend).strip().lower()
|
||||
reranker_backend = str(args.reranker_backend).strip().lower()
|
||||
_validate_local_only_backends(embedding_backend, reranker_backend)
|
||||
baseline_methods = _validate_baseline_methods(args.baseline_methods)
|
||||
stage2_modes = _validate_stage2_modes(args.stage2_modes)
|
||||
|
||||
dataset = _load_labeled_queries(args.queries_file, None)
|
||||
dataset = _filter_dataset_by_query_match(dataset, args.query_match)
|
||||
dataset = _apply_query_limit(dataset, args.query_limit)
|
||||
if not dataset:
|
||||
raise SystemExit("No queries to run")
|
||||
|
||||
missing_paths: List[str] = []
|
||||
for item in dataset:
|
||||
_, _, item_missing = _resolve_expected_paths(source_root, [str(path) for path in item["relevant_paths"]])
|
||||
missing_paths.extend(item_missing)
|
||||
if missing_paths:
|
||||
preview = ", ".join(missing_paths[:3])
|
||||
raise SystemExit(
|
||||
"Dataset relevant_paths do not resolve under the selected source root. "
|
||||
f"Examples: {preview}"
|
||||
)
|
||||
|
||||
strategy_specs = _strategy_specs(
|
||||
stage2_modes,
|
||||
include_dense_baseline=not args.skip_dense_baseline,
|
||||
baseline_methods=baseline_methods,
|
||||
)
|
||||
|
||||
if args.dry_run or args.self_check:
|
||||
payload = _make_plan_payload(
|
||||
args=args,
|
||||
source_root=source_root,
|
||||
dataset=dataset,
|
||||
baseline_methods=baseline_methods,
|
||||
stage2_modes=stage2_modes,
|
||||
strategy_specs=strategy_specs,
|
||||
)
|
||||
if args.self_check:
|
||||
payload["status"] = "ok"
|
||||
payload["checks"] = {
|
||||
"dataset_loaded": True,
|
||||
"stage2_matrix_size": len(stage2_modes),
|
||||
"local_only_validation": True,
|
||||
"source_path_exists": True,
|
||||
}
|
||||
print(json.dumps(payload, ensure_ascii=False, indent=2))
|
||||
return
|
||||
|
||||
config = Config.load()
|
||||
config.cascade_strategy = "staged"
|
||||
config.enable_staged_rerank = True
|
||||
config.enable_cross_encoder_rerank = True
|
||||
config.embedding_backend = embedding_backend
|
||||
config.embedding_model = str(args.embedding_model).strip()
|
||||
config.embedding_use_gpu = bool(args.embedding_use_gpu)
|
||||
config.embedding_auto_embed_missing = False
|
||||
config.reranker_backend = reranker_backend
|
||||
config.reranker_model = str(args.reranker_model).strip()
|
||||
config.reranker_use_gpu = bool(args.reranker_use_gpu)
|
||||
|
||||
strategy_runtimes = {
|
||||
spec.strategy_key: _build_strategy_runtime(config, spec)
|
||||
for spec in strategy_specs
|
||||
}
|
||||
|
||||
evaluations: List[QueryEvaluation] = []
|
||||
total_queries = len(dataset)
|
||||
total_runs = total_queries * len(strategy_specs)
|
||||
completed_runs = 0
|
||||
|
||||
try:
|
||||
if int(args.warmup) > 0:
|
||||
warm_query = str(dataset[0]["query"]).strip()
|
||||
warm_relevant_paths = [str(path) for path in dataset[0]["relevant_paths"]]
|
||||
_, warm_relevant, _ = _resolve_expected_paths(source_root, warm_relevant_paths)
|
||||
for spec in strategy_specs:
|
||||
runtime = strategy_runtimes[spec.strategy_key]
|
||||
for _ in range(int(args.warmup)):
|
||||
_run_strategy(
|
||||
runtime.engine,
|
||||
runtime.config,
|
||||
strategy_spec=spec,
|
||||
query=warm_query,
|
||||
source_path=source_root,
|
||||
k=min(int(args.k), 5),
|
||||
coarse_k=min(int(args.coarse_k), 50),
|
||||
relevant=warm_relevant,
|
||||
)
|
||||
|
||||
for index, item in enumerate(dataset, start=1):
|
||||
query = str(item.get("query", "")).strip()
|
||||
if not query:
|
||||
continue
|
||||
print(f"[query {index}/{total_queries}] {query}", flush=True)
|
||||
relevant_paths, relevant, _ = _resolve_expected_paths(
|
||||
source_root,
|
||||
[str(path) for path in item["relevant_paths"]],
|
||||
)
|
||||
runs: Dict[str, StrategyRun] = {}
|
||||
for spec in strategy_specs:
|
||||
if args.progress_output is not None:
|
||||
_write_json_payload(
|
||||
args.progress_output,
|
||||
_make_progress_payload(
|
||||
args=args,
|
||||
source_root=source_root,
|
||||
strategy_specs=strategy_specs,
|
||||
evaluations=evaluations,
|
||||
query_index=index - 1,
|
||||
total_queries=total_queries,
|
||||
run_index=completed_runs,
|
||||
total_runs=total_runs,
|
||||
current_query=query,
|
||||
current_strategy_key=spec.strategy_key,
|
||||
),
|
||||
)
|
||||
print(
|
||||
f"[run {completed_runs + 1}/{total_runs}] "
|
||||
f"strategy={spec.strategy_key} query={query}",
|
||||
flush=True,
|
||||
)
|
||||
runtime = strategy_runtimes[spec.strategy_key]
|
||||
runs[spec.strategy_key] = _run_strategy(
|
||||
runtime.engine,
|
||||
runtime.config,
|
||||
strategy_spec=spec,
|
||||
query=query,
|
||||
source_path=source_root,
|
||||
k=int(args.k),
|
||||
coarse_k=int(args.coarse_k),
|
||||
relevant=relevant,
|
||||
)
|
||||
completed_runs += 1
|
||||
run = runs[spec.strategy_key]
|
||||
outcome = "error" if run.error else "ok"
|
||||
print(
|
||||
f"[done {completed_runs}/{total_runs}] "
|
||||
f"strategy={spec.strategy_key} outcome={outcome} "
|
||||
f"latency_ms={run.latency_ms:.2f} "
|
||||
f"first_hit_rank={run.first_hit_rank}",
|
||||
flush=True,
|
||||
)
|
||||
evaluations.append(
|
||||
QueryEvaluation(
|
||||
query=query,
|
||||
intent=str(item.get("intent")) if item.get("intent") is not None else None,
|
||||
notes=str(item.get("notes")) if item.get("notes") is not None else None,
|
||||
relevant_paths=relevant_paths,
|
||||
runs=runs,
|
||||
)
|
||||
)
|
||||
if args.progress_output is not None:
|
||||
_write_json_payload(
|
||||
args.progress_output,
|
||||
_make_progress_payload(
|
||||
args=args,
|
||||
source_root=source_root,
|
||||
strategy_specs=strategy_specs,
|
||||
evaluations=evaluations,
|
||||
query_index=index,
|
||||
total_queries=total_queries,
|
||||
run_index=completed_runs,
|
||||
total_runs=total_runs,
|
||||
current_query=query,
|
||||
current_strategy_key="complete",
|
||||
),
|
||||
)
|
||||
finally:
|
||||
for runtime in strategy_runtimes.values():
|
||||
try:
|
||||
runtime.engine.close()
|
||||
except Exception:
|
||||
pass
|
||||
for runtime in strategy_runtimes.values():
|
||||
try:
|
||||
runtime.registry.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
strategy_summaries: Dict[str, Dict[str, Any]] = {}
|
||||
for spec in strategy_specs:
|
||||
spec_runs = [evaluation.runs[spec.strategy_key] for evaluation in evaluations if spec.strategy_key in evaluation.runs]
|
||||
summary = _summarize_runs(spec_runs)
|
||||
summary["strategy"] = spec.strategy
|
||||
summary["stage2_mode"] = spec.stage2_mode
|
||||
strategy_summaries[spec.strategy_key] = summary
|
||||
|
||||
stage2_mode_matrix = {
|
||||
mode: strategy_summaries[f"staged:{mode}"]
|
||||
for mode in stage2_modes
|
||||
if f"staged:{mode}" in strategy_summaries
|
||||
}
|
||||
pairwise_deltas = [asdict(item) for item in _build_pairwise_deltas(stage2_mode_matrix)]
|
||||
|
||||
payload = {
|
||||
"status": "completed",
|
||||
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"source": str(source_root),
|
||||
"queries_file": str(args.queries_file),
|
||||
"query_count": len(evaluations),
|
||||
"query_match": args.query_match,
|
||||
"k": int(args.k),
|
||||
"coarse_k": int(args.coarse_k),
|
||||
"local_only": True,
|
||||
"strategies": strategy_summaries,
|
||||
"stage2_mode_matrix": stage2_mode_matrix,
|
||||
"pairwise_stage2_deltas": pairwise_deltas,
|
||||
"config": {
|
||||
"embedding_backend": config.embedding_backend,
|
||||
"embedding_model": config.embedding_model,
|
||||
"embedding_use_gpu": bool(config.embedding_use_gpu),
|
||||
"reranker_backend": config.reranker_backend,
|
||||
"reranker_model": config.reranker_model,
|
||||
"reranker_use_gpu": bool(config.reranker_use_gpu),
|
||||
"enable_staged_rerank": bool(config.enable_staged_rerank),
|
||||
"enable_cross_encoder_rerank": bool(config.enable_cross_encoder_rerank),
|
||||
},
|
||||
"progress_output": str(args.progress_output) if args.progress_output else None,
|
||||
"evaluations": [
|
||||
{
|
||||
"query": evaluation.query,
|
||||
"intent": evaluation.intent,
|
||||
"notes": evaluation.notes,
|
||||
"relevant_paths": evaluation.relevant_paths,
|
||||
"runs": {key: asdict(run) for key, run in evaluation.runs.items()},
|
||||
}
|
||||
for evaluation in evaluations
|
||||
],
|
||||
}
|
||||
|
||||
_write_final_outputs(
|
||||
output_path=args.output,
|
||||
progress_output=args.progress_output,
|
||||
payload=payload,
|
||||
)
|
||||
print(json.dumps(payload, ensure_ascii=False, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -280,8 +280,9 @@ def main() -> None:
|
||||
if args.staged_cluster_strategy:
|
||||
config.staged_clustering_strategy = str(args.staged_cluster_strategy)
|
||||
# Stability: on some Windows setups, fastembed + DirectML can crash under load.
|
||||
# Dense_rerank uses the embedding backend that matches the index; force CPU here.
|
||||
# Force local embeddings and reranking onto CPU for reproducible benchmark runs.
|
||||
config.embedding_use_gpu = False
|
||||
config.reranker_use_gpu = False
|
||||
registry = RegistryStore()
|
||||
registry.initialize()
|
||||
mapper = PathMapper()
|
||||
|
||||
1704
codex-lens/benchmarks/results/ccw_smart_search_stage2.json
Normal file
1704
codex-lens/benchmarks/results/ccw_smart_search_stage2.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,526 @@
|
||||
{
|
||||
"timestamp": "2026-03-14 23:16:55",
|
||||
"source": "D:\\Claude_dms3",
|
||||
"queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl",
|
||||
"query_count": 4,
|
||||
"k": 10,
|
||||
"coarse_k": 100,
|
||||
"local_only": true,
|
||||
"strategies": {
|
||||
"dense_rerank": {
|
||||
"query_count": 4,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 20171.940174996853,
|
||||
"p50_latency_ms": 14222.247749984264,
|
||||
"p95_latency_ms": 35222.31535999476,
|
||||
"errors": 0,
|
||||
"strategy": "dense_rerank",
|
||||
"stage2_mode": null
|
||||
},
|
||||
"staged:precomputed": {
|
||||
"query_count": 4,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 13679.793299987912,
|
||||
"p50_latency_ms": 12918.63379997015,
|
||||
"p95_latency_ms": 16434.964765003322,
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed"
|
||||
},
|
||||
"staged:realtime": {
|
||||
"query_count": 4,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 13885.101849973202,
|
||||
"p50_latency_ms": 13826.323699980974,
|
||||
"p95_latency_ms": 14867.712269958853,
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime"
|
||||
},
|
||||
"staged:static_global_graph": {
|
||||
"query_count": 4,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 13336.124025002122,
|
||||
"p50_latency_ms": 13415.476950019598,
|
||||
"p95_latency_ms": 13514.329230004549,
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph"
|
||||
}
|
||||
},
|
||||
"stage2_mode_matrix": {
|
||||
"precomputed": {
|
||||
"query_count": 4,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 13679.793299987912,
|
||||
"p50_latency_ms": 12918.63379997015,
|
||||
"p95_latency_ms": 16434.964765003322,
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed"
|
||||
},
|
||||
"realtime": {
|
||||
"query_count": 4,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 13885.101849973202,
|
||||
"p50_latency_ms": 13826.323699980974,
|
||||
"p95_latency_ms": 14867.712269958853,
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime"
|
||||
},
|
||||
"static_global_graph": {
|
||||
"query_count": 4,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 13336.124025002122,
|
||||
"p50_latency_ms": 13415.476950019598,
|
||||
"p95_latency_ms": 13514.329230004549,
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph"
|
||||
}
|
||||
},
|
||||
"pairwise_stage2_deltas": [
|
||||
{
|
||||
"mode_a": "precomputed",
|
||||
"mode_b": "realtime",
|
||||
"hit_at_k_delta": 0.0,
|
||||
"mrr_at_k_delta": 0.0,
|
||||
"avg_recall_at_k_delta": 0.0,
|
||||
"avg_latency_ms_delta": -205.30854998528957
|
||||
},
|
||||
{
|
||||
"mode_a": "precomputed",
|
||||
"mode_b": "static_global_graph",
|
||||
"hit_at_k_delta": 0.0,
|
||||
"mrr_at_k_delta": 0.0,
|
||||
"avg_recall_at_k_delta": 0.0,
|
||||
"avg_latency_ms_delta": 343.66927498579025
|
||||
},
|
||||
{
|
||||
"mode_a": "realtime",
|
||||
"mode_b": "static_global_graph",
|
||||
"hit_at_k_delta": 0.0,
|
||||
"mrr_at_k_delta": 0.0,
|
||||
"avg_recall_at_k_delta": 0.0,
|
||||
"avg_latency_ms_delta": 548.9778249710798
|
||||
}
|
||||
],
|
||||
"config": {
|
||||
"embedding_backend": "fastembed",
|
||||
"embedding_model": "code",
|
||||
"embedding_use_gpu": false,
|
||||
"reranker_backend": "onnx",
|
||||
"reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
|
||||
"enable_staged_rerank": true,
|
||||
"enable_cross_encoder_rerank": true
|
||||
},
|
||||
"evaluations": [
|
||||
{
|
||||
"query": "executeHybridMode dense_rerank semantic smart_search",
|
||||
"intent": "ccw-semantic-routing",
|
||||
"notes": "CCW semantic mode delegates to CodexLens dense_rerank.",
|
||||
"relevant_paths": [
|
||||
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
|
||||
],
|
||||
"runs": {
|
||||
"dense_rerank": {
|
||||
"strategy_key": "dense_rerank",
|
||||
"strategy": "dense_rerank",
|
||||
"stage2_mode": null,
|
||||
"latency_ms": 38829.27079999447,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:precomputed": {
|
||||
"strategy_key": "staged:precomputed",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed",
|
||||
"latency_ms": 16915.833400011063,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:realtime": {
|
||||
"strategy_key": "staged:realtime",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime",
|
||||
"latency_ms": 13961.2567999959,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:static_global_graph": {
|
||||
"strategy_key": "staged:static_global_graph",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph",
|
||||
"latency_ms": 12986.330999970436,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"query": "parse CodexLens JSON output strip ANSI smart_search",
|
||||
"intent": "ccw-json-fallback",
|
||||
"notes": "Covers JSON/plain-text fallback handling for CodexLens output.",
|
||||
"relevant_paths": [
|
||||
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
|
||||
],
|
||||
"runs": {
|
||||
"dense_rerank": {
|
||||
"strategy_key": "dense_rerank",
|
||||
"strategy": "dense_rerank",
|
||||
"stage2_mode": null,
|
||||
"latency_ms": 14782.901199996471,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\codex-lens-lsp.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\queue\\queueexecuteinsession.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-dashboard\\queuepanel.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usewebsocket.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useflows.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-error-monitoring.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\tests\\native-session-discovery.test.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\services\\checkpoint-service.ts",
|
||||
"d:\\claude_dms3\\ccw\\tests\\integration\\system-routes.test.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:precomputed": {
|
||||
"strategy_key": "staged:precomputed",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed",
|
||||
"latency_ms": 13710.042499959469,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:realtime": {
|
||||
"strategy_key": "staged:realtime",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime",
|
||||
"latency_ms": 15027.674999952316,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:static_global_graph": {
|
||||
"strategy_key": "staged:static_global_graph",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph",
|
||||
"latency_ms": 13389.622500002384,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"query": "smart_search init embed search action schema",
|
||||
"intent": "ccw-action-schema",
|
||||
"notes": "Find the Zod schema that defines init/embed/search actions.",
|
||||
"relevant_paths": [
|
||||
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
|
||||
],
|
||||
"runs": {
|
||||
"dense_rerank": {
|
||||
"strategy_key": "dense_rerank",
|
||||
"strategy": "dense_rerank",
|
||||
"stage2_mode": null,
|
||||
"latency_ms": 13661.594299972057,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\discovery.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\__tests__\\ask-question.test.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\a2ui\\a2uiwebsockethandler.js",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\dashboard.spec.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:precomputed": {
|
||||
"strategy_key": "staged:precomputed",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed",
|
||||
"latency_ms": 12127.225099980831,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:realtime": {
|
||||
"strategy_key": "staged:realtime",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime",
|
||||
"latency_ms": 12860.084999978542,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:static_global_graph": {
|
||||
"strategy_key": "staged:static_global_graph",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph",
|
||||
"latency_ms": 13441.331400036812,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"query": "auto init missing job dedupe smart_search",
|
||||
"intent": "ccw-auto-init",
|
||||
"notes": "Targets background init/embed warmup and dedupe state.",
|
||||
"relevant_paths": [
|
||||
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
|
||||
],
|
||||
"runs": {
|
||||
"dense_rerank": {
|
||||
"strategy_key": "dense_rerank",
|
||||
"strategy": "dense_rerank",
|
||||
"stage2_mode": null,
|
||||
"latency_ms": 13413.994400024414,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\memory-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usememory.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\batchoperationtoolbar.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\memory.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useprompthistory.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\stores\\flowstore.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\services\\deepwiki-service.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\claude-routes.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:precomputed": {
|
||||
"strategy_key": "staged:precomputed",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed",
|
||||
"latency_ms": 11966.072200000286,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:realtime": {
|
||||
"strategy_key": "staged:realtime",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime",
|
||||
"latency_ms": 13691.39059996605,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
},
|
||||
"staged:static_global_graph": {
|
||||
"strategy_key": "staged:static_global_graph",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph",
|
||||
"latency_ms": 13527.211199998856,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"error": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,415 @@
|
||||
{
|
||||
"timestamp": "2026-03-15 00:19:16",
|
||||
"source": "D:\\Claude_dms3",
|
||||
"queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl",
|
||||
"query_count": 1,
|
||||
"k": 10,
|
||||
"coarse_k": 100,
|
||||
"local_only": true,
|
||||
"strategies": {
|
||||
"auto": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 1.0,
|
||||
"mrr_at_k": 1.0,
|
||||
"avg_recall_at_k": 1.0,
|
||||
"avg_latency_ms": 1377.3565999865532,
|
||||
"p50_latency_ms": 1377.3565999865532,
|
||||
"p95_latency_ms": 1377.3565999865532,
|
||||
"avg_generated_artifact_count": 0.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 0,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"fts": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "auto",
|
||||
"stage2_mode": null
|
||||
},
|
||||
"fts": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 1.0,
|
||||
"mrr_at_k": 1.0,
|
||||
"avg_recall_at_k": 1.0,
|
||||
"avg_latency_ms": 1460.0819000601768,
|
||||
"p50_latency_ms": 1460.0819000601768,
|
||||
"p95_latency_ms": 1460.0819000601768,
|
||||
"avg_generated_artifact_count": 0.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 0,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"fts": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "fts",
|
||||
"stage2_mode": null
|
||||
},
|
||||
"hybrid": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 45991.74140000343,
|
||||
"p50_latency_ms": 45991.74140000343,
|
||||
"p95_latency_ms": 45991.74140000343,
|
||||
"avg_generated_artifact_count": 0.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 0,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"hybrid": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "hybrid",
|
||||
"stage2_mode": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 22739.62610000372,
|
||||
"p50_latency_ms": 22739.62610000372,
|
||||
"p95_latency_ms": 22739.62610000372,
|
||||
"avg_generated_artifact_count": 1.0,
|
||||
"avg_test_file_count": 2.0,
|
||||
"runs_with_generated_artifacts": 1,
|
||||
"runs_with_test_files": 1,
|
||||
"effective_methods": {
|
||||
"dense_rerank": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "dense_rerank",
|
||||
"stage2_mode": null
|
||||
},
|
||||
"staged:precomputed": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 14900.017599999905,
|
||||
"p50_latency_ms": 14900.017599999905,
|
||||
"p95_latency_ms": 14900.017599999905,
|
||||
"avg_generated_artifact_count": 1.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 1,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"staged": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed"
|
||||
},
|
||||
"staged:realtime": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 14104.314599990845,
|
||||
"p50_latency_ms": 14104.314599990845,
|
||||
"p95_latency_ms": 14104.314599990845,
|
||||
"avg_generated_artifact_count": 1.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 1,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"staged": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime"
|
||||
},
|
||||
"staged:static_global_graph": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 11906.852500021458,
|
||||
"p50_latency_ms": 11906.852500021458,
|
||||
"p95_latency_ms": 11906.852500021458,
|
||||
"avg_generated_artifact_count": 1.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 1,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"staged": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph"
|
||||
}
|
||||
},
|
||||
"stage2_mode_matrix": {
|
||||
"precomputed": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 14900.017599999905,
|
||||
"p50_latency_ms": 14900.017599999905,
|
||||
"p95_latency_ms": 14900.017599999905,
|
||||
"avg_generated_artifact_count": 1.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 1,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"staged": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed"
|
||||
},
|
||||
"realtime": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 14104.314599990845,
|
||||
"p50_latency_ms": 14104.314599990845,
|
||||
"p95_latency_ms": 14104.314599990845,
|
||||
"avg_generated_artifact_count": 1.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 1,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"staged": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime"
|
||||
},
|
||||
"static_global_graph": {
|
||||
"query_count": 1,
|
||||
"hit_at_k": 0.0,
|
||||
"mrr_at_k": 0.0,
|
||||
"avg_recall_at_k": 0.0,
|
||||
"avg_latency_ms": 11906.852500021458,
|
||||
"p50_latency_ms": 11906.852500021458,
|
||||
"p95_latency_ms": 11906.852500021458,
|
||||
"avg_generated_artifact_count": 1.0,
|
||||
"avg_test_file_count": 0.0,
|
||||
"runs_with_generated_artifacts": 1,
|
||||
"runs_with_test_files": 0,
|
||||
"effective_methods": {
|
||||
"staged": 1
|
||||
},
|
||||
"errors": 0,
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph"
|
||||
}
|
||||
},
|
||||
"pairwise_stage2_deltas": [
|
||||
{
|
||||
"mode_a": "precomputed",
|
||||
"mode_b": "realtime",
|
||||
"hit_at_k_delta": 0.0,
|
||||
"mrr_at_k_delta": 0.0,
|
||||
"avg_recall_at_k_delta": 0.0,
|
||||
"avg_latency_ms_delta": 795.7030000090599
|
||||
},
|
||||
{
|
||||
"mode_a": "precomputed",
|
||||
"mode_b": "static_global_graph",
|
||||
"hit_at_k_delta": 0.0,
|
||||
"mrr_at_k_delta": 0.0,
|
||||
"avg_recall_at_k_delta": 0.0,
|
||||
"avg_latency_ms_delta": 2993.165099978447
|
||||
},
|
||||
{
|
||||
"mode_a": "realtime",
|
||||
"mode_b": "static_global_graph",
|
||||
"hit_at_k_delta": 0.0,
|
||||
"mrr_at_k_delta": 0.0,
|
||||
"avg_recall_at_k_delta": 0.0,
|
||||
"avg_latency_ms_delta": 2197.462099969387
|
||||
}
|
||||
],
|
||||
"config": {
|
||||
"embedding_backend": "fastembed",
|
||||
"embedding_model": "code",
|
||||
"embedding_use_gpu": false,
|
||||
"reranker_backend": "onnx",
|
||||
"reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2",
|
||||
"reranker_use_gpu": false,
|
||||
"enable_staged_rerank": true,
|
||||
"enable_cross_encoder_rerank": true
|
||||
},
|
||||
"evaluations": [
|
||||
{
|
||||
"query": "executeHybridMode dense_rerank semantic smart_search",
|
||||
"intent": "ccw-semantic-routing",
|
||||
"notes": "CCW semantic mode delegates to CodexLens dense_rerank.",
|
||||
"relevant_paths": [
|
||||
"D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts"
|
||||
],
|
||||
"runs": {
|
||||
"auto": {
|
||||
"strategy_key": "auto",
|
||||
"strategy": "auto",
|
||||
"stage2_mode": null,
|
||||
"effective_method": "fts",
|
||||
"execution_method": "fts",
|
||||
"latency_ms": 1377.3565999865532,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts"
|
||||
],
|
||||
"first_hit_rank": 1,
|
||||
"hit_at_k": true,
|
||||
"recall_at_k": 1.0,
|
||||
"generated_artifact_count": 0,
|
||||
"test_file_count": 0,
|
||||
"error": null
|
||||
},
|
||||
"fts": {
|
||||
"strategy_key": "fts",
|
||||
"strategy": "fts",
|
||||
"stage2_mode": null,
|
||||
"effective_method": "fts",
|
||||
"execution_method": "fts",
|
||||
"latency_ms": 1460.0819000601768,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts"
|
||||
],
|
||||
"first_hit_rank": 1,
|
||||
"hit_at_k": true,
|
||||
"recall_at_k": 1.0,
|
||||
"generated_artifact_count": 0,
|
||||
"test_file_count": 0,
|
||||
"error": null
|
||||
},
|
||||
"hybrid": {
|
||||
"strategy_key": "hybrid",
|
||||
"strategy": "hybrid",
|
||||
"stage2_mode": null,
|
||||
"effective_method": "hybrid",
|
||||
"execution_method": "hybrid",
|
||||
"latency_ms": 45991.74140000343,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\config\\litellm-api-config-manager.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py",
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\core-memory.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\scripts\\generate_embeddings.py",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\notification-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\team-msg.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\types\\remote-notification.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\memory-store.ts",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"generated_artifact_count": 0,
|
||||
"test_file_count": 0,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy_key": "dense_rerank",
|
||||
"strategy": "dense_rerank",
|
||||
"stage2_mode": null,
|
||||
"effective_method": "dense_rerank",
|
||||
"execution_method": "cascade",
|
||||
"latency_ms": 22739.62610000372,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"generated_artifact_count": 1,
|
||||
"test_file_count": 2,
|
||||
"error": null
|
||||
},
|
||||
"staged:precomputed": {
|
||||
"strategy_key": "staged:precomputed",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "precomputed",
|
||||
"effective_method": "staged",
|
||||
"execution_method": "cascade",
|
||||
"latency_ms": 14900.017599999905,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"generated_artifact_count": 1,
|
||||
"test_file_count": 0,
|
||||
"error": null
|
||||
},
|
||||
"staged:realtime": {
|
||||
"strategy_key": "staged:realtime",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "realtime",
|
||||
"effective_method": "staged",
|
||||
"execution_method": "cascade",
|
||||
"latency_ms": 14104.314599990845,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"generated_artifact_count": 1,
|
||||
"test_file_count": 0,
|
||||
"error": null
|
||||
},
|
||||
"staged:static_global_graph": {
|
||||
"strategy_key": "staged:static_global_graph",
|
||||
"strategy": "staged",
|
||||
"stage2_mode": "static_global_graph",
|
||||
"effective_method": "staged",
|
||||
"execution_method": "cascade",
|
||||
"latency_ms": 11906.852500021458,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\commands\\memory.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts",
|
||||
"d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts",
|
||||
"d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts",
|
||||
"d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js",
|
||||
"d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts"
|
||||
],
|
||||
"first_hit_rank": null,
|
||||
"hit_at_k": false,
|
||||
"recall_at_k": 0.0,
|
||||
"generated_artifact_count": 1,
|
||||
"test_file_count": 0,
|
||||
"error": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user