mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-12 02:37:45 +08:00
feat: Add comprehensive tests for contentPattern and glob pattern matching
- Implemented final verification tests for contentPattern to validate behavior with empty strings, dangerous patterns, and normal patterns. - Created glob pattern matching tests to verify regex conversion and matching functionality. - Developed infinite loop risk tests using Worker threads to isolate potential blocking operations. - Introduced optimized contentPattern tests to validate improvements in the findMatches function. - Added verification tests to assess the effectiveness of contentPattern optimizations. - Conducted safety tests for contentPattern to identify edge cases and potential vulnerabilities. - Implemented unrestricted loop tests to analyze infinite loop risks without match limits. - Developed tests for zero-width pattern detection logic to ensure proper handling of dangerous regex patterns.
This commit is contained in:
384
codex-lens/benchmarks/compare_staged_realtime_vs_dense_rerank.py
Normal file
384
codex-lens/benchmarks/compare_staged_realtime_vs_dense_rerank.py
Normal file
@@ -0,0 +1,384 @@
|
||||
#!/usr/bin/env python
|
||||
"""Compare staged realtime LSP pipeline vs direct dense->rerank cascade.
|
||||
|
||||
This benchmark compares two retrieval pipelines:
|
||||
1) staged+realtime: coarse (binary or dense fallback) -> realtime LSP graph expand -> clustering -> rerank
|
||||
2) dense_rerank: dense ANN coarse -> cross-encoder rerank
|
||||
|
||||
Because most repos do not have ground-truth labels, this script reports:
|
||||
- latency statistics
|
||||
- top-k overlap metrics (Jaccard + RBO)
|
||||
- diversity proxies (unique files/dirs)
|
||||
- staged pipeline stage stats (if present)
|
||||
|
||||
Usage:
|
||||
python benchmarks/compare_staged_realtime_vs_dense_rerank.py --source ./src
|
||||
python benchmarks/compare_staged_realtime_vs_dense_rerank.py --queries-file benchmarks/queries.txt
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import statistics
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
# Add src to path (match other benchmark scripts)
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.storage.registry import RegistryStore
|
||||
|
||||
|
||||
# Built-in query set used when no queries file is supplied: a mix of
# code-like lookups and natural-language phrases.
DEFAULT_QUERIES = [
    "class Config",
    "def search",
    "LspBridge",
    "graph expansion",
    "clustering strategy",
    "error handling",
    "how to parse json",
]
|
||||
|
||||
|
||||
def _now_ms() -> float:
|
||||
return time.perf_counter() * 1000.0
|
||||
|
||||
|
||||
def _safe_relpath(path: str, root: Path) -> str:
|
||||
try:
|
||||
return str(Path(path).resolve().relative_to(root.resolve()))
|
||||
except Exception:
|
||||
return path
|
||||
|
||||
|
||||
def _normalize_path_key(path: str) -> str:
|
||||
"""Normalize file paths for overlap/dedup metrics (Windows-safe)."""
|
||||
try:
|
||||
p = Path(path)
|
||||
# Don't explode on non-files like "<memory>".
|
||||
if str(p) and (p.is_absolute() or re.match(r"^[A-Za-z]:", str(p))):
|
||||
norm = str(p.resolve())
|
||||
else:
|
||||
norm = str(p)
|
||||
except Exception:
|
||||
norm = path
|
||||
norm = norm.replace("/", "\\")
|
||||
if os.name == "nt":
|
||||
norm = norm.lower()
|
||||
return norm
|
||||
|
||||
|
||||
def _extract_stage_stats(errors: List[str]) -> Optional[Dict[str, Any]]:
|
||||
"""Extract STAGE_STATS JSON blob from SearchStats.errors."""
|
||||
for item in errors or []:
|
||||
if not isinstance(item, str):
|
||||
continue
|
||||
if not item.startswith("STAGE_STATS:"):
|
||||
continue
|
||||
payload = item[len("STAGE_STATS:") :]
|
||||
try:
|
||||
return json.loads(payload)
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def jaccard_topk(a: List[str], b: List[str]) -> float:
    """Return the Jaccard similarity of the two lists treated as sets.

    Two empty inputs count as identical (1.0); exactly one empty input
    yields 0.0.
    """
    left = set(a)
    right = set(b)
    if not (left or right):
        return 1.0
    if not (left and right):
        return 0.0
    shared = left & right
    combined = left | right
    return len(shared) / len(combined)
|
||||
|
||||
|
||||
def rbo(a: List[str], b: List[str], p: float = 0.9, *, normalize: bool = False) -> float:
    """Rank-biased overlap for two ranked lists, truncated at the longer list.

    At each depth ``d`` the agreement ``|A[:d] & B[:d]| / d`` is weighted by
    ``(1 - p) * p ** (d - 1)`` and summed up to ``max(len(a), len(b))``.

    Because the sum is truncated, the raw score can never exceed
    ``1 - p ** depth`` (about 0.65 for two identical 10-item lists with
    ``p=0.9``), even though two empty lists score 1.0.

    Args:
        a: First ranked list, best item first.
        b: Second ranked list, best item first.
        p: Persistence parameter in the open interval (0, 1); larger values
            give deeper ranks more weight.
        normalize: When true, divide the raw score by its attainable maximum
            ``1 - p ** depth`` so identical lists score 1.0. The default
            keeps the raw truncated score.

    Returns:
        The overlap score in ``[0, 1]``.

    Raises:
        ValueError: If ``p`` is not strictly between 0 and 1.
    """
    if p <= 0.0 or p >= 1.0:
        raise ValueError("p must be in (0, 1)")
    if not a and not b:
        return 1.0

    depth = max(len(a), len(b))
    seen_a: set = set()
    seen_b: set = set()

    score = 0.0
    for d in range(1, depth + 1):
        # Grow each prefix set only while its list still has items.
        if d <= len(a):
            seen_a.add(a[d - 1])
        if d <= len(b):
            seen_b.add(b[d - 1])
        overlap = len(seen_a & seen_b)
        score += (overlap / d) * ((1.0 - p) * (p ** (d - 1)))

    if normalize:
        # Rescale by the best truncated score; clamp against float rounding.
        return min(1.0, score / (1.0 - p ** depth))
    return score
|
||||
|
||||
|
||||
def _unique_parent_dirs(paths: Iterable[str]) -> int:
|
||||
dirs = set()
|
||||
for p in paths:
|
||||
try:
|
||||
dirs.add(str(Path(p).parent))
|
||||
except Exception:
|
||||
continue
|
||||
return len(dirs)
|
||||
|
||||
|
||||
@dataclass
class RunDetail:
    """Outcome of one search run for a single query and strategy."""

    # Strategy name passed to the search call.
    strategy: str
    # Query text that was searched.
    query: str
    # Elapsed time of the search call, in milliseconds.
    latency_ms: float
    # Number of returned results that carried a path (before de-duplication).
    num_results: int
    # Normalized, de-duplicated result paths in rank order.
    topk_paths: List[str]
    # Decoded STAGE_STATS payload, when the run reported one.
    stage_stats: Optional[Dict[str, Any]] = None
    # repr() of the exception if the run failed, otherwise None.
    error: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class CompareDetail:
    """Side-by-side result of both strategies for one query."""

    # Query text shared by both runs.
    query: str
    # Run produced with the "staged" strategy.
    staged: RunDetail
    # Run produced with the "dense_rerank" strategy.
    dense_rerank: RunDetail
    # Jaccard similarity of the two top-k path lists.
    jaccard_topk: float
    # Rank-biased overlap of the two top-k path lists.
    rbo_topk: float
    # Distinct files in each top-k list.
    staged_unique_files_topk: int
    dense_unique_files_topk: int
    # Distinct parent directories in each top-k list.
    staged_unique_dirs_topk: int
    dense_unique_dirs_topk: int
|
||||
|
||||
|
||||
def _run_once(
    engine: ChainSearchEngine,
    query: str,
    source_path: Path,
    *,
    strategy: str,
    k: int,
    coarse_k: int,
    options: Optional[SearchOptions] = None,
) -> RunDetail:
    """Run one cascade search and summarize it as a ``RunDetail``.

    A garbage collection is forced before the timer starts. Any exception
    raised by the search or by result processing is captured in the returned
    record's ``error`` field instead of being propagated.

    Args:
        engine: Search engine whose ``cascade_search`` is called.
        query: Query text.
        source_path: Directory passed to the search as ``source_path``.
        strategy: Cascade strategy name forwarded to the search.
        k: Requested number of final results; also caps ``topk_paths``.
        coarse_k: Number of coarse candidates forwarded to the search.
        options: Optional search options forwarded unchanged.

    Returns:
        A ``RunDetail`` with latency, result count, ranked unique paths and,
        if reported, stage statistics.
    """
    gc.collect()
    started_ms = _now_ms()
    try:
        outcome = engine.cascade_search(
            query=query,
            source_path=source_path,
            k=k,
            coarse_k=coarse_k,
            options=options,
            strategy=strategy,
        )
        # Stop the clock before post-processing so only the search is timed.
        elapsed_ms = _now_ms() - started_ms

        normalized = [
            _normalize_path_key(hit.path)
            for hit in (outcome.results or [])
            if getattr(hit, "path", None)
        ]

        # Keep the first occurrence of each path, preserving rank order.
        ranked: List[str] = []
        already: set = set()
        for key in normalized:
            if key not in already:
                already.add(key)
                ranked.append(key)
                if len(ranked) >= k:
                    break

        return RunDetail(
            strategy=strategy,
            query=query,
            latency_ms=elapsed_ms,
            num_results=len(normalized),
            topk_paths=ranked,
            stage_stats=_extract_stage_stats(getattr(outcome.stats, "errors", [])),
        )
    except Exception as exc:
        # Record the failure (with the time spent) so the benchmark continues.
        return RunDetail(
            strategy=strategy,
            query=query,
            latency_ms=_now_ms() - started_ms,
            num_results=0,
            topk_paths=[],
            stage_stats=None,
            error=repr(exc),
        )
|
||||
|
||||
|
||||
def _load_queries(path: Optional[Path], limit: Optional[int]) -> List[str]:
|
||||
if path is None:
|
||||
queries = list(DEFAULT_QUERIES)
|
||||
else:
|
||||
raw = path.read_text(encoding="utf-8", errors="ignore").splitlines()
|
||||
queries = []
|
||||
for line in raw:
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
continue
|
||||
queries.append(line)
|
||||
if limit is not None:
|
||||
return queries[:limit]
|
||||
return queries
|
||||
|
||||
|
||||
def main() -> None:
    """Run the staged-vs-dense_rerank comparison and write a JSON report.

    Steps: parse CLI arguments, load the query list, build a search engine
    configured for the staged realtime pipeline, optionally warm up both
    strategies, run every query through both strategies, then save the
    per-query comparisons plus an aggregate summary to ``--output``.

    Raises:
        SystemExit: If ``--source`` does not exist or no queries are loaded.
    """
    here = Path(__file__).parent

    cli = argparse.ArgumentParser(
        description="Compare staged realtime LSP pipeline vs direct dense_rerank cascade"
    )
    cli.add_argument(
        "--source",
        type=Path,
        default=here.parent / "src",
        help="Source directory to search (default: ./src)",
    )
    cli.add_argument(
        "--queries-file",
        type=Path,
        default=None,
        help="Optional file with one query per line (# comments supported)",
    )
    cli.add_argument("--queries", type=int, default=None, help="Limit number of queries")
    cli.add_argument("--k", type=int, default=10, help="Final result count (default 10)")
    cli.add_argument("--coarse-k", type=int, default=100, help="Coarse candidates (default 100)")
    cli.add_argument("--warmup", type=int, default=1, help="Warmup runs per strategy (default 1)")
    cli.add_argument(
        "--output",
        type=Path,
        default=here / "results" / "staged_realtime_vs_dense_rerank.json",
        help="Output JSON path",
    )
    args = cli.parse_args()

    if not args.source.exists():
        raise SystemExit(f"Source path does not exist: {args.source}")

    queries = _load_queries(args.queries_file, args.queries)
    if not queries:
        raise SystemExit("No queries to run")

    # Match CLI behavior: load settings + apply global/workspace .env overrides.
    # This matters on Windows, where ONNX/DirectML can sometimes crash under
    # load; many users pin EMBEDDING_BACKEND=litellm in ~/.codexlens/.env for
    # stability.
    config = Config.load()
    config.cascade_strategy = "staged"
    config.staged_stage2_mode = "realtime"
    config.enable_staged_rerank = True
    # Stability: on some Windows setups, fastembed + DirectML can crash under
    # load. Dense_rerank uses the embedding backend that matches the index;
    # force CPU here.
    config.embedding_use_gpu = False

    registry = RegistryStore()
    registry.initialize()
    mapper = PathMapper()
    engine = ChainSearchEngine(registry=registry, mapper=mapper, config=config)

    try:
        strategies = ["staged", "dense_rerank"]

        # Warmup: run the first query with reduced k/coarse_k for each strategy.
        warmup_rounds = args.warmup
        if warmup_rounds > 0:
            first_query = queries[0]
            warm_k = min(args.k, 5)
            warm_coarse_k = min(args.coarse_k, 50)
            for name in strategies:
                for _ in range(warmup_rounds):
                    try:
                        _run_once(
                            engine,
                            first_query,
                            args.source,
                            strategy=name,
                            k=warm_k,
                            coarse_k=warm_coarse_k,
                        )
                    except Exception:
                        # Warmup is best effort; failures here are ignored.
                        pass

        comparisons: List[CompareDetail] = []
        total = len(queries)

        for position, query in enumerate(queries, start=1):
            print(f"[{position}/{total}] {query}")

            staged = _run_once(
                engine,
                query,
                args.source,
                strategy="staged",
                k=args.k,
                coarse_k=args.coarse_k,
            )
            dense = _run_once(
                engine,
                query,
                args.source,
                strategy="dense_rerank",
                k=args.k,
                coarse_k=args.coarse_k,
            )

            staged_paths = staged.topk_paths
            dense_paths = dense.topk_paths

            detail = CompareDetail(
                query=query,
                staged=staged,
                dense_rerank=dense,
                jaccard_topk=jaccard_topk(staged_paths, dense_paths),
                rbo_topk=rbo(staged_paths, dense_paths, p=0.9),
                staged_unique_files_topk=len(set(staged_paths)),
                dense_unique_files_topk=len(set(dense_paths)),
                staged_unique_dirs_topk=_unique_parent_dirs(staged_paths),
                dense_unique_dirs_topk=_unique_parent_dirs(dense_paths),
            )
            comparisons.append(detail)

        staged_runs = [c.staged for c in comparisons]
        dense_runs = [c.dense_rerank for c in comparisons]

        # Latency averages only cover runs that finished without an error.
        staged_ok_ms = [run.latency_ms for run in staged_runs if not run.error]
        dense_ok_ms = [run.latency_ms for run in dense_runs if not run.error]

        summary = {
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "source": str(args.source),
            "k": args.k,
            "coarse_k": args.coarse_k,
            "query_count": len(comparisons),
            "avg_jaccard_topk": statistics.mean([c.jaccard_topk for c in comparisons]) if comparisons else 0.0,
            "avg_rbo_topk": statistics.mean([c.rbo_topk for c in comparisons]) if comparisons else 0.0,
            "staged": {
                "success": sum(1 for run in staged_runs if not run.error),
                "avg_latency_ms": statistics.mean(staged_ok_ms) if staged_ok_ms else 0.0,
            },
            "dense_rerank": {
                "success": sum(1 for run in dense_runs if not run.error),
                "avg_latency_ms": statistics.mean(dense_ok_ms) if dense_ok_ms else 0.0,
            },
        }

        args.output.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "summary": summary,
            "comparisons": [asdict(c) for c in comparisons],
        }
        args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        print(f"\nSaved: {args.output}")
    finally:
        # Close the engine first, then the registry; a failure in one close
        # is reported on stderr and does not prevent the other.
        for label, resource in (("engine", engine), ("registry", registry)):
            try:
                resource.close()
            except Exception as exc:
                print(f"WARNING {label}.close() failed: {exc!r}", file=sys.stderr)
|
||||
|
||||
|
||||
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
453
codex-lens/benchmarks/results/compare_2026-02-09.json
Normal file
453
codex-lens/benchmarks/results/compare_2026-02-09.json
Normal file
@@ -0,0 +1,453 @@
|
||||
{
|
||||
"summary": {
|
||||
"timestamp": "2026-02-09 11:08:47",
|
||||
"source": "src",
|
||||
"k": 10,
|
||||
"coarse_k": 100,
|
||||
"query_count": 7,
|
||||
"avg_jaccard_topk": 0.41421235160730957,
|
||||
"avg_rbo_topk": 0.22899068093857142,
|
||||
"staged": {
|
||||
"success": 7,
|
||||
"avg_latency_ms": 32009.68328570468
|
||||
},
|
||||
"dense_rerank": {
|
||||
"success": 7,
|
||||
"avg_latency_ms": 2783.3305999977247
|
||||
}
|
||||
},
|
||||
"comparisons": [
|
||||
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 40875.45489999652,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 10633.91399383545,
|
||||
"stage2_expand_ms": 12487.980365753174,
|
||||
"stage3_cluster_ms": 10781.587362289429,
|
||||
"stage4_rerank_ms": 6914.837837219238
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 149,
|
||||
"stage3_clustered": 20,
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 3111.874899983406,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.1111111111111111,
|
||||
"rbo_topk": 0.06741929885142856,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 8,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 38541.18510001898,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 548.8920211791992,
|
||||
"stage2_expand_ms": 27176.724433898926,
|
||||
"stage3_cluster_ms": 8352.917671203613,
|
||||
"stage4_rerank_ms": 2392.6541805267334
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 101,
|
||||
"stage3_clustered": 20,
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 2652.75,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.26666666666666666,
|
||||
"rbo_topk": 0.2983708721671428,
|
||||
"staged_unique_files_topk": 9,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 26319.983999997377,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 514.4834518432617,
|
||||
"stage2_expand_ms": 14329.241514205933,
|
||||
"stage3_cluster_ms": 9249.040842056274,
|
||||
"stage4_rerank_ms": 2159.9059104919434
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 100,
|
||||
"stage3_clustered": 20,
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 2666.9745999872684,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.6666666666666666,
|
||||
"rbo_topk": 0.3571430355128571,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "graph expansion",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "graph expansion",
|
||||
"latency_ms": 25696.087299972773,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 560.4684352874756,
|
||||
"stage2_expand_ms": 13951.441526412964,
|
||||
"stage3_cluster_ms": 8879.387140274048,
|
||||
"stage4_rerank_ms": 2229.4514179229736
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 100,
|
||||
"stage3_clustered": 20,
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "graph expansion",
|
||||
"latency_ms": 2544.8630999922752,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.42857142857142855,
|
||||
"rbo_topk": 0.13728894791142857,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "clustering strategy",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "clustering strategy",
|
||||
"latency_ms": 27387.41929998994,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 625.0262260437012,
|
||||
"stage2_expand_ms": 14211.347103118896,
|
||||
"stage3_cluster_ms": 10269.58680152893,
|
||||
"stage4_rerank_ms": 2208.007335662842
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 100,
|
||||
"stage3_clustered": 20,
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "clustering strategy",
|
||||
"latency_ms": 2928.22389999032,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.17647058823529413,
|
||||
"rbo_topk": 0.07116480920571429,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "error handling",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "error handling",
|
||||
"latency_ms": 23732.33979997039,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 504.0884017944336,
|
||||
"stage2_expand_ms": 12899.415016174316,
|
||||
"stage3_cluster_ms": 7881.027936935425,
|
||||
"stage4_rerank_ms": 2372.1535205841064
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 100,
|
||||
"stage3_clustered": 20,
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "error handling",
|
||||
"latency_ms": 2946.439900010824,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.6666666666666666,
|
||||
"rbo_topk": 0.19158624676285715,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "how to parse json",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "how to parse json",
|
||||
"latency_ms": 41515.31259998679,
|
||||
"num_results": 9,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 601.7005443572998,
|
||||
"stage2_expand_ms": 30052.319765090942,
|
||||
"stage3_cluster_ms": 8409.791231155396,
|
||||
"stage4_rerank_ms": 2371.1729049682617
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 100,
|
||||
"stage3_clustered": 20,
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "how to parse json",
|
||||
"latency_ms": 2632.1878000199795,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.5833333333333334,
|
||||
"rbo_topk": 0.4799615561585714,
|
||||
"staged_unique_files_topk": 9,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
73
codex-lens/benchmarks/results/tmp_compare1.json
Normal file
73
codex-lens/benchmarks/results/tmp_compare1.json
Normal file
@@ -0,0 +1,73 @@
|
||||
{
|
||||
"summary": {
|
||||
"timestamp": "2026-02-08 23:48:26",
|
||||
"source": "src",
|
||||
"k": 5,
|
||||
"coarse_k": 50,
|
||||
"query_count": 1,
|
||||
"avg_jaccard_topk": 0.0,
|
||||
"avg_rbo_topk": 0.0,
|
||||
"staged": {
|
||||
"success": 1,
|
||||
"avg_latency_ms": 30093.97499999404
|
||||
},
|
||||
"dense_rerank": {
|
||||
"success": 1,
|
||||
"avg_latency_ms": 331.4424999952316
|
||||
}
|
||||
},
|
||||
"comparisons": [
|
||||
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 30093.97499999404,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 6421.706914901733,
|
||||
"stage2_expand_ms": 17591.988563537598,
|
||||
"stage3_cluster_ms": 3700.4549503326416,
|
||||
"stage4_rerank_ms": 2340.064525604248
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 50,
|
||||
"stage2_expanded": 99,
|
||||
"stage3_clustered": 10,
|
||||
"stage4_reranked": 10
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 331.4424999952316,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.0,
|
||||
"rbo_topk": 0.0,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 5,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
177
codex-lens/benchmarks/results/tmp_compare3_ok_cpu.json
Normal file
177
codex-lens/benchmarks/results/tmp_compare3_ok_cpu.json
Normal file
@@ -0,0 +1,177 @@
|
||||
{
|
||||
"summary": {
|
||||
"timestamp": "2026-02-08 23:58:56",
|
||||
"source": "src",
|
||||
"k": 5,
|
||||
"coarse_k": 50,
|
||||
"query_count": 3,
|
||||
"avg_jaccard_topk": 0.11574074074074074,
|
||||
"avg_rbo_topk": 0.14601366666666662,
|
||||
"staged": {
|
||||
"success": 3,
|
||||
"avg_latency_ms": 27868.044033328693
|
||||
},
|
||||
"dense_rerank": {
|
||||
"success": 3,
|
||||
"avg_latency_ms": 1339.25289999942
|
||||
}
|
||||
},
|
||||
"comparisons": [
|
||||
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 33643.06179998815,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 6201.4524936676025,
|
||||
"stage2_expand_ms": 17306.61702156067,
|
||||
"stage3_cluster_ms": 6829.557418823242,
|
||||
"stage4_rerank_ms": 3267.071485519409
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 50,
|
||||
"stage2_expanded": 99,
|
||||
"stage3_clustered": 10,
|
||||
"stage4_reranked": 10
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 1520.9955999851227,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.1111111111111111,
|
||||
"rbo_topk": 0.031347,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 5,
|
||||
"staged_unique_dirs_topk": 5,
|
||||
"dense_unique_dirs_topk": 1
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 26400.58900000155,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 404.60920333862305,
|
||||
"stage2_expand_ms": 20036.258697509766,
|
||||
"stage3_cluster_ms": 4919.439315795898,
|
||||
"stage4_rerank_ms": 1001.8632411956787
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 50,
|
||||
"stage2_expanded": 51,
|
||||
"stage3_clustered": 10,
|
||||
"stage4_reranked": 10
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 1264.3862999975681,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.125,
|
||||
"rbo_topk": 0.20334699999999994,
|
||||
"staged_unique_files_topk": 4,
|
||||
"dense_unique_files_topk": 5,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 2
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 23560.481299996376,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 385.28990745544434,
|
||||
"stage2_expand_ms": 17787.648677825928,
|
||||
"stage3_cluster_ms": 4374.642372131348,
|
||||
"stage4_rerank_ms": 974.8115539550781
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 50,
|
||||
"stage2_expanded": 50,
|
||||
"stage3_clustered": 10,
|
||||
"stage4_reranked": 10
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 1232.3768000155687,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.1111111111111111,
|
||||
"rbo_topk": 0.20334699999999994,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 5,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
176
codex-lens/benchmarks/results/tmp_compare3_ok_cpu_dedup.json
Normal file
176
codex-lens/benchmarks/results/tmp_compare3_ok_cpu_dedup.json
Normal file
@@ -0,0 +1,176 @@
|
||||
{
|
||||
"summary": {
|
||||
"timestamp": "2026-02-09 00:08:47",
|
||||
"source": "src",
|
||||
"k": 5,
|
||||
"coarse_k": 50,
|
||||
"query_count": 3,
|
||||
"avg_jaccard_topk": 0.11574074074074074,
|
||||
"avg_rbo_topk": 0.14601366666666662,
|
||||
"staged": {
|
||||
"success": 3,
|
||||
"avg_latency_ms": 31720.555866663653
|
||||
},
|
||||
"dense_rerank": {
|
||||
"success": 3,
|
||||
"avg_latency_ms": 1401.2113333245118
|
||||
}
|
||||
},
|
||||
"comparisons": [
|
||||
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 40162.88519999385,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 6091.366767883301,
|
||||
"stage2_expand_ms": 17540.942907333374,
|
||||
"stage3_cluster_ms": 13169.558048248291,
|
||||
"stage4_rerank_ms": 3317.5392150878906
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 50,
|
||||
"stage2_expanded": 99,
|
||||
"stage3_clustered": 10,
|
||||
"stage4_reranked": 10
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 1571.1398999989033,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.1111111111111111,
|
||||
"rbo_topk": 0.031347,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 5,
|
||||
"staged_unique_dirs_topk": 5,
|
||||
"dense_unique_dirs_topk": 1
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 31623.380899995565,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 400.84290504455566,
|
||||
"stage2_expand_ms": 20529.58631515503,
|
||||
"stage3_cluster_ms": 9625.348806381226,
|
||||
"stage4_rerank_ms": 1027.686357498169
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 50,
|
||||
"stage2_expanded": 51,
|
||||
"stage3_clustered": 10,
|
||||
"stage4_reranked": 10
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 1376.3304999768734,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.125,
|
||||
"rbo_topk": 0.20334699999999994,
|
||||
"staged_unique_files_topk": 4,
|
||||
"dense_unique_files_topk": 5,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 2
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 23375.40150000155,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 392.41671562194824,
|
||||
"stage2_expand_ms": 17760.897397994995,
|
||||
"stage3_cluster_ms": 4194.235563278198,
|
||||
"stage4_rerank_ms": 990.307092666626
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 50,
|
||||
"stage2_expanded": 50,
|
||||
"stage3_clustered": 10,
|
||||
"stage4_reranked": 10
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 1256.1635999977589,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.1111111111111111,
|
||||
"rbo_topk": 0.20334699999999994,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 5,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user