feat: Add comprehensive tests for contentPattern and glob pattern matching

- Implemented final verification tests for contentPattern to validate behavior with empty strings, dangerous patterns, and normal patterns.
- Created glob pattern matching tests to verify regex conversion and matching functionality.
- Developed infinite loop risk tests using Worker threads to isolate potential blocking operations.
- Introduced optimized contentPattern tests to validate improvements in the findMatches function.
- Added verification tests to assess the effectiveness of contentPattern optimizations.
- Conducted safety tests for contentPattern to identify edge cases and potential vulnerabilities.
- Implemented unrestricted loop tests to analyze infinite loop risks without match limits.
- Developed tests for zero-width pattern detection logic to ensure proper handling of dangerous regex patterns.
This commit is contained in:
catlog22
2026-02-09 11:13:01 +08:00
parent dfe153778c
commit 964292ebdb
62 changed files with 7588 additions and 374 deletions

View File

@@ -0,0 +1,384 @@
#!/usr/bin/env python
"""Compare staged realtime LSP pipeline vs direct dense->rerank cascade.
This benchmark compares two retrieval pipelines:
1) staged+realtime: coarse (binary or dense fallback) -> realtime LSP graph expand -> clustering -> rerank
2) dense_rerank: dense ANN coarse -> cross-encoder rerank
Because most repos do not have ground-truth labels, this script reports:
- latency statistics
- top-k overlap metrics (Jaccard + RBO)
- diversity proxies (unique files/dirs)
- staged pipeline stage stats (if present)
Usage:
python benchmarks/compare_staged_realtime_vs_dense_rerank.py --source ./src
python benchmarks/compare_staged_realtime_vs_dense_rerank.py --queries-file benchmarks/queries.txt
"""
from __future__ import annotations
import argparse
import gc
import json
import os
import re
import statistics
import sys
import time
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
# Add src to path (match other benchmark scripts)
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from codexlens.config import Config
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.registry import RegistryStore
# Built-in query set; _load_queries() falls back to it when no queries file
# is supplied on the command line.
DEFAULT_QUERIES = [
    "class Config",
    "def search",
    "LspBridge",
    "graph expansion",
    "clustering strategy",
    "error handling",
    "how to parse json",
]
def _now_ms() -> float:
return time.perf_counter() * 1000.0
def _safe_relpath(path: str, root: Path) -> str:
try:
return str(Path(path).resolve().relative_to(root.resolve()))
except Exception:
return path
def _normalize_path_key(path: str) -> str:
"""Normalize file paths for overlap/dedup metrics (Windows-safe)."""
try:
p = Path(path)
# Don't explode on non-files like "<memory>".
if str(p) and (p.is_absolute() or re.match(r"^[A-Za-z]:", str(p))):
norm = str(p.resolve())
else:
norm = str(p)
except Exception:
norm = path
norm = norm.replace("/", "\\")
if os.name == "nt":
norm = norm.lower()
return norm
def _extract_stage_stats(errors: List[str]) -> Optional[Dict[str, Any]]:
"""Extract STAGE_STATS JSON blob from SearchStats.errors."""
for item in errors or []:
if not isinstance(item, str):
continue
if not item.startswith("STAGE_STATS:"):
continue
payload = item[len("STAGE_STATS:") :]
try:
return json.loads(payload)
except Exception:
return None
return None
def jaccard_topk(a: List[str], b: List[str]) -> float:
    """Return the Jaccard similarity of the two lists treated as sets.

    Two empty inputs count as identical (1.0); exactly one empty input
    gives 0.0.
    """
    left, right = set(a), set(b)
    combined = left | right
    if not combined:
        return 1.0
    shared = left & right
    return len(shared) / len(combined)
def rbo(a: List[str], b: List[str], p: float = 0.9) -> float:
    """Compute a truncated rank-biased overlap score for two ranked lists.

    At every depth ``d`` up to the longer list's length, the size of the
    intersection of both prefixes divided by ``d`` is weighted by
    ``(1 - p) * p ** (d - 1)`` and summed. The sum is not extrapolated past
    the evaluated depth, so two identical finite lists score
    ``1 - p ** depth`` rather than 1.0.

    Raises:
        ValueError: If ``p`` is not strictly between 0 and 1.
    """
    if p >= 1.0 or p <= 0.0:
        raise ValueError("p must be in (0, 1)")
    if not (a or b):
        return 1.0
    len_a, len_b = len(a), len(b)
    prefix_a: set[str] = set()
    prefix_b: set[str] = set()
    total = 0.0
    for index in range(max(len_a, len_b)):
        if index < len_a:
            prefix_a.add(a[index])
        if index < len_b:
            prefix_b.add(b[index])
        rank = index + 1
        weight = (1.0 - p) * (p ** (rank - 1))
        agreement = len(prefix_a & prefix_b) / rank
        total += agreement * weight
    return total
def _unique_parent_dirs(paths: Iterable[str]) -> int:
dirs = set()
for p in paths:
try:
dirs.add(str(Path(p).parent))
except Exception:
continue
return len(dirs)
@dataclass
class RunDetail:
    """Outcome of a single search run for one query and one strategy."""

    # Strategy name passed to the engine (e.g. "staged" or "dense_rerank").
    strategy: str
    # Query text that was searched.
    query: str
    # Wall-clock duration of the run in milliseconds.
    latency_ms: float
    # Number of returned results that carried a path (counted before de-duplication).
    num_results: int
    # Normalized, de-duplicated result paths in rank order.
    topk_paths: List[str]
    # Parsed STAGE_STATS payload when the run reported one, else None.
    stage_stats: Optional[Dict[str, Any]] = None
    # repr() of the exception when the run failed, else None.
    error: Optional[str] = None
@dataclass
class CompareDetail:
    """Side-by-side comparison of both strategies for one query."""

    # Query text shared by both runs.
    query: str
    # Run produced with the "staged" strategy.
    staged: RunDetail
    # Run produced with the "dense_rerank" strategy.
    dense_rerank: RunDetail
    # Jaccard similarity between the two top-k path lists.
    jaccard_topk: float
    # Rank-biased overlap between the two top-k path lists.
    rbo_topk: float
    # Distinct files in each strategy's top-k list.
    staged_unique_files_topk: int
    dense_unique_files_topk: int
    # Distinct parent directories in each strategy's top-k list.
    staged_unique_dirs_topk: int
    dense_unique_dirs_topk: int
def _run_once(
    engine: ChainSearchEngine,
    query: str,
    source_path: Path,
    *,
    strategy: str,
    k: int,
    coarse_k: int,
    options: Optional[SearchOptions] = None,
) -> RunDetail:
    """Execute one cascade search and summarize it as a ``RunDetail``.

    A garbage collection is forced before the clock starts. On success the
    latency covers only the ``cascade_search`` call; result paths are
    normalized, de-duplicated in rank order and cut off once ``k`` unique
    entries are collected. Any exception is captured in ``error`` instead of
    propagating, with the latency measured up to the failure.
    """
    gc.collect()
    started = _now_ms()
    try:
        outcome = engine.cascade_search(
            query=query,
            source_path=source_path,
            k=k,
            coarse_k=coarse_k,
            options=options,
            strategy=strategy,
        )
        elapsed = _now_ms() - started

        raw_paths = [hit.path for hit in (outcome.results or []) if getattr(hit, "path", None)]
        normalized = [_normalize_path_key(raw) for raw in raw_paths]

        # Keep the first occurrence of each key, preserving rank order.
        unique_ranked: List[str] = []
        already_seen: set[str] = set()
        for key in normalized:
            if key not in already_seen:
                already_seen.add(key)
                unique_ranked.append(key)
                if len(unique_ranked) >= k:
                    break

        return RunDetail(
            strategy=strategy,
            query=query,
            latency_ms=elapsed,
            num_results=len(normalized),
            topk_paths=unique_ranked,
            stage_stats=_extract_stage_stats(getattr(outcome.stats, "errors", [])),
        )
    except Exception as exc:
        return RunDetail(
            strategy=strategy,
            query=query,
            latency_ms=_now_ms() - started,
            num_results=0,
            topk_paths=[],
            stage_stats=None,
            error=repr(exc),
        )
def _load_queries(path: Optional[Path], limit: Optional[int]) -> List[str]:
if path is None:
queries = list(DEFAULT_QUERIES)
else:
raw = path.read_text(encoding="utf-8", errors="ignore").splitlines()
queries = []
for line in raw:
line = line.strip()
if not line or line.startswith("#"):
continue
queries.append(line)
if limit is not None:
return queries[:limit]
return queries
def main() -> None:
    """Run both retrieval strategies over the query set and write a JSON report.

    Parses the command line, loads the queries, configures the engine for the
    staged/realtime pipeline, optionally warms up each strategy, runs every
    query through ``staged`` and ``dense_rerank``, and writes the per-query
    comparisons plus a summary to ``--output``.

    The overlap averages in the summary only cover queries for which both
    strategies succeeded: a failed run has an empty top-k list, and two empty
    lists would otherwise be scored as perfect agreement. The number of such
    pairs is reported as ``compared_count``.

    Raises:
        SystemExit: If the source path does not exist or no queries are loaded.
    """
    parser = argparse.ArgumentParser(
        description="Compare staged realtime LSP pipeline vs direct dense_rerank cascade"
    )
    parser.add_argument(
        "--source",
        type=Path,
        default=Path(__file__).parent.parent / "src",
        help="Source directory to search (default: ./src)",
    )
    parser.add_argument(
        "--queries-file",
        type=Path,
        default=None,
        help="Optional file with one query per line (# comments supported)",
    )
    parser.add_argument("--queries", type=int, default=None, help="Limit number of queries")
    parser.add_argument("--k", type=int, default=10, help="Final result count (default 10)")
    parser.add_argument("--coarse-k", type=int, default=100, help="Coarse candidates (default 100)")
    parser.add_argument("--warmup", type=int, default=1, help="Warmup runs per strategy (default 1)")
    parser.add_argument(
        "--output",
        type=Path,
        default=Path(__file__).parent / "results" / "staged_realtime_vs_dense_rerank.json",
        help="Output JSON path",
    )
    args = parser.parse_args()

    if not args.source.exists():
        raise SystemExit(f"Source path does not exist: {args.source}")

    queries = _load_queries(args.queries_file, args.queries)
    if not queries:
        raise SystemExit("No queries to run")

    # Match CLI behavior: load settings + apply global/workspace .env overrides.
    # This is important on Windows where ONNX/DirectML can sometimes crash under load;
    # many users pin EMBEDDING_BACKEND=litellm in ~/.codexlens/.env for stability.
    config = Config.load()
    config.cascade_strategy = "staged"
    config.staged_stage2_mode = "realtime"
    config.enable_staged_rerank = True
    # Stability: on some Windows setups, fastembed + DirectML can crash under load.
    # Dense_rerank uses the embedding backend that matches the index; force CPU here.
    config.embedding_use_gpu = False

    registry = RegistryStore()
    registry.initialize()
    mapper = PathMapper()

    # Build the engine inside the try block so the registry is still closed
    # when engine construction itself fails.
    engine = None
    try:
        engine = ChainSearchEngine(registry=registry, mapper=mapper, config=config)
        strategies = ["staged", "dense_rerank"]

        # Warmup
        if args.warmup > 0:
            warm_query = queries[0]
            for s in strategies:
                for _ in range(args.warmup):
                    # _run_once captures search failures in RunDetail.error;
                    # report them instead of discarding them silently.
                    warm = _run_once(
                        engine,
                        warm_query,
                        args.source,
                        strategy=s,
                        k=min(args.k, 5),
                        coarse_k=min(args.coarse_k, 50),
                    )
                    if warm.error:
                        print(f"WARNING warmup failed for {s}: {warm.error}", file=sys.stderr)

        comparisons: List[CompareDetail] = []
        for i, query in enumerate(queries, start=1):
            print(f"[{i}/{len(queries)}] {query}")
            staged = _run_once(
                engine,
                query,
                args.source,
                strategy="staged",
                k=args.k,
                coarse_k=args.coarse_k,
            )
            dense = _run_once(
                engine,
                query,
                args.source,
                strategy="dense_rerank",
                k=args.k,
                coarse_k=args.coarse_k,
            )
            staged_paths = staged.topk_paths
            dense_paths = dense.topk_paths
            comparisons.append(
                CompareDetail(
                    query=query,
                    staged=staged,
                    dense_rerank=dense,
                    jaccard_topk=jaccard_topk(staged_paths, dense_paths),
                    rbo_topk=rbo(staged_paths, dense_paths, p=0.9),
                    staged_unique_files_topk=len(set(staged_paths)),
                    dense_unique_files_topk=len(set(dense_paths)),
                    staged_unique_dirs_topk=_unique_parent_dirs(staged_paths),
                    dense_unique_dirs_topk=_unique_parent_dirs(dense_paths),
                )
            )

        def _mean_latency(details: List[RunDetail]) -> float:
            """Average latency over successful runs (0.0 when none succeeded)."""
            ok = [d.latency_ms for d in details if not d.error]
            return statistics.mean(ok) if ok else 0.0

        staged_runs = [c.staged for c in comparisons]
        dense_runs = [c.dense_rerank for c in comparisons]
        # Overlap is only meaningful when both strategies produced a result.
        paired_ok = [c for c in comparisons if not c.staged.error and not c.dense_rerank.error]

        summary = {
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "source": str(args.source),
            "k": args.k,
            "coarse_k": args.coarse_k,
            "query_count": len(comparisons),
            "compared_count": len(paired_ok),
            "avg_jaccard_topk": statistics.mean([c.jaccard_topk for c in paired_ok]) if paired_ok else 0.0,
            "avg_rbo_topk": statistics.mean([c.rbo_topk for c in paired_ok]) if paired_ok else 0.0,
            "staged": {
                "success": sum(1 for r in staged_runs if not r.error),
                "avg_latency_ms": _mean_latency(staged_runs),
            },
            "dense_rerank": {
                "success": sum(1 for r in dense_runs if not r.error),
                "avg_latency_ms": _mean_latency(dense_runs),
            },
        }

        args.output.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "summary": summary,
            "comparisons": [asdict(c) for c in comparisons],
        }
        args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        print(f"\nSaved: {args.output}")
    finally:
        # Close both resources independently so one failure does not skip the other.
        if engine is not None:
            try:
                engine.close()
            except Exception as exc:
                print(f"WARNING engine.close() failed: {exc!r}", file=sys.stderr)
        try:
            registry.close()
        except Exception as exc:
            print(f"WARNING registry.close() failed: {exc!r}", file=sys.stderr)
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,453 @@
{
"summary": {
"timestamp": "2026-02-09 11:08:47",
"source": "src",
"k": 10,
"coarse_k": 100,
"query_count": 7,
"avg_jaccard_topk": 0.41421235160730957,
"avg_rbo_topk": 0.22899068093857142,
"staged": {
"success": 7,
"avg_latency_ms": 32009.68328570468
},
"dense_rerank": {
"success": 7,
"avg_latency_ms": 2783.3305999977247
}
},
"comparisons": [
{
"query": "class Config",
"staged": {
"strategy": "staged",
"query": "class Config",
"latency_ms": 40875.45489999652,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 10633.91399383545,
"stage2_expand_ms": 12487.980365753174,
"stage3_cluster_ms": 10781.587362289429,
"stage4_rerank_ms": 6914.837837219238
},
"stage_counts": {
"stage1_candidates": 100,
"stage2_expanded": 149,
"stage3_clustered": 20,
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "class Config",
"latency_ms": 3111.874899983406,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1111111111111111,
"rbo_topk": 0.06741929885142856,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 8,
"dense_unique_dirs_topk": 4
},
{
"query": "def search",
"staged": {
"strategy": "staged",
"query": "def search",
"latency_ms": 38541.18510001898,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 548.8920211791992,
"stage2_expand_ms": 27176.724433898926,
"stage3_cluster_ms": 8352.917671203613,
"stage4_rerank_ms": 2392.6541805267334
},
"stage_counts": {
"stage1_candidates": 100,
"stage2_expanded": 101,
"stage3_clustered": 20,
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "def search",
"latency_ms": 2652.75,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.26666666666666666,
"rbo_topk": 0.2983708721671428,
"staged_unique_files_topk": 9,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
},
{
"query": "LspBridge",
"staged": {
"strategy": "staged",
"query": "LspBridge",
"latency_ms": 26319.983999997377,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 514.4834518432617,
"stage2_expand_ms": 14329.241514205933,
"stage3_cluster_ms": 9249.040842056274,
"stage4_rerank_ms": 2159.9059104919434
},
"stage_counts": {
"stage1_candidates": 100,
"stage2_expanded": 100,
"stage3_clustered": 20,
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "LspBridge",
"latency_ms": 2666.9745999872684,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.6666666666666666,
"rbo_topk": 0.3571430355128571,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
},
{
"query": "graph expansion",
"staged": {
"strategy": "staged",
"query": "graph expansion",
"latency_ms": 25696.087299972773,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 560.4684352874756,
"stage2_expand_ms": 13951.441526412964,
"stage3_cluster_ms": 8879.387140274048,
"stage4_rerank_ms": 2229.4514179229736
},
"stage_counts": {
"stage1_candidates": 100,
"stage2_expanded": 100,
"stage3_clustered": 20,
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "graph expansion",
"latency_ms": 2544.8630999922752,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.42857142857142855,
"rbo_topk": 0.13728894791142857,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
},
{
"query": "clustering strategy",
"staged": {
"strategy": "staged",
"query": "clustering strategy",
"latency_ms": 27387.41929998994,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 625.0262260437012,
"stage2_expand_ms": 14211.347103118896,
"stage3_cluster_ms": 10269.58680152893,
"stage4_rerank_ms": 2208.007335662842
},
"stage_counts": {
"stage1_candidates": 100,
"stage2_expanded": 100,
"stage3_clustered": 20,
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "clustering strategy",
"latency_ms": 2928.22389999032,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.17647058823529413,
"rbo_topk": 0.07116480920571429,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
},
{
"query": "error handling",
"staged": {
"strategy": "staged",
"query": "error handling",
"latency_ms": 23732.33979997039,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 504.0884017944336,
"stage2_expand_ms": 12899.415016174316,
"stage3_cluster_ms": 7881.027936935425,
"stage4_rerank_ms": 2372.1535205841064
},
"stage_counts": {
"stage1_candidates": 100,
"stage2_expanded": 100,
"stage3_clustered": 20,
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "error handling",
"latency_ms": 2946.439900010824,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.6666666666666666,
"rbo_topk": 0.19158624676285715,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
},
{
"query": "how to parse json",
"staged": {
"strategy": "staged",
"query": "how to parse json",
"latency_ms": 41515.31259998679,
"num_results": 9,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 601.7005443572998,
"stage2_expand_ms": 30052.319765090942,
"stage3_cluster_ms": 8409.791231155396,
"stage4_rerank_ms": 2371.1729049682617
},
"stage_counts": {
"stage1_candidates": 100,
"stage2_expanded": 100,
"stage3_clustered": 20,
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "how to parse json",
"latency_ms": 2632.1878000199795,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.5833333333333334,
"rbo_topk": 0.4799615561585714,
"staged_unique_files_topk": 9,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
}
]
}

View File

@@ -0,0 +1,73 @@
{
"summary": {
"timestamp": "2026-02-08 23:48:26",
"source": "src",
"k": 5,
"coarse_k": 50,
"query_count": 1,
"avg_jaccard_topk": 0.0,
"avg_rbo_topk": 0.0,
"staged": {
"success": 1,
"avg_latency_ms": 30093.97499999404
},
"dense_rerank": {
"success": 1,
"avg_latency_ms": 331.4424999952316
}
},
"comparisons": [
{
"query": "class Config",
"staged": {
"strategy": "staged",
"query": "class Config",
"latency_ms": 30093.97499999404,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 6421.706914901733,
"stage2_expand_ms": 17591.988563537598,
"stage3_cluster_ms": 3700.4549503326416,
"stage4_rerank_ms": 2340.064525604248
},
"stage_counts": {
"stage1_candidates": 50,
"stage2_expanded": 99,
"stage3_clustered": 10,
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "class Config",
"latency_ms": 331.4424999952316,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.0,
"rbo_topk": 0.0,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 5,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 1
}
]
}

View File

@@ -0,0 +1,177 @@
{
"summary": {
"timestamp": "2026-02-08 23:58:56",
"source": "src",
"k": 5,
"coarse_k": 50,
"query_count": 3,
"avg_jaccard_topk": 0.11574074074074074,
"avg_rbo_topk": 0.14601366666666662,
"staged": {
"success": 3,
"avg_latency_ms": 27868.044033328693
},
"dense_rerank": {
"success": 3,
"avg_latency_ms": 1339.25289999942
}
},
"comparisons": [
{
"query": "class Config",
"staged": {
"strategy": "staged",
"query": "class Config",
"latency_ms": 33643.06179998815,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 6201.4524936676025,
"stage2_expand_ms": 17306.61702156067,
"stage3_cluster_ms": 6829.557418823242,
"stage4_rerank_ms": 3267.071485519409
},
"stage_counts": {
"stage1_candidates": 50,
"stage2_expanded": 99,
"stage3_clustered": 10,
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "class Config",
"latency_ms": 1520.9955999851227,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1111111111111111,
"rbo_topk": 0.031347,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 5,
"staged_unique_dirs_topk": 5,
"dense_unique_dirs_topk": 1
},
{
"query": "def search",
"staged": {
"strategy": "staged",
"query": "def search",
"latency_ms": 26400.58900000155,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 404.60920333862305,
"stage2_expand_ms": 20036.258697509766,
"stage3_cluster_ms": 4919.439315795898,
"stage4_rerank_ms": 1001.8632411956787
},
"stage_counts": {
"stage1_candidates": 50,
"stage2_expanded": 51,
"stage3_clustered": 10,
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "def search",
"latency_ms": 1264.3862999975681,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.125,
"rbo_topk": 0.20334699999999994,
"staged_unique_files_topk": 4,
"dense_unique_files_topk": 5,
"staged_unique_dirs_topk": 3,
"dense_unique_dirs_topk": 2
},
{
"query": "LspBridge",
"staged": {
"strategy": "staged",
"query": "LspBridge",
"latency_ms": 23560.481299996376,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 385.28990745544434,
"stage2_expand_ms": 17787.648677825928,
"stage3_cluster_ms": 4374.642372131348,
"stage4_rerank_ms": 974.8115539550781
},
"stage_counts": {
"stage1_candidates": 50,
"stage2_expanded": 50,
"stage3_clustered": 10,
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "LspBridge",
"latency_ms": 1232.3768000155687,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1111111111111111,
"rbo_topk": 0.20334699999999994,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 5,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 1
}
]
}

View File

@@ -0,0 +1,176 @@
{
"summary": {
"timestamp": "2026-02-09 00:08:47",
"source": "src",
"k": 5,
"coarse_k": 50,
"query_count": 3,
"avg_jaccard_topk": 0.11574074074074074,
"avg_rbo_topk": 0.14601366666666662,
"staged": {
"success": 3,
"avg_latency_ms": 31720.555866663653
},
"dense_rerank": {
"success": 3,
"avg_latency_ms": 1401.2113333245118
}
},
"comparisons": [
{
"query": "class Config",
"staged": {
"strategy": "staged",
"query": "class Config",
"latency_ms": 40162.88519999385,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 6091.366767883301,
"stage2_expand_ms": 17540.942907333374,
"stage3_cluster_ms": 13169.558048248291,
"stage4_rerank_ms": 3317.5392150878906
},
"stage_counts": {
"stage1_candidates": 50,
"stage2_expanded": 99,
"stage3_clustered": 10,
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "class Config",
"latency_ms": 1571.1398999989033,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1111111111111111,
"rbo_topk": 0.031347,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 5,
"staged_unique_dirs_topk": 5,
"dense_unique_dirs_topk": 1
},
{
"query": "def search",
"staged": {
"strategy": "staged",
"query": "def search",
"latency_ms": 31623.380899995565,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 400.84290504455566,
"stage2_expand_ms": 20529.58631515503,
"stage3_cluster_ms": 9625.348806381226,
"stage4_rerank_ms": 1027.686357498169
},
"stage_counts": {
"stage1_candidates": 50,
"stage2_expanded": 51,
"stage3_clustered": 10,
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "def search",
"latency_ms": 1376.3304999768734,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.125,
"rbo_topk": 0.20334699999999994,
"staged_unique_files_topk": 4,
"dense_unique_files_topk": 5,
"staged_unique_dirs_topk": 3,
"dense_unique_dirs_topk": 2
},
{
"query": "LspBridge",
"staged": {
"strategy": "staged",
"query": "LspBridge",
"latency_ms": 23375.40150000155,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 392.41671562194824,
"stage2_expand_ms": 17760.897397994995,
"stage3_cluster_ms": 4194.235563278198,
"stage4_rerank_ms": 990.307092666626
},
"stage_counts": {
"stage1_candidates": 50,
"stage2_expanded": 50,
"stage3_clustered": 10,
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "LspBridge",
"latency_ms": 1256.1635999977589,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1111111111111111,
"rbo_topk": 0.20334699999999994,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 5,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 1
}
]
}