Add benchmark results and tests for LSP graph builder and staged search

- Introduced a new benchmark results file for performance comparison on 2026-02-09.
- Added a test for LspGraphBuilder to ensure it does not expand nodes at maximum depth.
- Created a test for the staged search pipeline to validate fallback behavior when stage 1 returns empty results.
This commit is contained in:
catlog22
2026-02-09 21:43:13 +08:00
parent 4344e79e68
commit 362f354f1c
25 changed files with 2613 additions and 51 deletions

View File

@@ -0,0 +1,466 @@
{
"summary": {
"timestamp": "2026-02-09 20:48:55",
"source": "src",
"k": 10,
"coarse_k": 100,
"query_count": 7,
"avg_jaccard_topk": 0.11418494830148965,
"avg_rbo_topk": 0.08910725003591835,
"staged": {
"success": 7,
"avg_latency_ms": 16443.109000005894
},
"dense_rerank": {
"success": 7,
"avg_latency_ms": 2919.481471432107
}
},
"comparisons": [
{
"query": "class Config",
"staged": {
"strategy": "staged",
"query": "class Config",
"latency_ms": 6056.956700026989,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 113.12270164489746,
"stage1_fallback_search_ms": 262.55249977111816,
"stage2_expand_ms": 3022.8426456451416,
"stage3_cluster_ms": 1.155853271484375,
"stage4_rerank_ms": 2554.953098297119
},
"stage_counts": {
"stage1_candidates": 37,
"stage1_fallback_used": 1,
"stage2_expanded": 86,
"stage2_unique_paths": 53,
"stage2_duplicate_paths": 33,
"stage3_clustered": 20,
"stage3_strategy": "dir_rr",
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "class Config",
"latency_ms": 2788.0383999943733,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.05263157894736842,
"rbo_topk": 0.014635885139999999,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 8,
"dense_unique_dirs_topk": 4
},
{
"query": "def search",
"staged": {
"strategy": "staged",
"query": "def search",
"latency_ms": 12229.477500021458,
"num_results": 3,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 108.82282257080078,
"stage2_expand_ms": 9422.304153442383,
"stage3_cluster_ms": 0.001430511474609375,
"stage4_rerank_ms": 2611.234664916992
},
"stage_counts": {
"stage1_candidates": 3,
"stage2_expanded": 4,
"stage2_unique_paths": 3,
"stage2_duplicate_paths": 1,
"stage3_clustered": 4,
"stage3_strategy": "dir_rr",
"stage4_reranked": 4
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "def search",
"latency_ms": 2823.377499997616,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.09090909090909091,
"rbo_topk": 0.23541639942571424,
"staged_unique_files_topk": 2,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 2,
"dense_unique_dirs_topk": 4
},
{
"query": "LspBridge",
"staged": {
"strategy": "staged",
"query": "LspBridge",
"latency_ms": 33805.434699982405,
"num_results": 5,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 100.5556583404541,
"stage1_fallback_search_ms": 176.71489715576172,
"stage2_expand_ms": 31017.661809921265,
"stage3_cluster_ms": 0.001430511474609375,
"stage4_rerank_ms": 2403.3148288726807
},
"stage_counts": {
"stage1_candidates": 5,
"stage1_fallback_used": 1,
"stage2_expanded": 5,
"stage2_unique_paths": 5,
"stage2_duplicate_paths": 0,
"stage3_clustered": 5,
"stage3_strategy": "dir_rr",
"stage4_reranked": 5
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "LspBridge",
"latency_ms": 2906.127400010824,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.07142857142857142,
"rbo_topk": 0.045191399425714276,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 2,
"dense_unique_dirs_topk": 4
},
{
"query": "graph expansion",
"staged": {
"strategy": "staged",
"query": "graph expansion",
"latency_ms": 16790.213800013065,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\hybrid_search\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 110.00967025756836,
"stage1_fallback_search_ms": 176.9556999206543,
"stage2_expand_ms": 13929.782629013062,
"stage3_cluster_ms": 0.45800209045410156,
"stage4_rerank_ms": 2486.6883754730225
},
"stage_counts": {
"stage1_candidates": 11,
"stage1_fallback_used": 1,
"stage2_expanded": 29,
"stage2_unique_paths": 14,
"stage2_duplicate_paths": 15,
"stage3_clustered": 20,
"stage3_strategy": "dir_rr",
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "graph expansion",
"latency_ms": 2866.819000005722,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1875,
"rbo_topk": 0.06893318399142857,
"staged_unique_files_topk": 9,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 8,
"dense_unique_dirs_topk": 4
},
{
"query": "clustering strategy",
"staged": {
"strategy": "staged",
"query": "clustering strategy",
"latency_ms": 9090.759900003672,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 85.28780937194824,
"stage1_fallback_search_ms": 183.7012767791748,
"stage2_expand_ms": 5557.527780532837,
"stage3_cluster_ms": 0.001430511474609375,
"stage4_rerank_ms": 3164.6268367767334
},
"stage_counts": {
"stage1_candidates": 10,
"stage1_fallback_used": 1,
"stage2_expanded": 10,
"stage2_unique_paths": 10,
"stage2_duplicate_paths": 0,
"stage3_clustered": 10,
"stage3_strategy": "dir_rr",
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "clustering strategy",
"latency_ms": 3062.4616000056267,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1111111111111111,
"rbo_topk": 0.04670528456571428,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 3,
"dense_unique_dirs_topk": 4
},
{
"query": "error handling",
"staged": {
"strategy": "staged",
"query": "error handling",
"latency_ms": 19777.87659996748,
"num_results": 6,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 65.9482479095459,
"stage1_fallback_search_ms": 181.9770336151123,
"stage2_expand_ms": 16960.813760757446,
"stage3_cluster_ms": 0.00095367431640625,
"stage4_rerank_ms": 2472.1477031707764
},
"stage_counts": {
"stage1_candidates": 5,
"stage1_fallback_used": 1,
"stage2_expanded": 13,
"stage2_unique_paths": 6,
"stage2_duplicate_paths": 7,
"stage3_clustered": 13,
"stage3_strategy": "dir_rr",
"stage4_reranked": 13
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "error handling",
"latency_ms": 2854.169200003147,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.07142857142857142,
"rbo_topk": 0.045191399425714276,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
},
{
"query": "how to parse json",
"staged": {
"strategy": "staged",
"query": "how to parse json",
"latency_ms": 17351.04380002618,
"num_results": 7,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 119.1408634185791,
"stage1_fallback_search_ms": 246.2625503540039,
"stage2_expand_ms": 14137.234449386597,
"stage3_cluster_ms": 0.0011920928955078125,
"stage4_rerank_ms": 2750.417470932007
},
"stage_counts": {
"stage1_candidates": 4,
"stage1_fallback_used": 1,
"stage2_expanded": 11,
"stage2_unique_paths": 7,
"stage2_duplicate_paths": 4,
"stage3_clustered": 11,
"stage3_strategy": "dir_rr",
"stage4_reranked": 11
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "how to parse json",
"latency_ms": 3135.3772000074387,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.21428571428571427,
"rbo_topk": 0.16767719827714284,
"staged_unique_files_topk": 7,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 5,
"dense_unique_dirs_topk": 4
}
]
}

View File

@@ -0,0 +1,467 @@
{
"summary": {
"timestamp": "2026-02-09 20:56:02",
"source": "src",
"k": 10,
"coarse_k": 100,
"query_count": 7,
"avg_jaccard_topk": 0.11350467619264612,
"avg_rbo_topk": 0.09062624799510204,
"staged": {
"success": 7,
"avg_latency_ms": 8679.35167142323
},
"dense_rerank": {
"success": 7,
"avg_latency_ms": 3097.294714289052
}
},
"comparisons": [
{
"query": "class Config",
"staged": {
"strategy": "staged",
"query": "class Config",
"latency_ms": 6814.465099990368,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 85.55030822753906,
"stage1_fallback_search_ms": 197.95989990234375,
"stage2_expand_ms": 3032.4549674987793,
"stage3_cluster_ms": 1.1937618255615234,
"stage4_rerank_ms": 3402.9476642608643
},
"stage_counts": {
"stage1_candidates": 37,
"stage1_fallback_used": 1,
"stage2_expanded": 86,
"stage2_unique_paths": 53,
"stage2_duplicate_paths": 33,
"stage3_clustered": 20,
"stage3_strategy": "dir_rr",
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "class Config",
"latency_ms": 3175.0339000225067,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.05263157894736842,
"rbo_topk": 0.014635885139999999,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 8,
"dense_unique_dirs_topk": 4
},
{
"query": "def search",
"staged": {
"strategy": "staged",
"query": "def search",
"latency_ms": 8990.238099992275,
"num_results": 3,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 90.6367301940918,
"stage2_expand_ms": 6272.260665893555,
"stage3_cluster_ms": 0.00095367431640625,
"stage4_rerank_ms": 2531.4290523529053
},
"stage_counts": {
"stage1_candidates": 3,
"stage2_expanded": 4,
"stage2_unique_paths": 3,
"stage2_duplicate_paths": 1,
"stage3_clustered": 4,
"stage3_strategy": "dir_rr",
"stage4_reranked": 4
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "def search",
"latency_ms": 3434.4095999896526,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.09090909090909091,
"rbo_topk": 0.23541639942571424,
"staged_unique_files_topk": 2,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 2,
"dense_unique_dirs_topk": 4
},
{
"query": "LspBridge",
"staged": {
"strategy": "staged",
"query": "LspBridge",
"latency_ms": 9296.205000013113,
"num_results": 7,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 86.64774894714355,
"stage1_fallback_search_ms": 163.8650894165039,
"stage2_expand_ms": 6144.1497802734375,
"stage3_cluster_ms": 0.4100799560546875,
"stage4_rerank_ms": 2807.274580001831
},
"stage_counts": {
"stage1_candidates": 5,
"stage1_fallback_used": 1,
"stage2_expanded": 31,
"stage2_unique_paths": 11,
"stage2_duplicate_paths": 20,
"stage3_clustered": 20,
"stage3_strategy": "dir_rr",
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "LspBridge",
"latency_ms": 3043.4417999982834,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.06666666666666667,
"rbo_topk": 0.045191399425714276,
"staged_unique_files_topk": 6,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 2,
"dense_unique_dirs_topk": 4
},
{
"query": "graph expansion",
"staged": {
"strategy": "staged",
"query": "graph expansion",
"latency_ms": 9086.15110000968,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 72.22437858581543,
"stage1_fallback_search_ms": 166.3804054260254,
"stage2_expand_ms": 6179.303169250488,
"stage3_cluster_ms": 0.00095367431640625,
"stage4_rerank_ms": 2575.9027004241943
},
"stage_counts": {
"stage1_candidates": 11,
"stage1_fallback_used": 1,
"stage2_expanded": 16,
"stage2_unique_paths": 13,
"stage2_duplicate_paths": 3,
"stage3_clustered": 16,
"stage3_strategy": "dir_rr",
"stage4_reranked": 16
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "graph expansion",
"latency_ms": 2793.8257000148296,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1875,
"rbo_topk": 0.06134116970571428,
"staged_unique_files_topk": 9,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 7,
"dense_unique_dirs_topk": 4
},
{
"query": "clustering strategy",
"staged": {
"strategy": "staged",
"query": "clustering strategy",
"latency_ms": 8401.927499979734,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 72.67880439758301,
"stage1_fallback_search_ms": 166.71442985534668,
"stage2_expand_ms": 5561.89489364624,
"stage3_cluster_ms": 0.0007152557373046875,
"stage4_rerank_ms": 2517.7178382873535
},
"stage_counts": {
"stage1_candidates": 10,
"stage1_fallback_used": 1,
"stage2_expanded": 10,
"stage2_unique_paths": 10,
"stage2_duplicate_paths": 0,
"stage3_clustered": 10,
"stage3_strategy": "dir_rr",
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "clustering strategy",
"latency_ms": 3192.0045999884605,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1111111111111111,
"rbo_topk": 0.04670528456571428,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 3,
"dense_unique_dirs_topk": 4
},
{
"query": "error handling",
"staged": {
"strategy": "staged",
"query": "error handling",
"latency_ms": 9032.269400000572,
"num_results": 6,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 78.59635353088379,
"stage1_fallback_search_ms": 180.96280097961426,
"stage2_expand_ms": 6175.840377807617,
"stage3_cluster_ms": 0.001430511474609375,
"stage4_rerank_ms": 2503.4260749816895
},
"stage_counts": {
"stage1_candidates": 5,
"stage1_fallback_used": 1,
"stage2_expanded": 13,
"stage2_unique_paths": 6,
"stage2_duplicate_paths": 7,
"stage3_clustered": 13,
"stage3_strategy": "dir_rr",
"stage4_reranked": 13
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "error handling",
"latency_ms": 3076.744800001383,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.07142857142857142,
"rbo_topk": 0.045191399425714276,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
},
{
"query": "how to parse json",
"staged": {
"strategy": "staged",
"query": "how to parse json",
"latency_ms": 9134.205499976873,
"num_results": 7,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 117.79379844665527,
"stage1_fallback_search_ms": 187.53886222839355,
"stage2_expand_ms": 6218.849658966064,
"stage3_cluster_ms": 0.00095367431640625,
"stage4_rerank_ms": 2515.6633853912354
},
"stage_counts": {
"stage1_candidates": 4,
"stage1_fallback_used": 1,
"stage2_expanded": 9,
"stage2_unique_paths": 7,
"stage2_duplicate_paths": 2,
"stage3_clustered": 9,
"stage3_strategy": "dir_rr",
"stage4_reranked": 9
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "how to parse json",
"latency_ms": 2965.6026000082493,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.21428571428571427,
"rbo_topk": 0.18590219827714285,
"staged_unique_files_topk": 7,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 5,
"dense_unique_dirs_topk": 4
}
]
}

View File

@@ -0,0 +1,465 @@
{
"summary": {
"timestamp": "2026-02-09 20:53:01",
"source": "src",
"k": 10,
"coarse_k": 100,
"query_count": 7,
"avg_jaccard_topk": 0.12384302205730777,
"avg_rbo_topk": 0.09816673566816325,
"staged": {
"success": 7,
"avg_latency_ms": 8696.564499999795
},
"dense_rerank": {
"success": 7,
"avg_latency_ms": 2936.2583857136115
}
},
"comparisons": [
{
"query": "class Config",
"staged": {
"strategy": "staged",
"query": "class Config",
"latency_ms": 6108.304299980402,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 90.47985076904297,
"stage1_fallback_search_ms": 224.38788414001465,
"stage2_expand_ms": 3031.7258834838867,
"stage3_cluster_ms": 0.02956390380859375,
"stage4_rerank_ms": 2655.31849861145
},
"stage_counts": {
"stage1_candidates": 37,
"stage1_fallback_used": 1,
"stage2_expanded": 86,
"stage2_unique_paths": 53,
"stage2_duplicate_paths": 33,
"stage3_clustered": 20,
"stage3_strategy": "score",
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "class Config",
"latency_ms": 2873.6466999948025,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.125,
"rbo_topk": 0.06741929885142856,
"staged_unique_files_topk": 8,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 5,
"dense_unique_dirs_topk": 4
},
{
"query": "def search",
"staged": {
"strategy": "staged",
"query": "def search",
"latency_ms": 9321.754200011492,
"num_results": 3,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 140.43283462524414,
"stage2_expand_ms": 6410.467863082886,
"stage3_cluster_ms": 0.00095367431640625,
"stage4_rerank_ms": 2675.7972240448
},
"stage_counts": {
"stage1_candidates": 3,
"stage2_expanded": 4,
"stage2_unique_paths": 3,
"stage2_duplicate_paths": 1,
"stage3_clustered": 4,
"stage3_strategy": "score",
"stage4_reranked": 4
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "def search",
"latency_ms": 3104.7773999869823,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.09090909090909091,
"rbo_topk": 0.23541639942571424,
"staged_unique_files_topk": 2,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 2,
"dense_unique_dirs_topk": 4
},
{
"query": "LspBridge",
"staged": {
"strategy": "staged",
"query": "LspBridge",
"latency_ms": 9527.073799997568,
"num_results": 6,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 98.59919548034668,
"stage1_fallback_search_ms": 172.26457595825195,
"stage2_expand_ms": 6125.282049179077,
"stage3_cluster_ms": 0.017404556274414062,
"stage4_rerank_ms": 3023.9248275756836
},
"stage_counts": {
"stage1_candidates": 5,
"stage1_fallback_used": 1,
"stage2_expanded": 31,
"stage2_unique_paths": 11,
"stage2_duplicate_paths": 20,
"stage3_clustered": 20,
"stage3_strategy": "score",
"stage4_reranked": 20
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "LspBridge",
"latency_ms": 2901.0302999913692,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.06666666666666667,
"rbo_topk": 0.045191399425714276,
"staged_unique_files_topk": 6,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 2,
"dense_unique_dirs_topk": 4
},
{
"query": "graph expansion",
"staged": {
"strategy": "staged",
"query": "graph expansion",
"latency_ms": 9120.886200010777,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 91.48454666137695,
"stage1_fallback_search_ms": 172.12390899658203,
"stage2_expand_ms": 6166.24903678894,
"stage3_cluster_ms": 0.00095367431640625,
"stage4_rerank_ms": 2601.947546005249
},
"stage_counts": {
"stage1_candidates": 11,
"stage1_fallback_used": 1,
"stage2_expanded": 16,
"stage2_unique_paths": 13,
"stage2_duplicate_paths": 3,
"stage3_clustered": 16,
"stage3_strategy": "score",
"stage4_reranked": 16
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "graph expansion",
"latency_ms": 2847.6964999735355,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1875,
"rbo_topk": 0.06134116970571428,
"staged_unique_files_topk": 9,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 7,
"dense_unique_dirs_topk": 4
},
{
"query": "clustering strategy",
"staged": {
"strategy": "staged",
"query": "clustering strategy",
"latency_ms": 8424.535699993372,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 92.8945541381836,
"stage1_fallback_search_ms": 192.06547737121582,
"stage2_expand_ms": 5568.126440048218,
"stage3_cluster_ms": 0.0011920928955078125,
"stage4_rerank_ms": 2480.673313140869
},
"stage_counts": {
"stage1_candidates": 10,
"stage1_fallback_used": 1,
"stage2_expanded": 10,
"stage2_unique_paths": 10,
"stage2_duplicate_paths": 0,
"stage3_clustered": 10,
"stage3_strategy": "score",
"stage4_reranked": 10
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "clustering strategy",
"latency_ms": 2974.9999000132084,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.1111111111111111,
"rbo_topk": 0.04670528456571428,
"staged_unique_files_topk": 10,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 3,
"dense_unique_dirs_topk": 4
},
{
"query": "error handling",
"staged": {
"strategy": "staged",
"query": "error handling",
"latency_ms": 9253.624700009823,
"num_results": 6,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 102.18691825866699,
"stage1_fallback_search_ms": 176.97691917419434,
"stage2_expand_ms": 6113.626480102539,
"stage3_cluster_ms": 0.00095367431640625,
"stage4_rerank_ms": 2774.4452953338623
},
"stage_counts": {
"stage1_candidates": 5,
"stage1_fallback_used": 1,
"stage2_expanded": 13,
"stage2_unique_paths": 6,
"stage2_duplicate_paths": 7,
"stage3_clustered": 13,
"stage3_strategy": "score",
"stage4_reranked": 13
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "error handling",
"latency_ms": 2860.619900047779,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.07142857142857142,
"rbo_topk": 0.045191399425714276,
"staged_unique_files_topk": 5,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 4,
"dense_unique_dirs_topk": 4
},
{
"query": "how to parse json",
"staged": {
"strategy": "staged",
"query": "how to parse json",
"latency_ms": 9119.772599995136,
"num_results": 7,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py"
],
"stage_stats": {
"stage_times": {
"stage1_binary_ms": 90.18850326538086,
"stage1_fallback_search_ms": 157.95397758483887,
"stage2_expand_ms": 6293.469429016113,
"stage3_cluster_ms": 0.0011920928955078125,
"stage4_rerank_ms": 2486.8383407592773
},
"stage_counts": {
"stage1_candidates": 4,
"stage1_fallback_used": 1,
"stage2_expanded": 9,
"stage2_unique_paths": 7,
"stage2_duplicate_paths": 2,
"stage3_clustered": 9,
"stage3_strategy": "score",
"stage4_reranked": 9
}
},
"error": null
},
"dense_rerank": {
"strategy": "dense_rerank",
"query": "how to parse json",
"latency_ms": 2991.0379999876022,
"num_results": 10,
"topk_paths": [
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py"
],
"stage_stats": null,
"error": null
},
"jaccard_topk": 0.21428571428571427,
"rbo_topk": 0.18590219827714285,
"staged_unique_files_topk": 7,
"dense_unique_files_topk": 10,
"staged_unique_dirs_topk": 5,
"dense_unique_dirs_topk": 4
}
]
}

View File

@@ -134,7 +134,10 @@ class LspGraphBuilder:
# Skip if already visited or at max depth
if node.id in visited:
return []
if depth > self.max_depth:
# Depth is 0 for seeds. To limit expansion to N hops from seeds,
# we expand nodes with depth < max_depth.
if depth >= self.max_depth:
visited.add(node.id)
return []
if len(graph.nodes) >= self.max_nodes:
return []

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
import asyncio
from unittest.mock import AsyncMock
import pytest
from codexlens.hybrid_search.data_structures import CodeAssociationGraph, CodeSymbolNode, Range
from codexlens.lsp.lsp_graph_builder import LspGraphBuilder
@pytest.mark.asyncio
async def test_lsp_graph_builder_does_not_expand_at_max_depth() -> None:
    """Verify a node at ``max_depth`` is marked visited but never expanded.

    Depth semantics: seeds are depth 0 and ``max_depth`` is the number of hops
    from the seeds. With ``max_depth=1``, calling ``_expand_node`` at depth 1
    must return no new nodes, record the node as visited, and make no bridge
    lookups.
    """
    builder = LspGraphBuilder(max_depth=1, max_nodes=10, max_concurrent=1, resolve_symbols=False)

    # Any bridge lookup at this depth is a bug, so make each one raise.
    bridge = AsyncMock()
    bridge.get_references.side_effect = RuntimeError("should not call references")
    bridge.get_call_hierarchy.side_effect = RuntimeError("should not call call hierarchy")

    node = CodeSymbolNode(
        id="x.py:foo:1",
        name="foo",
        kind="function",
        file_path="x.py",
        range=Range(start_line=1, start_character=1, end_line=1, end_character=1),
    )
    graph = CodeAssociationGraph()
    visited: set[str] = set()
    sem = asyncio.Semaphore(1)

    # Seeds are depth=0. A node at depth==max_depth should not be expanded.
    new_nodes = await builder._expand_node(node, 1, graph, bridge, visited, sem)  # type: ignore[attr-defined]

    assert new_nodes == []
    assert node.id in visited
    # The side effects above only surface if the exception propagates out of
    # the builder. Checking the call record directly keeps this test meaningful
    # even if bridge errors were to be caught somewhere along the way.
    bridge.get_references.assert_not_called()
    bridge.get_call_hierarchy.assert_not_called()

View File

@@ -0,0 +1,49 @@
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import MagicMock
from codexlens.config import Config
from codexlens.entities import SearchResult
from codexlens.search.chain_search import ChainSearchEngine, ChainSearchResult, SearchOptions
def _extract_stage_stats(result: ChainSearchResult) -> dict:
for item in result.stats.errors or []:
if isinstance(item, str) and item.startswith("STAGE_STATS:"):
return json.loads(item[len("STAGE_STATS:") :])
raise AssertionError("missing STAGE_STATS payload")
def test_staged_pipeline_seeds_from_fts_when_stage1_empty(monkeypatch) -> None:
    """Check the staged pipeline falls back to ``search`` when Stage 1 is empty.

    Stage 1 is stubbed to return no candidates and ``engine.search`` is stubbed
    to return two fixed hits. Stages 2 and 3 pass their input through
    unchanged. The reported stage stats must flag the fallback, and the two
    hits must come back in their original order.
    """
    from codexlens.search.chain_search import SearchStats

    config = Config.load()
    config.enable_staged_rerank = False
    config.staged_stage2_mode = "realtime"  # ensure we pass through stage2 wrapper
    config.staged_clustering_strategy = "score"
    engine = ChainSearchEngine(registry=MagicMock(), mapper=MagicMock(), config=config)

    seed_results = [
        SearchResult(path="D:/p/a.py", score=1.0),
        SearchResult(path="D:/p/b.py", score=0.9),
    ]

    # Stubs that keep the engine away from the registry, mapper and index stores.
    def fake_find_start_index(*_args, **_kwargs):
        return Path("X:/fake/_index.db")

    def fake_collect_index_paths(*_args, **_kwargs):
        return [Path("X:/fake/_index.db")]

    # Stage 1 yields nothing, which is what triggers the FTS seeding path.
    def fake_stage1(*_args, **_kwargs):
        return ([], Path("X:/fake"))

    # Fallback search: returns the fixed seeds with fresh stats on every call.
    def fake_search(*_args, **_kwargs):
        return ChainSearchResult(query="q", results=seed_results, symbols=[], stats=SearchStats())

    # Later stages are pass-throughs so only the plumbing is validated.
    def passthrough(results, *_args, **_kwargs):
        return results

    monkeypatch.setattr(engine, "_find_start_index", fake_find_start_index)
    monkeypatch.setattr(engine, "_collect_index_paths", fake_collect_index_paths)
    monkeypatch.setattr(engine, "_stage1_binary_search", fake_stage1)
    monkeypatch.setattr(engine, "search", fake_search)
    monkeypatch.setattr(engine, "_stage2_lsp_expand", passthrough)
    monkeypatch.setattr(engine, "_stage3_cluster_prune", passthrough)

    result = engine.staged_cascade_search("q", Path("."), k=2, coarse_k=5, options=SearchOptions())

    stage_counts = _extract_stage_stats(result)["stage_counts"]
    assert stage_counts.get("stage1_fallback_used") == 1
    assert result.results
    assert [hit.path for hit in result.results] == ["D:/p/a.py", "D:/p/b.py"]