Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-10 02:24:35 +08:00
Add benchmark results for fast3 and fast4, implement KeepAliveLspBridge, and add tests for staged strategies
- Added new benchmark result files: compare_2026-02-09_score_fast3.json and compare_2026-02-09_score_fast4.json.
- Implemented KeepAliveLspBridge to maintain a persistent LSP connection across multiple queries, improving performance.
- Created unit tests for staged clustering strategies in test_staged_stage3_fast_strategies.py, ensuring correct behavior of the score and dir_rr strategies.
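The KeepAliveLspBridge implementation itself is not part of this excerpt. As a rough illustration of the idea described in the commit message above (reusing one long-lived LSP server process across queries instead of restarting it per request), here is a minimal, hypothetical Python sketch; the class name mirrors the commit message, but the constructor arguments, method names, and framing are assumptions, not the actual codex-lens API.

import subprocess
import threading
from typing import Optional

class KeepAliveLspBridgeSketch:
    """Illustrative only: keep one LSP server process alive and reuse it across queries."""

    def __init__(self, server_cmd: list):
        self._server_cmd = server_cmd  # e.g. ["pyright-langserver", "--stdio"] (assumed, not from the repo)
        self._proc: Optional[subprocess.Popen] = None
        self._lock = threading.Lock()

    def _ensure_started(self) -> subprocess.Popen:
        # Start the server once; later queries reuse the same process,
        # avoiding repeated startup and re-indexing cost per query.
        if self._proc is None or self._proc.poll() is not None:
            self._proc = subprocess.Popen(
                self._server_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE
            )
        return self._proc

    def request(self, payload: bytes) -> bytes:
        # Serialize access so concurrent queries do not interleave frames
        # on the shared stdin/stdout pipes. Real LSP traffic needs
        # Content-Length framing; a single readline() stands in for it here.
        with self._lock:
            proc = self._ensure_started()
            proc.stdin.write(payload)
            proc.stdin.flush()
            return proc.stdout.readline()

    def close(self) -> None:
        if self._proc is not None:
            self._proc.terminate()
            self._proc = None

A bridge along these lines may help explain the spread in staged latencies across the benchmark runs recorded below, where warm, reused connections keep per-query cost low and cold starts dominate the slower runs.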
@@ -249,6 +249,12 @@ def main() -> None:
    parser.add_argument("--k", type=int, default=10, help="Final result count (default 10)")
    parser.add_argument("--coarse-k", type=int, default=100, help="Coarse candidates (default 100)")
    parser.add_argument("--warmup", type=int, default=1, help="Warmup runs per strategy (default 1)")
    parser.add_argument(
        "--staged-cluster-strategy",
        type=str,
        default=None,
        help="Override Config.staged_clustering_strategy for staged pipeline (e.g. auto, dir_rr, score, path)",
    )
    parser.add_argument(
        "--output",
        type=Path,
@@ -271,6 +277,8 @@ def main() -> None:
    config.cascade_strategy = "staged"
    config.staged_stage2_mode = "realtime"
    config.enable_staged_rerank = True
    if args.staged_cluster_strategy:
        config.staged_clustering_strategy = str(args.staged_cluster_strategy)
    # Stability: on some Windows setups, fastembed + DirectML can crash under load.
    # Dense_rerank uses the embedding backend that matches the index; force CPU here.
    config.embedding_use_gpu = False
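For readers skimming the hunk above, the pattern being added is a plain argparse flag whose value, when present, overrides a field on the benchmark Config. A self-contained sketch of that pattern follows; the Config dataclass here is a simplified stand-in for the real codexlens Config, not its actual definition.

import argparse
from dataclasses import dataclass

@dataclass
class Config:
    # Simplified stand-in: only the field touched by the new flag.
    staged_clustering_strategy: str = "auto"

def main() -> None:
    parser = argparse.ArgumentParser(description="staged-vs-dense benchmark (sketch)")
    parser.add_argument(
        "--staged-cluster-strategy",
        type=str,
        default=None,
        help="Override Config.staged_clustering_strategy (e.g. auto, dir_rr, score, path)",
    )
    args = parser.parse_args()

    config = Config()
    # Only override when the flag was actually supplied, mirroring the diff above.
    if args.staged_cluster_strategy:
        config.staged_clustering_strategy = str(args.staged_cluster_strategy)
    print(config)

if __name__ == "__main__":
    main()

Passing --staged-cluster-strategy score or --staged-cluster-strategy dir_rr appears to select the stage 3 strategy that the result files below record under stage_stats.stage_counts.stage3_strategy.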
@@ -0,0 +1,356 @@
{
  "summary": {
    "timestamp": "2026-02-09 20:37:28",
    "source": "src",
    "k": 10,
    "coarse_k": 100,
    "query_count": 7,
    "avg_jaccard_topk": 0.12095811211246858,
    "avg_rbo_topk": 0.09594444061244897,
    "staged": {
      "success": 7,
      "avg_latency_ms": 2471.239057132176
    },
    "dense_rerank": {
      "success": 7,
      "avg_latency_ms": 3087.217985710927
    }
  },
  "comparisons": [
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 312.2674999535084,
|
||||
"num_results": 37,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 2672.6916999816895,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.05263157894736842,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 5,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 15344.861499994993,
|
||||
"num_results": 3,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 81.70747756958008,
|
||||
"stage2_expand_ms": 12762.907266616821,
|
||||
"stage3_cluster_ms": 0.0021457672119140625,
|
||||
"stage4_rerank_ms": 2422.7287769317627
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 3,
|
||||
"stage2_expanded": 4,
|
||||
"stage2_unique_paths": 3,
|
||||
"stage2_duplicate_paths": 1,
|
||||
"stage3_clustered": 4,
|
||||
"stage3_strategy": "dir_rr",
|
||||
"stage4_reranked": 4
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 2908.5530000030994,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.09090909090909091,
|
||||
"rbo_topk": 0.23541639942571424,
|
||||
"staged_unique_files_topk": 2,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 328.4989999830723,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 3426.8526000082493,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.07142857142857142,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "graph expansion",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "graph expansion",
|
||||
"latency_ms": 359.32230001688004,
|
||||
"num_results": 11,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\hybrid_search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "graph expansion",
|
||||
"latency_ms": 3472.025099992752,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.17647058823529413,
|
||||
"rbo_topk": 0.06801300374142856,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 7,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "clustering strategy",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "clustering strategy",
|
||||
"latency_ms": 289.3139999806881,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "clustering strategy",
|
||||
"latency_ms": 2859.5299999713898,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.1111111111111111,
|
||||
"rbo_topk": 0.04670528456571428,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "error handling",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "error handling",
|
||||
"latency_ms": 305.66699999570847,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "error handling",
|
||||
"latency_ms": 3101.3711999952793,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.07142857142857142,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "how to parse json",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "how to parse json",
|
||||
"latency_ms": 358.74210000038147,
|
||||
"num_results": 4,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "how to parse json",
|
||||
"latency_ms": 3169.5023000240326,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.2727272727272727,
|
||||
"rbo_topk": 0.18590219827714285,
|
||||
"staged_unique_files_topk": 4,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
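The jaccard_topk values in these result files compare the staged and dense_rerank top-k path lists as sets, and rbo_topk is a rank-biased overlap score that weights agreement near the top of the lists more heavily. As a quick sanity check, here is a minimal sketch of the Jaccard computation, reconstructed from the reported numbers rather than taken from the repository:

def jaccard_topk(staged_paths, dense_paths):
    # Set overlap of the two top-k result lists: |A intersect B| / |A union B|.
    a, b = set(staged_paths), set(dense_paths)
    if not a and not b:
        return 0.0
    return len(a & b) / len(a | b)

# For the "class Config" comparison above, the two 10-item lists share a
# single path (storage\dir_index.py), giving 1 / 19 ~= 0.0526, which matches
# the reported jaccard_topk of 0.05263157894736842.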
171 codex-lens/benchmarks/results/compare_2026-02-09_keepalive3.json Normal file
171 codex-lens/benchmarks/results/compare_2026-02-09_keepalive3.json Normal file
@@ -0,0 +1,171 @@
{
  "summary": {
    "timestamp": "2026-02-09 19:16:45",
    "source": "src",
    "k": 10,
    "coarse_k": 100,
    "query_count": 3,
    "avg_jaccard_topk": 0.07165641376167692,
    "avg_rbo_topk": 0.10859973275904759,
    "staged": {
      "success": 3,
      "avg_latency_ms": 7919.317766676347
    },
    "dense_rerank": {
      "success": 3,
      "avg_latency_ms": 2812.574933330218
    }
  },
  "comparisons": [
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 6351.961700022221,
|
||||
"num_results": 37,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 4424.698300004005,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.05263157894736842,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 5,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 17239.81479999423,
|
||||
"num_results": 3,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 18.40996742248535,
|
||||
"stage2_expand_ms": 16024.681329727173,
|
||||
"stage3_cluster_ms": 0.00095367431640625,
|
||||
"stage4_rerank_ms": 1160.1319313049316
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 3,
|
||||
"stage2_expanded": 4,
|
||||
"stage2_unique_paths": 3,
|
||||
"stage2_duplicate_paths": 1,
|
||||
"stage3_clustered": 4,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 4
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 2086.8772999942303,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.09090909090909091,
|
||||
"rbo_topk": 0.23541639942571424,
|
||||
"staged_unique_files_topk": 2,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 166.1768000125885,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 1926.1491999924183,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.07142857142857142,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,171 @@
{
  "summary": {
    "timestamp": "2026-02-09 19:19:13",
    "source": "src",
    "k": 10,
    "coarse_k": 100,
    "query_count": 3,
    "avg_jaccard_topk": 0.07165641376167692,
    "avg_rbo_topk": 0.10859973275904759,
    "staged": {
      "success": 3,
      "avg_latency_ms": 8272.264699995518
    },
    "dense_rerank": {
      "success": 3,
      "avg_latency_ms": 2753.5123999913535
    }
  },
  "comparisons": [
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 6453.665100008249,
|
||||
"num_results": 37,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 4530.146999955177,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.05263157894736842,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 5,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 18202.905599981546,
|
||||
"num_results": 3,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 15.580177307128906,
|
||||
"stage2_expand_ms": 16622.225522994995,
|
||||
"stage3_cluster_ms": 0.00095367431640625,
|
||||
"stage4_rerank_ms": 1516.9692039489746
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 3,
|
||||
"stage2_expanded": 4,
|
||||
"stage2_unique_paths": 3,
|
||||
"stage2_duplicate_paths": 1,
|
||||
"stage3_clustered": 4,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 4
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 1746.9925000071526,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.09090909090909091,
|
||||
"rbo_topk": 0.23541639942571424,
|
||||
"staged_unique_files_topk": 2,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 160.2233999967575,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 1983.3977000117302,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.07142857142857142,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,208 @@
{
  "summary": {
    "timestamp": "2026-02-09 17:27:26",
    "source": "src",
    "k": 10,
    "coarse_k": 100,
    "query_count": 3,
    "avg_jaccard_topk": 0.5809523809523809,
    "avg_rbo_topk": 0.31359567182809517,
    "staged": {
      "success": 3,
      "avg_latency_ms": 22826.711433331173
    },
    "dense_rerank": {
      "success": 3,
      "avg_latency_ms": 2239.804533312718
    }
  },
  "comparisons": [
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 26690.878500014544,
|
||||
"num_results": 6,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 8534.121036529541,
|
||||
"stage2_expand_ms": 13298.827648162842,
|
||||
"stage3_cluster_ms": 0.026226043701171875,
|
||||
"stage4_rerank_ms": 4805.774688720703
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 149,
|
||||
"stage2_unique_paths": 43,
|
||||
"stage2_duplicate_paths": 106,
|
||||
"stage3_clustered": 20,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 2416.653799980879,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.14285714285714285,
|
||||
"rbo_topk": 0.25764429885142853,
|
||||
"staged_unique_files_topk": 6,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 26188.838399976492,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 525.7587432861328,
|
||||
"stage2_expand_ms": 23659.400939941406,
|
||||
"stage3_cluster_ms": 0.021696090698242188,
|
||||
"stage4_rerank_ms": 1928.950309753418
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 101,
|
||||
"stage2_unique_paths": 23,
|
||||
"stage2_duplicate_paths": 78,
|
||||
"stage3_clustered": 20,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 1953.0992999970913,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.9,
|
||||
"rbo_topk": 0.39374892065285705,
|
||||
"staged_unique_files_topk": 9,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 15600.41740000248,
|
||||
"num_results": 7,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 475.54636001586914,
|
||||
"stage2_expand_ms": 13318.811893463135,
|
||||
"stage3_cluster_ms": 0.03218650817871094,
|
||||
"stage4_rerank_ms": 1755.7547092437744
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 100,
|
||||
"stage2_expanded": 100,
|
||||
"stage2_unique_paths": 21,
|
||||
"stage2_duplicate_paths": 79,
|
||||
"stage3_clustered": 20,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 2349.660499960184,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.7,
|
||||
"rbo_topk": 0.28939379598,
|
||||
"staged_unique_files_topk": 7,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,356 @@
{
  "summary": {
    "timestamp": "2026-02-09 20:36:02",
    "source": "src",
    "k": 10,
    "coarse_k": 100,
    "query_count": 7,
    "avg_jaccard_topk": 0.12095811211246858,
    "avg_rbo_topk": 0.09594444061244897,
    "staged": {
      "success": 7,
      "avg_latency_ms": 2436.7641000066483
    },
    "dense_rerank": {
      "success": 7,
      "avg_latency_ms": 2593.7630428629263
    }
  },
  "comparisons": [
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 285.091000020504,
|
||||
"num_results": 37,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 2412.1290000081062,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.05263157894736842,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 5,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 15029.73520001769,
|
||||
"num_results": 3,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 101.95636749267578,
|
||||
"stage2_expand_ms": 12690.008640289307,
|
||||
"stage3_cluster_ms": 0.001430511474609375,
|
||||
"stage4_rerank_ms": 2155.757427215576
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 3,
|
||||
"stage2_expanded": 4,
|
||||
"stage2_unique_paths": 3,
|
||||
"stage2_duplicate_paths": 1,
|
||||
"stage3_clustered": 4,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 4
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 2424.7003000080585,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.09090909090909091,
|
||||
"rbo_topk": 0.23541639942571424,
|
||||
"staged_unique_files_topk": 2,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 324.4240999817848,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 2497.174100011587,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.07142857142857142,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "graph expansion",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "graph expansion",
|
||||
"latency_ms": 359.32159999012947,
|
||||
"num_results": 11,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\hybrid_search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "graph expansion",
|
||||
"latency_ms": 2553.8585999906063,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.17647058823529413,
|
||||
"rbo_topk": 0.06801300374142856,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 7,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "clustering strategy",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "clustering strategy",
|
||||
"latency_ms": 286.38240000605583,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "clustering strategy",
|
||||
"latency_ms": 2570.379099994898,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.1111111111111111,
|
||||
"rbo_topk": 0.04670528456571428,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "error handling",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "error handling",
|
||||
"latency_ms": 412.58780002593994,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "error handling",
|
||||
"latency_ms": 2894.3279000222683,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.07142857142857142,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "how to parse json",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "how to parse json",
|
||||
"latency_ms": 359.8066000044346,
|
||||
"num_results": 4,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "how to parse json",
|
||||
"latency_ms": 2803.772300004959,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.2727272727272727,
|
||||
"rbo_topk": 0.18590219827714285,
|
||||
"staged_unique_files_topk": 4,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,462 @@
{
  "summary": {
    "timestamp": "2026-02-09 20:45:10",
    "source": "src",
    "k": 10,
    "coarse_k": 100,
    "query_count": 7,
    "avg_jaccard_topk": 0.1283498247783962,
    "avg_rbo_topk": 0.09664773770897958,
    "staged": {
      "success": 7,
      "avg_latency_ms": 16394.152085712976
    },
    "dense_rerank": {
      "success": 7,
      "avg_latency_ms": 2839.464457145759
    }
  },
  "comparisons": [
{
|
||||
"query": "class Config",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "class Config",
|
||||
"latency_ms": 6233.342700004578,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 125.80323219299316,
|
||||
"stage1_fallback_search_ms": 277.1914005279541,
|
||||
"stage2_expand_ms": 3032.3121547698975,
|
||||
"stage3_cluster_ms": 0.02765655517578125,
|
||||
"stage4_rerank_ms": 2699.3532180786133
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 37,
|
||||
"stage1_fallback_used": 1,
|
||||
"stage2_expanded": 86,
|
||||
"stage2_unique_paths": 53,
|
||||
"stage2_duplicate_paths": 33,
|
||||
"stage3_clustered": 20,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "class Config",
|
||||
"latency_ms": 3036.3474999964237,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.125,
|
||||
"rbo_topk": 0.06741929885142856,
|
||||
"staged_unique_files_topk": 8,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 5,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "def search",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "def search",
|
||||
"latency_ms": 12703.503900021315,
|
||||
"num_results": 3,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 83.4202766418457,
|
||||
"stage2_expand_ms": 9856.60433769226,
|
||||
"stage3_cluster_ms": 0.0011920928955078125,
|
||||
"stage4_rerank_ms": 2664.630174636841
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 3,
|
||||
"stage2_expanded": 4,
|
||||
"stage2_unique_paths": 3,
|
||||
"stage2_duplicate_paths": 1,
|
||||
"stage3_clustered": 4,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 4
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "def search",
|
||||
"latency_ms": 2888.501700013876,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.09090909090909091,
|
||||
"rbo_topk": 0.23541639942571424,
|
||||
"staged_unique_files_topk": 2,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "LspBridge",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 33684.76710000634,
|
||||
"num_results": 5,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 78.8118839263916,
|
||||
"stage1_fallback_search_ms": 174.6652126312256,
|
||||
"stage2_expand_ms": 31018.909692764282,
|
||||
"stage3_cluster_ms": 0.0016689300537109375,
|
||||
"stage4_rerank_ms": 2316.9021606445312
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 5,
|
||||
"stage1_fallback_used": 1,
|
||||
"stage2_expanded": 5,
|
||||
"stage2_unique_paths": 5,
|
||||
"stage2_duplicate_paths": 0,
|
||||
"stage3_clustered": 5,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 5
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "LspBridge",
|
||||
"latency_ms": 2824.729699999094,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.07142857142857142,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 2,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "graph expansion",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "graph expansion",
|
||||
"latency_ms": 16910.090099990368,
|
||||
"num_results": 8,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 99.6243953704834,
|
||||
"stage1_fallback_search_ms": 207.89742469787598,
|
||||
"stage2_expand_ms": 13929.257154464722,
|
||||
"stage3_cluster_ms": 0.016927719116210938,
|
||||
"stage4_rerank_ms": 2586.843729019165
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 11,
|
||||
"stage1_fallback_used": 1,
|
||||
"stage2_expanded": 29,
|
||||
"stage2_unique_paths": 14,
|
||||
"stage2_duplicate_paths": 15,
|
||||
"stage3_clustered": 20,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 20
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "graph expansion",
|
||||
"latency_ms": 2765.958099991083,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.21428571428571427,
|
||||
"rbo_topk": 0.06893318399142857,
|
||||
"staged_unique_files_topk": 7,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 6,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "clustering strategy",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "clustering strategy",
|
||||
"latency_ms": 8380.20839998126,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 95.42632102966309,
|
||||
"stage1_fallback_search_ms": 187.4692440032959,
|
||||
"stage2_expand_ms": 5561.658143997192,
|
||||
"stage3_cluster_ms": 0.0007152557373046875,
|
||||
"stage4_rerank_ms": 2441.287040710449
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 10,
|
||||
"stage1_fallback_used": 1,
|
||||
"stage2_expanded": 10,
|
||||
"stage2_unique_paths": 10,
|
||||
"stage2_duplicate_paths": 0,
|
||||
"stage3_clustered": 10,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 10
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "clustering strategy",
|
||||
"latency_ms": 2788.0665000081062,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.1111111111111111,
|
||||
"rbo_topk": 0.04670528456571428,
|
||||
"staged_unique_files_topk": 10,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 3,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "error handling",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "error handling",
|
||||
"latency_ms": 19897.71709999442,
|
||||
"num_results": 6,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 114.1653060913086,
|
||||
"stage1_fallback_search_ms": 235.73827743530273,
|
||||
"stage2_expand_ms": 16702.077865600586,
|
||||
"stage3_cluster_ms": 0.00095367431640625,
|
||||
"stage4_rerank_ms": 2757.4093341827393
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 5,
|
||||
"stage1_fallback_used": 1,
|
||||
"stage2_expanded": 13,
|
||||
"stage2_unique_paths": 6,
|
||||
"stage2_duplicate_paths": 7,
|
||||
"stage3_clustered": 13,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 13
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "error handling",
|
||||
"latency_ms": 2874.178600013256,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.07142857142857142,
|
||||
"rbo_topk": 0.045191399425714276,
|
||||
"staged_unique_files_topk": 5,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 4,
|
||||
"dense_unique_dirs_topk": 4
|
||||
},
|
||||
{
|
||||
"query": "how to parse json",
|
||||
"staged": {
|
||||
"strategy": "staged",
|
||||
"query": "how to parse json",
|
||||
"latency_ms": 16949.43529999256,
|
||||
"num_results": 7,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py"
|
||||
],
|
||||
"stage_stats": {
|
||||
"stage_times": {
|
||||
"stage1_binary_ms": 104.50935363769531,
|
||||
"stage1_fallback_search_ms": 190.6723976135254,
|
||||
"stage2_expand_ms": 14165.841102600098,
|
||||
"stage3_cluster_ms": 0.0011920928955078125,
|
||||
"stage4_rerank_ms": 2399.226188659668
|
||||
},
|
||||
"stage_counts": {
|
||||
"stage1_candidates": 4,
|
||||
"stage1_fallback_used": 1,
|
||||
"stage2_expanded": 11,
|
||||
"stage2_unique_paths": 7,
|
||||
"stage2_duplicate_paths": 4,
|
||||
"stage3_clustered": 11,
|
||||
"stage3_strategy": "score",
|
||||
"stage4_reranked": 11
|
||||
}
|
||||
},
|
||||
"error": null
|
||||
},
|
||||
"dense_rerank": {
|
||||
"strategy": "dense_rerank",
|
||||
"query": "how to parse json",
|
||||
"latency_ms": 2698.469099998474,
|
||||
"num_results": 10,
|
||||
"topk_paths": [
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py",
|
||||
"d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py"
|
||||
],
|
||||
"stage_stats": null,
|
||||
"error": null
|
||||
},
|
||||
"jaccard_topk": 0.21428571428571427,
|
||||
"rbo_topk": 0.16767719827714284,
|
||||
"staged_unique_files_topk": 7,
|
||||
"dense_unique_files_topk": 10,
|
||||
"staged_unique_dirs_topk": 5,
|
||||
"dense_unique_dirs_topk": 4
|
||||
}
|
||||
]
|
||||
}
|
||||
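The summary blocks above report avg_jaccard_topk and avg_rbo_topk for each staged vs dense_rerank pair. As a rough sketch of how such top-k overlap metrics are commonly computed (the exact implementation and the RBO persistence parameter are not shown in this diff, so the truncated form with p = 0.9 below is an assumption):

from typing import List

def jaccard_topk(a: List[str], b: List[str]) -> float:
    """Order-insensitive set overlap of two top-k path lists."""
    sa, sb = set(a), set(b)
    if not sa and not sb:
        return 1.0
    return len(sa & sb) / len(sa | sb)

def rbo_topk(a: List[str], b: List[str], p: float = 0.9) -> float:
    """Truncated rank-biased overlap; p = 0.9 is an assumed persistence value."""
    k = max(len(a), len(b))
    score = 0.0
    for d in range(1, k + 1):
        overlap = len(set(a[:d]) & set(b[:d]))
        score += (p ** (d - 1)) * (overlap / d)
    return (1 - p) * score

# Example: compare one staged top-k list against one dense_rerank top-k list.
staged_paths = ["a.py", "b.py", "c.py"]
dense_paths = ["a.py", "d.py", "e.py"]
print(jaccard_topk(staged_paths, dense_paths), rbo_topk(staged_paths, dense_paths))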
@@ -3486,6 +3486,81 @@ def index_binary(
|
||||
console.print(f" [dim]... and {len(errors_list) - 3} more[/dim]")
|
||||
|
||||
|
||||
@index_app.command("binary-mmap")
|
||||
def index_binary_mmap(
|
||||
path: Annotated[Path, typer.Argument(help="Project directory (indexed) or _index.db file")],
|
||||
force: Annotated[bool, typer.Option("--force", "-f", help="Force rebuild binary mmap + metadata")] = False,
|
||||
embedding_dim: Annotated[Optional[int], typer.Option("--embedding-dim", help="Only use embeddings with this dimension (e.g. 768)")] = None,
|
||||
json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False,
|
||||
verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False,
|
||||
) -> None:
|
||||
"""Build centralized `_binary_vectors.mmap` from existing embeddings (no model calls).
|
||||
|
||||
This command enables the staged binary coarse search without regenerating
|
||||
embeddings and without triggering global model locks. It:
|
||||
- scans distributed semantic_chunks.embedding blobs under the index root
|
||||
- assigns global chunk_ids
|
||||
- writes `<index_root>/_binary_vectors.mmap` (+ `.meta.json`)
|
||||
- writes `<index_root>/_vectors_meta.db` (chunk_metadata + binary_vectors)
|
||||
"""
|
||||
_configure_logging(verbose, json_mode)
|
||||
|
||||
from codexlens.cli.embedding_manager import build_centralized_binary_vectors_from_existing
|
||||
|
||||
target_path = path.expanduser().resolve()
|
||||
|
||||
# Resolve index_root similar to other index commands.
|
||||
if target_path.is_file() and target_path.name == "_index.db":
|
||||
index_root = target_path.parent
|
||||
else:
|
||||
registry = RegistryStore()
|
||||
try:
|
||||
registry.initialize()
|
||||
mapper = PathMapper()
|
||||
index_db = mapper.source_to_index_db(target_path)
|
||||
if not index_db.exists():
|
||||
msg = f"No index found for {target_path}"
|
||||
if json_mode:
|
||||
print_json(success=False, error=msg)
|
||||
else:
|
||||
console.print(f"[red]Error:[/red] {msg}")
|
||||
console.print("Run `codexlens index init` first to create an index.")
|
||||
raise typer.Exit(code=1)
|
||||
index_root = index_db.parent
|
||||
finally:
|
||||
registry.close()
|
||||
|
||||
def progress_update(message: str) -> None:
|
||||
if json_mode:
|
||||
return
|
||||
console.print(f"[dim]{message}[/dim]")
|
||||
|
||||
result = build_centralized_binary_vectors_from_existing(
|
||||
index_root,
|
||||
force=force,
|
||||
embedding_dim=embedding_dim,
|
||||
progress_callback=progress_update,
|
||||
)
|
||||
|
||||
if json_mode:
|
||||
print_json(**result)
|
||||
return
|
||||
|
||||
if not result.get("success"):
|
||||
console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}")
|
||||
hint = result.get("hint")
|
||||
if hint:
|
||||
console.print(f"[dim]{hint}[/dim]")
|
||||
raise typer.Exit(code=1)
|
||||
|
||||
data = result.get("result", {})
|
||||
console.print("\n[green]Binary mmap build complete[/green]")
|
||||
console.print(f" Index root: {data.get('index_root')}")
|
||||
console.print(f" Chunks written: {data.get('chunks_written'):,}")
|
||||
console.print(f" Binary mmap: {data.get('binary_mmap')}")
|
||||
console.print(f" Meta DB: {data.get('vectors_meta_db')}")
|
||||
|
||||
|
||||
# ==================== Index Status Command ====================
|
||||
|
||||
@index_app.command("status")
|
||||
|
||||
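The `binary-mmap` command above is a thin CLI wrapper around build_centralized_binary_vectors_from_existing (defined in the embedding_manager hunk below). A minimal sketch of calling the builder directly; the index_root value is illustrative, and in the CLI it is resolved via RegistryStore/PathMapper instead:

from pathlib import Path

from codexlens.cli.embedding_manager import build_centralized_binary_vectors_from_existing

index_root = Path("path/to/index_root")  # assumption: any directory tree containing _index.db files

result = build_centralized_binary_vectors_from_existing(
    index_root,
    force=True,               # rebuild _binary_vectors.mmap and _vectors_meta.db
    embedding_dim=768,        # optional: restrict to one embedding dimension
    progress_callback=print,  # any callable taking a message string
)

if result.get("success"):
    info = result["result"]
    print(info["chunks_written"], info["binary_mmap"])
else:
    print("build failed:", result.get("error"))

From the command line this corresponds to something like `codexlens index binary-mmap <path> --force --embedding-dim 768` (assuming the installed entry point is `codexlens`, as the `codexlens index init` hint printed above suggests).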
@@ -860,6 +860,294 @@ def _discover_index_dbs_internal(index_root: Path) -> List[Path]:
|
||||
return sorted(index_root.rglob("_index.db"))
|
||||
|
||||
|
||||
def build_centralized_binary_vectors_from_existing(
|
||||
index_root: Path,
|
||||
*,
|
||||
force: bool = False,
|
||||
embedding_dim: Optional[int] = None,
|
||||
progress_callback: Optional[callable] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build centralized binary vectors + metadata from existing semantic_chunks embeddings.
|
||||
|
||||
This is a fast-path for enabling the staged binary coarse search without
|
||||
regenerating embeddings (and without triggering global model locks).
|
||||
|
||||
It scans all distributed `_index.db` files under `index_root`, reads
|
||||
existing `semantic_chunks.embedding` blobs, assigns new global chunk_ids,
|
||||
and writes:
|
||||
- `<index_root>/_binary_vectors.mmap` (+ `.meta.json`)
|
||||
- `<index_root>/_vectors_meta.db` (chunk_metadata + binary_vectors)
|
||||
"""
|
||||
from codexlens.config import BINARY_VECTORS_MMAP_NAME, VECTORS_META_DB_NAME
|
||||
from codexlens.storage.vector_meta_store import VectorMetadataStore
|
||||
|
||||
index_root = Path(index_root).resolve()
|
||||
vectors_meta_path = index_root / VECTORS_META_DB_NAME
|
||||
mmap_path = index_root / BINARY_VECTORS_MMAP_NAME
|
||||
meta_path = mmap_path.with_suffix(".meta.json")
|
||||
|
||||
index_files = _discover_index_dbs_internal(index_root)
|
||||
if not index_files:
|
||||
return {"success": False, "error": f"No _index.db files found under {index_root}"}
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(f"Scanning {len(index_files)} index databases for existing embeddings...")
|
||||
|
||||
# First pass: detect embedding dims present.
|
||||
dims_seen: Dict[int, int] = {}
|
||||
selected_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
for index_path in index_files:
|
||||
try:
|
||||
with sqlite3.connect(index_path) as conn:
|
||||
conn.row_factory = sqlite3.Row
|
||||
has_table = conn.execute(
|
||||
"SELECT 1 FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
|
||||
).fetchone()
|
||||
if not has_table:
|
||||
continue
|
||||
|
||||
dim_row = conn.execute(
|
||||
"SELECT backend, model_profile, model_name, embedding_dim FROM embeddings_config WHERE id=1"
|
||||
).fetchone()
|
||||
if dim_row and dim_row[3]:
|
||||
dim_val = int(dim_row[3])
|
||||
dims_seen[dim_val] = dims_seen.get(dim_val, 0) + 1
|
||||
if selected_config is None:
|
||||
selected_config = {
|
||||
"backend": dim_row[0],
|
||||
"model_profile": dim_row[1],
|
||||
"model_name": dim_row[2],
|
||||
"embedding_dim": dim_val,
|
||||
}
|
||||
|
||||
# We count per-dim later after selecting a target dim.
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if not dims_seen:
|
||||
return {"success": False, "error": "No embeddings_config found under index_root"}
|
||||
|
||||
if embedding_dim is None:
|
||||
# Default: pick the most common embedding dim across indexes.
|
||||
embedding_dim = max(dims_seen.items(), key=lambda kv: kv[1])[0]
|
||||
|
||||
embedding_dim = int(embedding_dim)
|
||||
|
||||
if progress_callback and len(dims_seen) > 1:
|
||||
progress_callback(f"Mixed embedding dims detected, selecting dim={embedding_dim} (seen={dims_seen})")
|
||||
|
||||
# Re-detect the selected model config for this dim (do not reuse an arbitrary first-seen config).
|
||||
selected_config = None
|
||||
|
||||
# Second pass: count only chunks matching selected dim.
|
||||
total_chunks = 0
|
||||
for index_path in index_files:
|
||||
try:
|
||||
with sqlite3.connect(index_path) as conn:
|
||||
conn.row_factory = sqlite3.Row
|
||||
has_table = conn.execute(
|
||||
"SELECT 1 FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
|
||||
).fetchone()
|
||||
if not has_table:
|
||||
continue
|
||||
|
||||
dim_row = conn.execute(
|
||||
"SELECT backend, model_profile, model_name, embedding_dim FROM embeddings_config WHERE id=1"
|
||||
).fetchone()
|
||||
dim_val = int(dim_row[3]) if dim_row and dim_row[3] else None
|
||||
if dim_val != embedding_dim:
|
||||
continue
|
||||
|
||||
if selected_config is None:
|
||||
selected_config = {
|
||||
"backend": dim_row[0],
|
||||
"model_profile": dim_row[1],
|
||||
"model_name": dim_row[2],
|
||||
"embedding_dim": dim_val,
|
||||
}
|
||||
|
||||
row = conn.execute(
|
||||
"SELECT COUNT(*) FROM semantic_chunks WHERE embedding IS NOT NULL AND length(embedding) > 0"
|
||||
).fetchone()
|
||||
total_chunks += int(row[0] if row else 0)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if not total_chunks:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"No existing embeddings found for embedding_dim={embedding_dim}",
|
||||
"dims_seen": dims_seen,
|
||||
}
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(f"Found {total_chunks} embedded chunks (dim={embedding_dim}). Building binary vectors...")
|
||||
|
||||
# Prepare output files / DB.
|
||||
try:
|
||||
import numpy as np
|
||||
except Exception as exc:
|
||||
return {"success": False, "error": f"numpy required to build binary vectors: {exc}"}
|
||||
|
||||
store = VectorMetadataStore(vectors_meta_path)
|
||||
store._ensure_schema()
|
||||
|
||||
if force:
|
||||
try:
|
||||
store.clear()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
store.clear_binary_vectors()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if mmap_path.exists():
|
||||
mmap_path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if meta_path.exists():
|
||||
meta_path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
bytes_per_vec = (int(embedding_dim) + 7) // 8
|
||||
mmap = np.memmap(
|
||||
str(mmap_path),
|
||||
dtype=np.uint8,
|
||||
mode="w+",
|
||||
shape=(int(total_chunks), int(bytes_per_vec)),
|
||||
)
|
||||
|
||||
chunk_ids: List[int] = []
|
||||
chunks_batch: List[Dict[str, Any]] = []
|
||||
bin_ids_batch: List[int] = []
|
||||
bin_vecs_batch: List[bytes] = []
|
||||
batch_limit = 500
|
||||
|
||||
global_id = 1
|
||||
write_idx = 0
|
||||
|
||||
skipped_indexes: Dict[str, int] = {}
|
||||
for index_path in index_files:
|
||||
try:
|
||||
with sqlite3.connect(index_path) as conn:
|
||||
conn.row_factory = sqlite3.Row
|
||||
has_table = conn.execute(
|
||||
"SELECT 1 FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
|
||||
).fetchone()
|
||||
if not has_table:
|
||||
continue
|
||||
|
||||
dim_row = conn.execute(
|
||||
"SELECT embedding_dim FROM embeddings_config WHERE id=1"
|
||||
).fetchone()
|
||||
dim_val = int(dim_row[0]) if dim_row and dim_row[0] else None
|
||||
if dim_val != embedding_dim:
|
||||
skipped_indexes[str(index_path)] = dim_val or -1
|
||||
continue
|
||||
|
||||
rows = conn.execute(
|
||||
"SELECT file_path, content, embedding, metadata, category FROM semantic_chunks "
|
||||
"WHERE embedding IS NOT NULL AND length(embedding) > 0"
|
||||
).fetchall()
|
||||
|
||||
for row in rows:
|
||||
emb = np.frombuffer(row["embedding"], dtype=np.float32)
|
||||
if emb.size != int(embedding_dim):
|
||||
continue
|
||||
|
||||
packed = np.packbits((emb > 0).astype(np.uint8))
|
||||
if packed.size != bytes_per_vec:
|
||||
continue
|
||||
|
||||
mmap[write_idx] = packed
|
||||
write_idx += 1
|
||||
|
||||
cid = global_id
|
||||
global_id += 1
|
||||
chunk_ids.append(cid)
|
||||
|
||||
meta_raw = row["metadata"]
|
||||
meta_dict: Dict[str, Any] = {}
|
||||
if meta_raw:
|
||||
try:
|
||||
meta_dict = json.loads(meta_raw) if isinstance(meta_raw, str) else dict(meta_raw)
|
||||
except Exception:
|
||||
meta_dict = {}
|
||||
|
||||
chunks_batch.append(
|
||||
{
|
||||
"chunk_id": cid,
|
||||
"file_path": row["file_path"],
|
||||
"content": row["content"],
|
||||
"start_line": meta_dict.get("start_line"),
|
||||
"end_line": meta_dict.get("end_line"),
|
||||
"category": row["category"],
|
||||
"metadata": meta_dict,
|
||||
"source_index_db": str(index_path),
|
||||
}
|
||||
)
|
||||
|
||||
bin_ids_batch.append(cid)
|
||||
bin_vecs_batch.append(packed.tobytes())
|
||||
|
||||
if len(chunks_batch) >= batch_limit:
|
||||
store.add_chunks(chunks_batch)
|
||||
store.add_binary_vectors(bin_ids_batch, bin_vecs_batch)
|
||||
chunks_batch = []
|
||||
bin_ids_batch = []
|
||||
bin_vecs_batch = []
|
||||
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if chunks_batch:
|
||||
store.add_chunks(chunks_batch)
|
||||
store.add_binary_vectors(bin_ids_batch, bin_vecs_batch)
|
||||
|
||||
mmap.flush()
|
||||
del mmap
|
||||
|
||||
# If we skipped inconsistent vectors, truncate metadata to actual write count.
|
||||
chunk_ids = chunk_ids[:write_idx]
|
||||
|
||||
# Write sidecar metadata.
|
||||
with open(meta_path, "w", encoding="utf-8") as f:
|
||||
json.dump(
|
||||
{
|
||||
"shape": [int(write_idx), int(bytes_per_vec)],
|
||||
"chunk_ids": chunk_ids,
|
||||
"embedding_dim": int(embedding_dim),
|
||||
"backend": (selected_config or {}).get("backend"),
|
||||
"model_profile": (selected_config or {}).get("model_profile"),
|
||||
"model_name": (selected_config or {}).get("model_name"),
|
||||
},
|
||||
f,
|
||||
)
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(f"Binary vectors ready: {mmap_path} (rows={write_idx})")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"result": {
|
||||
"index_root": str(index_root),
|
||||
"index_files_scanned": len(index_files),
|
||||
"chunks_total": int(total_chunks),
|
||||
"chunks_written": int(write_idx),
|
||||
"embedding_dim": int(embedding_dim),
|
||||
"bytes_per_vector": int(bytes_per_vec),
|
||||
"skipped_indexes": len(skipped_indexes),
|
||||
"vectors_meta_db": str(vectors_meta_path),
|
||||
"binary_mmap": str(mmap_path),
|
||||
"binary_meta_json": str(meta_path),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def discover_all_index_dbs(index_root: Path) -> List[Path]:
|
||||
"""Recursively find all _index.db files in an index tree.
|
||||
|
||||
@@ -1804,4 +2092,4 @@ def check_global_model_lock(
|
||||
"has_conflict": has_conflict,
|
||||
"locked_config": locked_config,
|
||||
"target_config": {"backend": target_backend, "model": target_model},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,7 +153,7 @@ class Config:
|
||||
staged_realtime_lsp_max_concurrent: int = 2 # Max concurrent LSP requests during graph expansion
|
||||
staged_realtime_lsp_warmup_s: float = 3.0 # Wait for server analysis after opening seed docs
|
||||
staged_realtime_lsp_resolve_symbols: bool = False # If True, resolves symbol names via documentSymbol (slower)
|
||||
staged_clustering_strategy: str = "auto" # "auto", "hdbscan", "dbscan", "frequency", "noop"
|
||||
staged_clustering_strategy: str = "auto" # "auto", "hdbscan", "dbscan", "frequency", "noop", "score", "dir_rr", "path"
|
||||
staged_clustering_min_size: int = 3 # Minimum cluster size for Stage 3 grouping
|
||||
enable_staged_rerank: bool = True # Enable optional cross-encoder reranking in Stage 4
|
||||
|
||||
|
||||
codex-lens/src/codexlens/lsp/keepalive_bridge.py (new file, 135 lines)
@@ -0,0 +1,135 @@
|
||||
"""Keep-alive wrapper for Standalone LSP servers in synchronous workflows.
|
||||
|
||||
The staged realtime pipeline calls into LSP from synchronous code paths.
|
||||
Creating a fresh asyncio loop per query (via asyncio.run) forces language
|
||||
servers to start/stop every time, which is slow and can trigger shutdown
|
||||
timeouts on Windows.
|
||||
|
||||
This module runs an asyncio event loop in a background thread and keeps a
|
||||
single LspBridge (and its StandaloneLspManager + subprocesses) alive across
|
||||
multiple queries. Callers submit coroutines that operate on the shared bridge.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
import asyncio
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from typing import Awaitable, Callable, Optional, TypeVar
|
||||
|
||||
from codexlens.lsp.lsp_bridge import LspBridge
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class KeepAliveKey:
|
||||
workspace_root: str
|
||||
config_file: Optional[str]
|
||||
timeout: float
|
||||
|
||||
|
||||
class KeepAliveLspBridge:
|
||||
"""Runs a shared LspBridge on a dedicated event loop thread."""
|
||||
|
||||
def __init__(self, *, workspace_root: str, config_file: Optional[str], timeout: float) -> None:
|
||||
self._key = KeepAliveKey(workspace_root=workspace_root, config_file=config_file, timeout=float(timeout))
|
||||
self._lock = threading.RLock()
|
||||
self._call_lock = threading.RLock()
|
||||
self._ready = threading.Event()
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
self._bridge: Optional[LspBridge] = None
|
||||
self._stopped = False
|
||||
|
||||
atexit.register(self.stop)
|
||||
|
||||
@property
|
||||
def key(self) -> KeepAliveKey:
|
||||
return self._key
|
||||
|
||||
def start(self) -> None:
|
||||
with self._lock:
|
||||
if self._stopped:
|
||||
raise RuntimeError("KeepAliveLspBridge is stopped")
|
||||
if self._thread is not None and self._thread.is_alive():
|
||||
return
|
||||
|
||||
self._ready.clear()
|
||||
thread = threading.Thread(target=self._run, name="codexlens-lsp-keepalive", daemon=True)
|
||||
self._thread = thread
|
||||
thread.start()
|
||||
|
||||
if not self._ready.wait(timeout=10.0):
|
||||
raise RuntimeError("Timed out starting LSP keep-alive loop")
|
||||
|
||||
def stop(self) -> None:
|
||||
with self._lock:
|
||||
if self._stopped:
|
||||
return
|
||||
self._stopped = True
|
||||
loop = self._loop
|
||||
bridge = self._bridge
|
||||
thread = self._thread
|
||||
|
||||
if loop is not None and bridge is not None:
|
||||
try:
|
||||
fut = asyncio.run_coroutine_threadsafe(bridge.close(), loop)
|
||||
fut.result(timeout=5.0)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
loop.call_soon_threadsafe(loop.stop)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if thread is not None:
|
||||
try:
|
||||
thread.join(timeout=5.0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def run(self, fn: Callable[[LspBridge], Awaitable[T]], *, timeout: Optional[float] = None) -> T:
|
||||
"""Run an async function against the shared LspBridge and return its result."""
|
||||
self.start()
|
||||
loop = self._loop
|
||||
bridge = self._bridge
|
||||
if loop is None or bridge is None:
|
||||
raise RuntimeError("Keep-alive loop not initialized")
|
||||
|
||||
async def _call() -> T:
|
||||
return await fn(bridge)
|
||||
|
||||
# Serialize bridge usage to avoid overlapping LSP request storms.
|
||||
with self._call_lock:
|
||||
fut = asyncio.run_coroutine_threadsafe(_call(), loop)
|
||||
return fut.result(timeout=float(timeout or self._key.timeout) + 1.0)
|
||||
|
||||
def _run(self) -> None:
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
bridge = LspBridge(
|
||||
workspace_root=self._key.workspace_root,
|
||||
config_file=self._key.config_file,
|
||||
timeout=self._key.timeout,
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
self._loop = loop
|
||||
self._bridge = bridge
|
||||
self._ready.set()
|
||||
|
||||
try:
|
||||
loop.run_forever()
|
||||
finally:
|
||||
try:
|
||||
if self._bridge is not None:
|
||||
loop.run_until_complete(self._bridge.close())
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
loop.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
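A minimal usage sketch for the keep-alive wrapper above, assuming a Python workspace and using get_document_symbols on the shared LspBridge (as the staged pipeline does); the paths are illustrative only:

from codexlens.lsp.keepalive_bridge import KeepAliveLspBridge
from codexlens.lsp.lsp_bridge import LspBridge

keepalive = KeepAliveLspBridge(
    workspace_root="path/to/project",  # illustrative workspace root
    config_file=None,
    timeout=30.0,
)

async def fetch_symbols(bridge: LspBridge):
    # The language server stays warm between calls, so repeated queries skip startup cost.
    return await bridge.get_document_symbols("path/to/project/module.py")

symbols = keepalive.run(fetch_symbols, timeout=30.0)
print(symbols)

keepalive.stop()  # also registered via atexit, but explicit shutdown is cleaner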
@@ -103,6 +103,7 @@ class StandaloneLspManager:
|
||||
self._configs: Dict[str, ServerConfig] = {} # language_id -> ServerConfig
|
||||
self._read_tasks: Dict[str, asyncio.Task] = {} # language_id -> read task
|
||||
self._stderr_tasks: Dict[str, asyncio.Task] = {} # language_id -> stderr read task
|
||||
self._processor_tasks: Dict[str, asyncio.Task] = {} # language_id -> message processor task
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
def _find_config_file(self) -> Optional[Path]:
|
||||
@@ -269,7 +270,7 @@ class StandaloneLspManager:
|
||||
)
|
||||
|
||||
# Start the message processor task to handle queued messages
|
||||
asyncio.create_task(self._process_messages(language_id))
|
||||
self._processor_tasks[language_id] = asyncio.create_task(self._process_messages(language_id))
|
||||
|
||||
# Initialize the server - now uses queue for reading responses
|
||||
await self._initialize_server(state)
|
||||
@@ -311,6 +312,15 @@ class StandaloneLspManager:
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
# Cancel message processor task
|
||||
processor_task = self._processor_tasks.pop(language_id, None)
|
||||
if processor_task:
|
||||
processor_task.cancel()
|
||||
try:
|
||||
await processor_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
# Send shutdown request
|
||||
try:
|
||||
await self._send_request(state, "shutdown", None, timeout=5.0)
|
||||
|
||||
@@ -66,6 +66,10 @@ class BinarySearcher:
|
||||
self._binary_matrix: Optional[np.ndarray] = None
|
||||
self._is_memmap = False
|
||||
self._loaded = False
|
||||
self._embedding_dim: Optional[int] = None
|
||||
self._backend: Optional[str] = None
|
||||
self._model: Optional[str] = None
|
||||
self._model_profile: Optional[str] = None
|
||||
|
||||
def load(self) -> bool:
|
||||
"""Load binary vectors using memory-mapped file or database fallback.
|
||||
@@ -90,6 +94,10 @@ class BinarySearcher:
|
||||
|
||||
shape = tuple(meta['shape'])
|
||||
self._chunk_ids = np.array(meta['chunk_ids'], dtype=np.int64)
|
||||
self._embedding_dim = meta.get("embedding_dim")
|
||||
self._backend = meta.get("backend")
|
||||
self._model = meta.get("model") or meta.get("model_name")
|
||||
self._model_profile = meta.get("model_profile")
|
||||
|
||||
# Memory-map the binary matrix (read-only)
|
||||
self._binary_matrix = np.memmap(
|
||||
@@ -141,6 +149,10 @@ class BinarySearcher:
|
||||
self._binary_matrix = np.vstack(binary_arrays)
|
||||
self._is_memmap = False
|
||||
self._loaded = True
|
||||
self._embedding_dim = None
|
||||
self._backend = None
|
||||
self._model = None
|
||||
self._model_profile = None
|
||||
|
||||
logger.info(
|
||||
"Loaded %d binary vectors from DB (%d bytes each)",
|
||||
@@ -261,6 +273,26 @@ class BinarySearcher:
|
||||
"""Get number of loaded binary vectors."""
|
||||
return len(self._chunk_ids) if self._chunk_ids is not None else 0
|
||||
|
||||
@property
|
||||
def embedding_dim(self) -> Optional[int]:
|
||||
"""Embedding dimension used to build these binary vectors (if known)."""
|
||||
return int(self._embedding_dim) if self._embedding_dim is not None else None
|
||||
|
||||
@property
|
||||
def backend(self) -> Optional[str]:
|
||||
"""Embedding backend used to build these vectors (if known)."""
|
||||
return self._backend
|
||||
|
||||
@property
|
||||
def model(self) -> Optional[str]:
|
||||
"""Embedding model name used to build these vectors (if known)."""
|
||||
return self._model
|
||||
|
||||
@property
|
||||
def model_profile(self) -> Optional[str]:
|
||||
"""Embedding profile name (fastembed) used to build these vectors (if known)."""
|
||||
return self._model_profile
|
||||
|
||||
@property
|
||||
def is_memmap(self) -> bool:
|
||||
"""Check if using memory-mapped file (vs in-memory array)."""
|
||||
|
||||
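The mmap built above stores sign-quantized embeddings (np.packbits over emb > 0, as in the embedding_manager hunk). A self-contained sketch of the Hamming-distance coarse ranking this enables; it mirrors the idea behind BinarySearcher.search but is not its actual implementation:

from typing import List, Tuple

import numpy as np

def pack_sign_bits(vecs: np.ndarray) -> np.ndarray:
    """Quantize float vectors to packed sign bits, one row per vector."""
    return np.packbits((vecs > 0).astype(np.uint8), axis=1)

def hamming_topk(query: np.ndarray, matrix: np.ndarray, top_k: int = 10) -> List[Tuple[int, int]]:
    """Return (row_index, hamming_distance) pairs for the closest rows."""
    q = np.packbits((query > 0).astype(np.uint8))
    xor = np.bitwise_xor(matrix, q)                 # (n_rows, bytes_per_vec)
    dists = np.unpackbits(xor, axis=1).sum(axis=1)  # popcount per row
    order = np.argsort(dists)[:top_k]
    return [(int(i), int(dists[i])) for i in order]

# Toy example with dim=16 random vectors; real vectors come from _binary_vectors.mmap.
rng = np.random.default_rng(0)
dense = rng.standard_normal((100, 16)).astype(np.float32)
matrix = pack_sign_bits(dense)
print(hamming_topk(dense[3], matrix, top_k=3))  # row 3 returns distance 0 (ties are possible)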
@@ -13,6 +13,7 @@ from typing import List, Optional, Dict, Any, Literal, Tuple, TYPE_CHECKING
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
from codexlens.entities import SearchResult, Symbol
|
||||
@@ -32,7 +33,7 @@ from codexlens.storage.global_index import GlobalSymbolIndex
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.storage.sqlite_store import SQLiteStore
|
||||
from codexlens.storage.vector_meta_store import VectorMetadataStore
|
||||
from codexlens.config import VECTORS_META_DB_NAME
|
||||
from codexlens.config import BINARY_VECTORS_MMAP_NAME, VECTORS_META_DB_NAME
|
||||
from codexlens.search.hybrid_search import HybridSearchEngine
|
||||
|
||||
|
||||
@@ -165,6 +166,9 @@ class ChainSearchEngine:
|
||||
self._max_workers = max_workers
|
||||
self._executor: Optional[ThreadPoolExecutor] = None
|
||||
self._config = config
|
||||
self._realtime_lsp_keepalive_lock = threading.RLock()
|
||||
self._realtime_lsp_keepalive = None
|
||||
self._realtime_lsp_keepalive_key = None
|
||||
|
||||
def _get_executor(self, max_workers: Optional[int] = None) -> ThreadPoolExecutor:
|
||||
"""Get or create the shared thread pool executor.
|
||||
@@ -187,6 +191,15 @@ class ChainSearchEngine:
|
||||
if self._executor is not None:
|
||||
self._executor.shutdown(wait=True)
|
||||
self._executor = None
|
||||
with self._realtime_lsp_keepalive_lock:
|
||||
keepalive = self._realtime_lsp_keepalive
|
||||
self._realtime_lsp_keepalive = None
|
||||
self._realtime_lsp_keepalive_key = None
|
||||
if keepalive is not None:
|
||||
try:
|
||||
keepalive.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __enter__(self) -> "ChainSearchEngine":
|
||||
"""Context manager entry."""
|
||||
@@ -838,7 +851,11 @@ class ChainSearchEngine:
|
||||
# ========== Stage 1: Binary Coarse Search ==========
|
||||
stage1_start = time.time()
|
||||
coarse_results, index_root = self._stage1_binary_search(
|
||||
query, index_paths, coarse_k, stats
|
||||
query,
|
||||
index_paths,
|
||||
coarse_k,
|
||||
stats,
|
||||
index_root=start_index.parent,
|
||||
)
|
||||
stage_times["stage1_binary_ms"] = (time.time() - stage1_start) * 1000
|
||||
stage_counts["stage1_candidates"] = len(coarse_results)
|
||||
@@ -849,14 +866,47 @@ class ChainSearchEngine:
|
||||
)
|
||||
|
||||
if not coarse_results:
|
||||
self.logger.debug("No binary candidates found, falling back to standard search")
|
||||
return self.search(query, source_path, options=options)
|
||||
# Keep the staged pipeline running even when Stage 1 yields no candidates.
|
||||
# This makes "realtime LSP graph → clustering → rerank" comparable across queries.
|
||||
self.logger.debug(
|
||||
"No Stage 1 candidates found; seeding staged pipeline with FTS results"
|
||||
)
|
||||
stage1_fallback_start = time.time()
|
||||
try:
|
||||
seed_opts = SearchOptions(
|
||||
depth=options.depth,
|
||||
max_workers=options.max_workers,
|
||||
limit_per_dir=max(10, int(coarse_k)),
|
||||
total_limit=int(coarse_k),
|
||||
include_symbols=True,
|
||||
enable_vector=False,
|
||||
hybrid_mode=False,
|
||||
enable_cascade=False,
|
||||
)
|
||||
seed = self.search(query, source_path, options=seed_opts)
|
||||
coarse_results = list(seed.results or [])[: int(coarse_k)]
|
||||
stage_counts["stage1_fallback_used"] = 1
|
||||
except Exception as exc:
|
||||
self.logger.debug("Stage 1 fallback seeding failed: %r", exc)
|
||||
coarse_results = []
|
||||
|
||||
stage_times["stage1_fallback_search_ms"] = (time.time() - stage1_fallback_start) * 1000
|
||||
stage_counts["stage1_candidates"] = len(coarse_results)
|
||||
|
||||
if not coarse_results:
|
||||
return ChainSearchResult(query=query, results=[], symbols=[], stats=stats)
|
||||
|
||||
# ========== Stage 2: LSP Graph Expansion ==========
|
||||
stage2_start = time.time()
|
||||
expanded_results = self._stage2_lsp_expand(coarse_results, index_root, query=query)
|
||||
stage_times["stage2_expand_ms"] = (time.time() - stage2_start) * 1000
|
||||
stage_counts["stage2_expanded"] = len(expanded_results)
|
||||
try:
|
||||
stage2_unique_paths = len({(r.path or "").lower() for r in expanded_results if getattr(r, "path", None)})
|
||||
except Exception:
|
||||
stage2_unique_paths = 0
|
||||
stage_counts["stage2_unique_paths"] = stage2_unique_paths
|
||||
stage_counts["stage2_duplicate_paths"] = max(0, len(expanded_results) - stage2_unique_paths)
|
||||
|
||||
self.logger.debug(
|
||||
"Staged Stage 2: LSP expansion %d -> %d results in %.2fms",
|
||||
@@ -868,6 +918,11 @@ class ChainSearchEngine:
|
||||
clustered_results = self._stage3_cluster_prune(expanded_results, k * 2)
|
||||
stage_times["stage3_cluster_ms"] = (time.time() - stage3_start) * 1000
|
||||
stage_counts["stage3_clustered"] = len(clustered_results)
|
||||
if self._config is not None:
|
||||
try:
|
||||
stage_counts["stage3_strategy"] = str(getattr(self._config, "staged_clustering_strategy", "auto") or "auto")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self.logger.debug(
|
||||
"Staged Stage 3: Clustering %d -> %d representatives in %.2fms",
|
||||
@@ -944,6 +999,8 @@ class ChainSearchEngine:
|
||||
index_paths: List[Path],
|
||||
coarse_k: int,
|
||||
stats: SearchStats,
|
||||
*,
|
||||
index_root: Optional[Path] = None,
|
||||
) -> Tuple[List[SearchResult], Optional[Path]]:
|
||||
"""Stage 1: Binary vector coarse search using Hamming distance.
|
||||
|
||||
@@ -967,8 +1024,12 @@ class ChainSearchEngine:
|
||||
)
|
||||
return [], None
|
||||
|
||||
# Try centralized BinarySearcher first (preferred for mmap indexes)
|
||||
index_root = index_paths[0].parent if index_paths else None
|
||||
# Try centralized BinarySearcher first (preferred for mmap indexes).
|
||||
# Centralized binary vectors live at a project index root (where `index binary-mmap`
|
||||
# was run), which may be an ancestor of the nearest `_index.db` directory.
|
||||
index_root = Path(index_root).resolve() if index_root is not None else (index_paths[0].parent if index_paths else None)
|
||||
if index_root is not None:
|
||||
index_root = self._find_nearest_binary_mmap_root(index_root)
|
||||
coarse_candidates: List[Tuple[int, float, Path]] = [] # (chunk_id, distance, index_path)
|
||||
used_centralized = False
|
||||
using_dense_fallback = False
|
||||
@@ -977,9 +1038,26 @@ class ChainSearchEngine:
|
||||
binary_searcher = self._get_centralized_binary_searcher(index_root)
|
||||
if binary_searcher is not None:
|
||||
try:
|
||||
from codexlens.semantic.embedder import Embedder
|
||||
embedder = Embedder()
|
||||
query_dense = embedder.embed_to_numpy([query])[0]
|
||||
use_gpu = True
|
||||
if self._config is not None:
|
||||
use_gpu = getattr(self._config, "embedding_use_gpu", True)
|
||||
|
||||
query_dense = None
|
||||
backend = getattr(binary_searcher, "backend", None)
|
||||
model = getattr(binary_searcher, "model", None)
|
||||
profile = getattr(binary_searcher, "model_profile", None) or "code"
|
||||
|
||||
if backend == "litellm":
|
||||
try:
|
||||
from codexlens.semantic.factory import get_embedder as get_factory_embedder
|
||||
embedder = get_factory_embedder(backend="litellm", model=model or "code")
|
||||
query_dense = embedder.embed_to_numpy([query])[0]
|
||||
except Exception:
|
||||
query_dense = None
|
||||
if query_dense is None:
|
||||
from codexlens.semantic.embedder import get_embedder
|
||||
embedder = get_embedder(profile=str(profile), use_gpu=use_gpu)
|
||||
query_dense = embedder.embed_to_numpy([query])[0]
|
||||
|
||||
results = binary_searcher.search(query_dense, top_k=coarse_k)
|
||||
for chunk_id, distance in results:
|
||||
@@ -1531,34 +1609,26 @@ class ChainSearchEngine:
|
||||
if not seed_nodes:
|
||||
return coarse_results
|
||||
|
||||
async def expand_graph():
|
||||
async with LspBridge(
|
||||
workspace_root=str(workspace_root),
|
||||
config_file=str(lsp_config_file) if lsp_config_file else None,
|
||||
timeout=timeout_s,
|
||||
) as bridge:
|
||||
# Warm up analysis: open seed docs and wait a bit so references/call hierarchy are populated.
|
||||
if warmup_s > 0:
|
||||
for seed in seed_nodes[:3]:
|
||||
try:
|
||||
await bridge.get_document_symbols(seed.file_path)
|
||||
except Exception:
|
||||
continue
|
||||
async def expand_graph(bridge: LspBridge):
|
||||
# Warm up analysis: open seed docs and wait a bit so references/call hierarchy are populated.
|
||||
if warmup_s > 0:
|
||||
for seed in seed_nodes[:3]:
|
||||
try:
|
||||
warmup_budget = min(warmup_s, max(0.0, timeout_s * 0.1))
|
||||
await asyncio.sleep(min(warmup_budget, max(0.0, timeout_s - 0.5)))
|
||||
await bridge.get_document_symbols(seed.file_path)
|
||||
except Exception:
|
||||
pass
|
||||
builder = LspGraphBuilder(
|
||||
max_depth=max_depth,
|
||||
max_nodes=max_nodes,
|
||||
max_concurrent=max(1, max_concurrent),
|
||||
resolve_symbols=resolve_symbols,
|
||||
)
|
||||
return await builder.build_from_seeds(seed_nodes, bridge)
|
||||
|
||||
def run_coro_blocking():
|
||||
return asyncio.run(asyncio.wait_for(expand_graph(), timeout=timeout_s))
|
||||
continue
|
||||
try:
|
||||
warmup_budget = min(warmup_s, max(0.0, timeout_s * 0.1))
|
||||
await asyncio.sleep(min(warmup_budget, max(0.0, timeout_s - 0.5)))
|
||||
except Exception:
|
||||
pass
|
||||
builder = LspGraphBuilder(
|
||||
max_depth=max_depth,
|
||||
max_nodes=max_nodes,
|
||||
max_concurrent=max(1, max_concurrent),
|
||||
resolve_symbols=resolve_symbols,
|
||||
)
|
||||
return await builder.build_from_seeds(seed_nodes, bridge)
|
||||
|
||||
try:
|
||||
try:
|
||||
@@ -1569,9 +1639,43 @@ class ChainSearchEngine:
|
||||
|
||||
if has_running_loop:
|
||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
||||
graph = executor.submit(run_coro_blocking).result(timeout=timeout_s + 1.0)
|
||||
async def _expand_once():
|
||||
async with LspBridge(
|
||||
workspace_root=str(workspace_root),
|
||||
config_file=str(lsp_config_file) if lsp_config_file else None,
|
||||
timeout=timeout_s,
|
||||
) as bridge:
|
||||
return await expand_graph(bridge)
|
||||
|
||||
def _run():
|
||||
return asyncio.run(asyncio.wait_for(_expand_once(), timeout=timeout_s))
|
||||
|
||||
graph = executor.submit(_run).result(timeout=timeout_s + 1.0)
|
||||
else:
|
||||
graph = run_coro_blocking()
|
||||
from codexlens.lsp.keepalive_bridge import KeepAliveKey, KeepAliveLspBridge
|
||||
|
||||
key = KeepAliveKey(
|
||||
workspace_root=str(workspace_root),
|
||||
config_file=str(lsp_config_file) if lsp_config_file else None,
|
||||
timeout=float(timeout_s),
|
||||
)
|
||||
with self._realtime_lsp_keepalive_lock:
|
||||
keepalive = self._realtime_lsp_keepalive
|
||||
if keepalive is None or self._realtime_lsp_keepalive_key != key:
|
||||
if keepalive is not None:
|
||||
try:
|
||||
keepalive.stop()
|
||||
except Exception:
|
||||
pass
|
||||
keepalive = KeepAliveLspBridge(
|
||||
workspace_root=key.workspace_root,
|
||||
config_file=key.config_file,
|
||||
timeout=key.timeout,
|
||||
)
|
||||
self._realtime_lsp_keepalive = keepalive
|
||||
self._realtime_lsp_keepalive_key = key
|
||||
|
||||
graph = keepalive.run(expand_graph, timeout=timeout_s)
|
||||
except Exception as exc:
|
||||
self.logger.debug("Stage 2 (realtime) expansion failed: %r", exc)
|
||||
return coarse_results
|
||||
@@ -1705,6 +1809,57 @@ class ChainSearchEngine:
|
||||
if len(expanded_results) <= target_count:
|
||||
return expanded_results
|
||||
|
||||
strategy_name = "auto"
|
||||
if self._config is not None:
|
||||
strategy_name = getattr(self._config, "staged_clustering_strategy", "auto") or "auto"
|
||||
strategy_name = str(strategy_name).strip().lower()
|
||||
|
||||
if strategy_name in {"noop", "none", "off"}:
|
||||
return sorted(expanded_results, key=lambda r: r.score, reverse=True)[:target_count]
|
||||
|
||||
if strategy_name in {"score", "top", "rank"}:
|
||||
return sorted(expanded_results, key=lambda r: r.score, reverse=True)[:target_count]
|
||||
|
||||
if strategy_name in {"path", "file"}:
|
||||
best_by_path: Dict[str, SearchResult] = {}
|
||||
for r in expanded_results:
|
||||
if not r.path:
|
||||
continue
|
||||
key = str(r.path).lower()
|
||||
if key not in best_by_path or r.score > best_by_path[key].score:
|
||||
best_by_path[key] = r
|
||||
candidates = list(best_by_path.values()) or expanded_results
|
||||
candidates.sort(key=lambda r: r.score, reverse=True)
|
||||
return candidates[:target_count]
|
||||
|
||||
if strategy_name in {"dir_rr", "rr_dir", "round_robin_dir"}:
|
||||
results_sorted = sorted(expanded_results, key=lambda r: r.score, reverse=True)
|
||||
buckets: Dict[str, List[SearchResult]] = {}
|
||||
dir_order: List[str] = []
|
||||
for r in results_sorted:
|
||||
try:
|
||||
d = str(Path(r.path).parent).lower()
|
||||
except Exception:
|
||||
d = ""
|
||||
if d not in buckets:
|
||||
buckets[d] = []
|
||||
dir_order.append(d)
|
||||
buckets[d].append(r)
|
||||
|
||||
out: List[SearchResult] = []
|
||||
while len(out) < target_count:
|
||||
progressed = False
|
||||
for d in dir_order:
|
||||
if not buckets.get(d):
|
||||
continue
|
||||
out.append(buckets[d].pop(0))
|
||||
progressed = True
|
||||
if len(out) >= target_count:
|
||||
break
|
||||
if not progressed:
|
||||
break
|
||||
return out
|
||||
|
||||
try:
|
||||
from codexlens.search.clustering import (
|
||||
ClusteringConfig,
|
||||
@@ -2550,6 +2705,31 @@ class ChainSearchEngine:
|
||||
self.logger.debug("Failed to load centralized binary searcher: %s", exc)
|
||||
return None
|
||||
|
||||
def _find_nearest_binary_mmap_root(self, index_root: Path, *, max_levels: int = 10) -> Path:
|
||||
"""Walk up index_root parents to find the nearest centralized binary mmap.
|
||||
|
||||
Centralized staged-binary artifacts are stored at a project index root
|
||||
(e.g. `.../project/src/_binary_vectors.mmap`), but staged search often starts
|
||||
from the nearest ancestor `_index.db` path, which can be nested deeper.
|
||||
|
||||
This helper makes Stage 1 robust by locating the nearest ancestor directory
|
||||
that contains the centralized `_binary_vectors.mmap`.
|
||||
"""
|
||||
current_dir = Path(index_root).resolve()
|
||||
for _ in range(max(0, int(max_levels)) + 1):
|
||||
try:
|
||||
if (current_dir / BINARY_VECTORS_MMAP_NAME).exists():
|
||||
return current_dir
|
||||
except Exception:
|
||||
return Path(index_root).resolve()
|
||||
|
||||
parent = current_dir.parent
|
||||
if parent == current_dir:
|
||||
break
|
||||
current_dir = parent
|
||||
|
||||
return Path(index_root).resolve()
|
||||
|
||||
def _compute_cosine_similarity(
|
||||
self,
|
||||
query_vec: "np.ndarray",
|
||||
|
||||
codex-lens/tests/test_staged_stage3_fast_strategies.py (new file, 56 lines)
@@ -0,0 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.entities import SearchResult
|
||||
from codexlens.search.chain_search import ChainSearchEngine
|
||||
|
||||
|
||||
def _engine_with_strategy(name: str) -> ChainSearchEngine:
|
||||
cfg = Config.load()
|
||||
cfg.staged_clustering_strategy = name
|
||||
return ChainSearchEngine(registry=MagicMock(), mapper=MagicMock(), config=cfg)
|
||||
|
||||
|
||||
def test_stage3_strategy_score_skips_embedding(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
"codexlens.semantic.factory.get_embedder",
|
||||
lambda *a, **k: (_ for _ in ()).throw(RuntimeError("should not embed")),
|
||||
)
|
||||
|
||||
engine = _engine_with_strategy("score")
|
||||
expanded = [
|
||||
SearchResult(path="D:/p/a.py", score=0.9),
|
||||
SearchResult(path="D:/p/a.py", score=0.1),
|
||||
SearchResult(path="D:/p/b.py", score=0.8),
|
||||
SearchResult(path="D:/p/c.py", score=0.7),
|
||||
]
|
||||
|
||||
reps = engine._stage3_cluster_prune(expanded, target_count=3)
|
||||
assert [r.path for r in reps] == ["D:/p/a.py", "D:/p/b.py", "D:/p/c.py"]
|
||||
|
||||
|
||||
def test_stage3_strategy_dir_rr_round_robins_dirs(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
"codexlens.semantic.factory.get_embedder",
|
||||
lambda *a, **k: (_ for _ in ()).throw(RuntimeError("should not embed")),
|
||||
)
|
||||
|
||||
engine = _engine_with_strategy("dir_rr")
|
||||
expanded = [
|
||||
SearchResult(path="D:/p1/a.py", score=0.99),
|
||||
SearchResult(path="D:/p1/b.py", score=0.98),
|
||||
SearchResult(path="D:/p2/c.py", score=0.97),
|
||||
SearchResult(path="D:/p2/d.py", score=0.96),
|
||||
SearchResult(path="D:/p3/e.py", score=0.95),
|
||||
]
|
||||
|
||||
reps = engine._stage3_cluster_prune(expanded, target_count=4)
|
||||
assert len(reps) == 4
|
||||
assert reps[0].path.endswith("p1/a.py")
|
||||
assert reps[1].path.endswith("p2/c.py")
|
||||
assert reps[2].path.endswith("p3/e.py")